/ PROJECTS

Stock Market Cluster Analysis with NetworkX

1. Data Load and Preprocessing

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from sklearn import cluster, covariance, manifold
from community import community_louvain as louvain
import matplotlib.cm as cm
import networkx as nx
import networkx.algorithms.community as nxcom
from importlib import reload
import csv
import os
import re
%matplotlib inline
# Load the three input tables from CSV files in the working directory.
# The price and constituent files use their first column as the row index.
df_prices = pd.read_csv("SP500_prices.csv", index_col=0)
df_indices = pd.read_csv("indices.csv")
df_SP500 = pd.read_csv("SP500.csv", index_col=0)
# Summary statistics of the numeric price columns.
df_prices.describe()
open high low close volume adjusted
count 282821.000000 282821.000000 282821.000000 282821.000000 2.828210e+05 282821.000000
mean 139.384227 141.184891 137.569435 139.425703 4.983235e+06 137.510842
std 249.635882 253.080583 246.310869 249.739199 1.180107e+07 249.704273
min 3.220000 3.290000 3.020000 3.120000 0.000000e+00 3.092542
25% 47.780000 48.419998 47.120000 47.790001 1.010000e+06 46.309925
50% 86.430000 87.480003 85.320000 86.420000 2.034800e+06 84.330002
75% 151.500000 153.380005 149.600006 151.570000 4.528900e+06 148.930000
max 4742.610000 4832.800000 4700.000000 4776.410000 4.286171e+08 4776.410000
# Preview the first rows of the raw price table.
df_prices.head()
symbol date open high low close volume adjusted
1 AAPL 2019-01-02 38.722500 39.712502 38.557499 39.480000 148158800.0 38.505024
2 AAPL 2019-01-03 35.994999 36.430000 35.500000 35.547501 365248800.0 34.669640
3 AAPL 2019-01-04 36.132500 37.137501 35.950001 37.064999 234428400.0 36.149662
4 AAPL 2019-01-07 37.174999 37.207500 36.474998 36.982498 219111200.0 36.069202
5 AAPL 2019-01-08 37.389999 37.955002 37.130001 37.687500 164101200.0 36.756794
# Keep only the adjusted price per symbol and day, exposed under the generic
# column name 'price' (the same name the index table uses).
df_prices_adj = df_prices[['symbol', 'date', 'adjusted']].rename(
    columns={'adjusted': 'price'}
)
df_prices_adj.tail()
symbol date price
282817 NWS 2021-03-24 23.69
282818 NWS 2021-03-25 24.42
282819 NWS 2021-03-26 24.01
282820 NWS 2021-03-29 23.39
282821 NWS 2021-03-30 23.83
# Summary statistics of the numeric columns of the index table.
df_indices.describe()
Unnamed: 0 price
count 16385.000000 16215.000000
mean 8193.000000 588.995521
std 4730.086416 3309.895206
min 1.000000 -36.980000
25% 4097.000000 1.284350
50% 8193.000000 4.227200
75% 12289.000000 107.426300
max 16385.000000 59221.230000
# Preview the first rows of the index table.
df_indices.head()
Unnamed: 0 symbol date price
0 1 DPROPANEMBTX 2019-01-02 0.641
1 2 DPROPANEMBTX 2019-01-03 0.630
2 3 DPROPANEMBTX 2019-01-04 0.635
3 4 DPROPANEMBTX 2019-01-07 0.623
4 5 DPROPANEMBTX 2019-01-08 0.628
# Keep only the three columns that the stock price table also has; any other
# column of the CSV is dropped.
index_columns = ["symbol", "date", "price"]
df_indices = df_indices.loc[:, index_columns]
df_indices.tail()
symbol date price
16380 THREEFY5 2021-03-24 0.8123
16381 THREEFY5 2021-03-25 0.8126
16382 THREEFY5 2021-03-26 0.8354
16383 THREEFY5 2021-03-29 0.8687
16384 THREEFY5 2021-03-30 0.8818
# Preview the first rows of the S&P 500 constituent table.
df_SP500.head()
symbol company identifier sedol weight sector shares_held local_currency exchange
1 AAPL Apple Inc. 03783310 2046251 0.059338 Information Technology 161340980 USD SP500
2 MSFT Microsoft Corporation 59491810 2588173 0.055094 Information Technology 77110660 USD SP500
3 AMZN Amazon.com Inc. 02313510 2000019 0.040733 Consumer Discretionary 4376067 USD SP500
4 FB Facebook Inc. Class A 30303M10 B7TL820 0.021718 Communication Services 24592958 USD SP500
5 GOOGL Alphabet Inc. Class A 02079K30 BYVY8G0 0.019521 Communication Services 3074670 USD SP500
# Stack stock prices and index series into one long (symbol, date, price) table.
df = pd.concat([df_prices_adj, df_indices])
# The previews of both sources show ISO dates (YYYY-MM-DD). Parse them
# strictly with the matching format so a malformed value raises instead of
# silently leaving the whole column as strings.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
# Pivot to wide form: one row per date, one column per symbol.
df = df.set_index(['date', 'symbol'])['price'].unstack()
# Fill gaps with the next available observation of the same series.
# NOTE(review): back-filling copies later values into earlier gaps and leaves
# trailing gaps as NaN - confirm this is the intended treatment.
df = df.bfill()
df
symbol A AAL AAP AAPL ABBV ABC ABMD ABT ACN ADBE ... XEL XLNX XOM XRAY XYL YUM ZBH ZBRA ZION ZTS
date
2019-01-02 64.511734 31.96316 156.2589 38.505024 79.101799 71.46416 309.96 67.034943 136.179626 224.570007 ... 45.400452 84.360565 60.557911 37.21114 64.63606 87.819199 100.576180 156.24 38.71991 83.337715
2019-01-03 62.135132 29.58167 161.1371 34.669640 76.495514 70.42746 302.29 63.871284 131.530212 215.699997 ... 45.221561 81.184296 59.628124 37.23077 62.42029 85.610275 98.756989 146.88 38.50573 80.457184
2019-01-04 64.285828 31.53016 157.1396 36.149662 78.959961 71.24338 313.44 65.694260 136.644577 226.190002 ... 45.664082 84.943359 61.826595 38.31106 65.05392 87.838394 102.129860 152.97 39.68837 83.613907
2019-01-07 65.650917 32.42568 159.4450 36.069202 80.112411 71.75211 314.80 66.678070 137.119217 229.259995 ... 45.466366 87.187141 62.148109 38.99852 64.09184 87.742371 102.169182 155.29 39.84668 84.117012
2019-01-08 66.613335 31.90411 158.3368 36.756794 80.484734 72.52003 318.42 65.877518 140.586914 232.679993 ... 45.993618 85.526176 62.599968 38.73336 64.69435 87.569496 99.877983 156.33 40.20985 85.369850
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-03-24 120.656403 21.81000 181.7300 120.089996 103.059998 115.37000 294.21 118.019997 267.549988 451.510010 ... 65.580002 119.959999 56.340000 60.25000 101.11000 107.080002 157.210648 463.81 53.01000 155.429993
2021-03-25 121.714798 22.77000 185.6500 120.589996 103.879997 117.34000 294.14 119.050003 268.609985 450.989990 ... 66.000000 120.029999 56.180000 60.55000 101.93000 107.379997 157.639999 461.26 54.74000 152.880005
2021-03-26 125.449112 22.93000 187.3200 121.209999 105.980003 118.73000 301.40 122.070000 280.769989 469.089996 ... 66.309998 123.139999 57.709999 61.28000 104.76000 108.059998 161.320007 476.96 55.85000 156.149994
2021-03-29 125.229446 22.91000 185.0600 121.389999 106.730003 119.05000 305.77 122.230003 279.540009 469.320007 ... 67.000000 122.230003 57.400002 62.06000 104.27000 109.209999 160.210007 467.07 53.89000 158.389999
2021-03-30 124.650322 24.12000 186.0700 119.900002 106.790001 119.06000 309.88 119.750000 278.549988 465.459991 ... 66.010002 120.300003 56.689999 63.54000 104.88000 109.769997 161.220001 474.83 55.91000 157.039993

565 rows × 531 columns

# Z-score every column: subtract the column mean and divide by the column
# standard deviation, so all series share the same scale.
col_mean = df.mean()
col_std = df.std()
df = df.sub(col_mean, axis='columns').div(col_std, axis='columns')
df.describe()
symbol A AAL AAP AAPL ABBV ABC ABMD ABT ACN ADBE ... XEL XLNX XOM XRAY XYL YUM ZBH ZBRA ZION ZTS
count 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 ... 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02 5.650000e+02
mean -5.533430e-16 -1.156990e-15 -2.339132e-15 2.364284e-15 -3.420666e-15 -1.307902e-15 7.797106e-16 4.426744e-15 8.551665e-16 4.326136e-15 ... -1.237476e-14 4.099769e-15 7.545587e-16 -1.760637e-16 -7.646194e-15 -1.936701e-15 -2.678683e-15 -1.810941e-15 -2.867323e-15 -3.068539e-15
std 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 ... 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
min -1.402956e+00 -1.578205e+00 -4.368070e+00 -1.422373e+00 -1.783790e+00 -1.920594e+00 -1.960001e+00 -1.883874e+00 -2.071674e+00 -1.646058e+00 ... -2.597860e+00 -2.077320e+00 -1.906102e+00 -2.730556e+00 -2.111907e+00 -4.089893e+00 -3.106090e+00 -1.373928e+00 -2.269724e+00 -2.059833e+00
25% -7.205020e-01 -1.081580e+00 -3.269484e-01 -9.160365e-01 -8.075055e-01 -6.871508e-01 -9.956515e-01 -7.258871e-01 -7.138452e-01 -9.190687e-01 ... -5.309481e-01 -7.727544e-01 -9.708038e-01 -8.461749e-01 -5.681917e-01 -6.355256e-01 -7.359661e-01 -6.979548e-01 -1.004871e+00 -7.099711e-01
50% -3.988059e-01 3.186508e-01 2.191719e-01 -2.702986e-01 -7.196453e-02 -2.771314e-01 2.134506e-01 -3.168558e-01 -2.189815e-01 -2.961142e-01 ... 6.798383e-02 -1.999376e-01 3.236947e-01 1.573233e-01 -1.819963e-01 3.273311e-02 1.208611e-01 -2.155373e-01 2.149392e-01 -1.179682e-01
75% 6.010351e-01 8.994444e-01 5.329562e-01 1.102648e+00 7.246279e-01 7.769495e-01 8.128134e-01 8.944433e-01 7.914156e-01 1.047349e+00 ... 6.942542e-01 6.930862e-01 8.373723e-01 8.655397e-01 3.657484e-01 7.911210e-01 7.965623e-01 2.228456e-01 6.929752e-01 1.075490e+00
max 2.370841e+00 1.631285e+00 2.129579e+00 2.053239e+00 2.192993e+00 2.759439e+00 1.856881e+00 2.525851e+00 2.386994e+00 1.970695e+00 ... 2.121844e+00 2.439377e+00 1.533337e+00 2.122636e+00 2.542459e+00 1.801157e+00 1.843039e+00 3.113057e+00 2.442046e+00 1.818189e+00

8 rows × 531 columns

2. Data Visualization

2.1. Stock Price Volatility

%matplotlib inline
# Plot the first 20 columns of the standardized table on one large axis,
# without a legend.
fig, ax1 = plt.subplots(figsize=(20, 15))
df.iloc[:,:20].plot(ax=ax1, legend=False)
plt.tight_layout()
plt.show()

png

# Row-over-row change of every series: each value minus the value in the
# previous row of the same column.
df_delta = df.diff()
# The first row has no predecessor (diff yields NaN there); define its
# change as 0.
df_delta.iloc[0] = 0
df_delta
symbol A AAL AAP AAPL ABBV ABC ABMD ABT ACN ADBE ... XEL XLNX XOM XRAY XYL YUM ZBH ZBRA ZION ZTS
date
2019-01-02 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
2019-01-03 -0.132308 -0.278940 0.281110 -0.123113 -0.190242 -0.096013 -0.126329 -0.210193 -0.138905 -0.100851 ... -0.029680 -0.171040 -0.072336 0.002954 -0.197347 -0.215205 -0.106702 -0.114809 -0.028157 -0.119316
2019-01-04 0.119732 0.228224 -0.230359 0.047508 0.179889 0.075565 0.183646 0.121119 0.152796 0.119270 ... 0.073418 0.202423 0.171038 0.162583 0.234564 0.217075 0.197830 0.074699 0.155477 0.130757
2019-01-07 0.075996 0.104891 0.132851 -0.002583 0.084121 0.047115 0.022400 0.065364 0.014180 0.034905 ... -0.032803 0.120826 0.025013 0.103462 -0.085687 -0.009355 0.002306 0.028457 0.020812 0.020839
2019-01-08 0.053579 -0.061091 -0.063861 0.022071 0.027177 0.071120 0.059623 -0.053189 0.103600 0.038885 ... 0.087476 -0.089442 0.035154 -0.039906 0.053662 -0.016842 -0.134387 0.012757 0.047745 0.051895
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-03-24 -0.063369 -0.053879 0.126777 -0.078644 -0.129928 0.148182 -0.072635 -0.164107 0.045411 -0.098804 ... 0.023227 -0.116853 0.087134 0.063104 0.063236 -0.089631 0.076719 0.014228 -0.074936 -0.025267
2021-03-25 0.058922 0.112443 0.225893 0.016050 0.059855 0.182449 -0.001153 0.068434 0.031668 -0.005913 ... 0.069681 0.003769 -0.012448 0.045150 0.073033 0.029227 0.025183 -0.031278 0.227436 -0.105625
2021-03-26 0.207894 0.018741 0.096235 0.019902 0.153287 0.128733 0.119576 0.200649 0.363291 0.205795 ... 0.051431 0.167471 0.119032 0.109865 0.252053 0.066249 0.215845 0.192574 0.145927 0.135448
2021-03-29 -0.012229 -0.002343 -0.130234 0.005778 0.054745 0.029636 0.071976 0.010631 -0.036747 0.002615 ... 0.114477 -0.049003 -0.024117 0.117390 -0.043642 0.112039 -0.065105 -0.121310 -0.257674 0.092785
2021-03-30 -0.032241 0.141726 0.058202 -0.047828 0.004379 0.000926 0.067694 -0.164771 -0.029578 -0.043888 ... -0.164249 -0.103929 -0.055237 0.222739 0.054329 0.054558 0.059240 0.095183 0.265562 -0.055920

565 rows × 531 columns

%matplotlib inline
# Plot the row-over-row changes of all columns on one large axis, without a
# legend.
fig, ax1 = plt.subplots(figsize=(20, 15))
df_delta.plot(ax=ax1, legend=False)
plt.tight_layout()
plt.show()

png

2.2. Rolling Average of Stock Price Correlation

def calculate_corr(df_stock_returns, returns_window, corr_window_size, corr_method):
    """Compute cross-correlation matrices over consecutive windows of rows.

    Parameters
    ----------
    df_stock_returns : pandas.DataFrame
        One column per series, one row per observation.
    returns_window : int
        Row position at which the first window starts.
    corr_window_size : int
        Number of rows per window. It is also the step between window
        starts, so windows do not overlap. The last window may be shorter.
    corr_method : str or callable
        Correlation method forwarded to ``DataFrame.corr``
        (e.g. ``'pearson'``, ``'spearman'``).

    Returns
    -------
    tuple
        ``(stocks_cross_corr_dict, x_days, y_mean_corr)`` where the dict maps
        each window's start row to its correlation matrix, ``x_days`` lists
        the start rows in order, and ``y_mean_corr`` holds the mean absolute
        correlation of each matrix (diagonal included).
    """
    stocks_cross_corr_dict = {}
    x_days = []
    y_mean_corr = []
    for window_start in range(returns_window, len(df_stock_returns), corr_window_size):
        # The window length comes from the argument, not from a global.
        window = df_stock_returns.iloc[window_start:window_start + corr_window_size]
        # Undefined correlations (NaN) are counted as 0.
        corr = window.corr(method=corr_method).fillna(0)
        stocks_cross_corr_dict[window_start] = corr
        x_days.append(window_start)
        y_mean_corr.append(np.abs(corr.to_numpy()).mean())
    return stocks_cross_corr_dict, x_days, y_mean_corr
%matplotlib inline
# Window lengths (in rows) to compare: 21, 42, ... up to but excluding `end`.
start = 21
end = 126
step = 21
window_sizes = list(range(start, end, step))
plt.figure(figsize=(20, 10))

for t in window_sizes:
    # calculate_corr may read the module-level W for its slice width, so keep
    # it equal to the window size passed as an argument.
    W = t
    _, x_days, y_mean_corr = calculate_corr(df, 1, W, 'pearson')
    plt.plot(x_days, y_mean_corr)

# Axis labels and legend do not depend on the loop; set them once. The legend
# entries are the window sizes, in plotting order.
plt.xlabel('Days')
plt.ylabel('Mean Correlation')
plt.legend(window_sizes, loc='upper left')

plt.show()

png

3. Network Analysis

3.1. Build Graph with Correlation table

# Compute the pairwise correlation matrix of all columns of df; the graphs
# further down are built from it.
cor_matrix = df.corr()
cor_matrix
symbol A AAL AAP AAPL ABBV ABC ABMD ABT ACN ADBE ... XEL XLNX XOM XRAY XYL YUM ZBH ZBRA ZION ZTS
symbol
A 1.000000 -0.512211 0.340607 0.936847 0.901146 0.866659 0.476208 0.932064 0.916575 0.876950 ... 0.581891 0.661159 -0.481706 0.276888 0.819616 0.232105 0.731162 0.947770 0.196208 0.830023
AAL -0.512211 1.000000 0.452672 -0.709976 -0.567905 -0.636546 0.067317 -0.620271 -0.515571 -0.761706 ... -0.685099 0.040970 0.950026 0.414638 -0.104279 0.401344 -0.116920 -0.402034 0.675397 -0.720983
AAP 0.340607 0.452672 1.000000 0.132583 0.218725 0.129889 0.598790 0.206132 0.304547 0.062110 ... -0.102458 0.592191 0.471304 0.536131 0.517752 0.604171 0.464449 0.358634 0.691259 0.045798
AAPL 0.936847 -0.709976 0.132583 1.000000 0.883271 0.900174 0.311099 0.946722 0.931212 0.971310 ... 0.752885 0.464395 -0.694573 0.138280 0.700491 0.103699 0.668931 0.859869 -0.067884 0.937015
ABBV 0.901146 -0.567905 0.218725 0.883271 1.000000 0.842135 0.379425 0.820668 0.826314 0.834858 ... 0.525801 0.430849 -0.525965 0.167883 0.625416 0.017315 0.617159 0.868875 0.131018 0.756742
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
YUM 0.232105 0.401344 0.604171 0.103699 0.017315 0.148166 0.135065 0.246568 0.391225 0.042001 ... 0.150926 0.517301 0.446745 0.791644 0.591224 1.000000 0.622313 0.311883 0.601129 0.211623
ZBH 0.731162 -0.116920 0.464449 0.668931 0.617159 0.647128 0.138407 0.709774 0.815626 0.581120 ... 0.583386 0.473123 -0.072924 0.677935 0.793259 0.622313 1.000000 0.754226 0.495103 0.715109
ZBRA 0.947770 -0.402034 0.358634 0.859869 0.868875 0.842147 0.398625 0.902315 0.895845 0.774380 ... 0.477856 0.635769 -0.332037 0.410140 0.824975 0.311883 0.754226 1.000000 0.353050 0.754373
ZION 0.196208 0.675397 0.691259 -0.067884 0.131018 0.003435 0.274263 0.036986 0.155021 -0.207014 ... -0.314988 0.431878 0.711608 0.768845 0.486766 0.601129 0.495103 0.353050 1.000000 -0.148741
ZTS 0.830023 -0.720983 0.045798 0.937015 0.756742 0.869275 0.075617 0.902965 0.917018 0.930572 ... 0.901222 0.331572 -0.696158 0.206471 0.652798 0.211623 0.715109 0.754373 -0.148741 1.000000

531 rows × 531 columns

# Node i of a graph built from the matrix corresponds to the i-th symbol.
symbols = cor_matrix.index.values
node_labels = dict(enumerate(symbols))

# Positive graph: keep correlations >= 0; negative and undefined (NaN)
# entries become 0, which produces no edge.
mat_pos = cor_matrix.where(cor_matrix >= 0, 0).to_numpy()
# from_numpy_array is used because from_numpy_matrix was removed in
# NetworkX 3.0; it accepts a plain ndarray, so np.asmatrix is not needed.
G_pos = nx.from_numpy_array(mat_pos)
G_pos = nx.relabel_nodes(G_pos, node_labels)
# The diagonal (self-correlation) creates self-loops; drop them. The edges
# are materialised first so the graph is not modified while being iterated.
G_pos.remove_edges_from(list(nx.selfloop_edges(G_pos)))

# Negative graph: keep correlations < 0 and use their magnitude as weight.
mat_neg = cor_matrix.where(cor_matrix < 0, 0).abs().to_numpy()
G_neg = nx.from_numpy_array(mat_neg)
G_neg = nx.relabel_nodes(G_neg, node_labels)
G_neg.remove_edges_from(list(nx.selfloop_edges(G_neg)))
list(G_pos.edges(data=True))[:5], list(G_neg.edges(data=True))[:5]
([('A', 'AAP', {'weight': 0.3406072572702165}),
  ('A', 'AAPL', {'weight': 0.9368465656977406}),
  ('A', 'ABBV', {'weight': 0.9011464914155253}),
  ('A', 'ABC', {'weight': 0.8666593549791352}),
  ('A', 'ABMD', {'weight': 0.476208419378593})],
 [('A', 'AAL', {'weight': 0.5122106576033231}),
  ('A', 'AEP', {'weight': 0.010066159905771194}),
  ('A', 'AFL', {'weight': 0.14527226301899576}),
  ('A', 'AIG', {'weight': 0.21700976911380374}),
  ('A', 'ALK', {'weight': 0.13546044591421322})])
# Sector lookup table: one row per symbol with its sector name and a numeric
# sector index.
symbol = df.columns
has_series = df_SP500['symbol'].isin(symbol)
df_sector = df_SP500.loc[has_series, ["symbol", "sector"]]
# Every symbol of the index table is placed in one synthetic sector.
tmp = pd.DataFrame({"symbol": df_indices["symbol"].unique(), "sector": "Macroeconomic Indices"})
df_sector = pd.concat([df_sector, tmp], ignore_index=True)
# Number the sectors 1..k in order of first appearance; rows matching no
# sector keep index 0.
df_sector['sec_idx'] = 0
for sec_id, sector_name in enumerate(df_sector["sector"].unique(), start=1):
    df_sector.loc[df_sector['sector'] == sector_name, 'sec_idx'] = sec_id
df_sector
symbol sector sec_idx
0 AAPL Information Technology 1
1 MSFT Information Technology 1
2 AMZN Consumer Discretionary 2
3 FB Communication Services 3
4 GOOGL Communication Services 3
... ... ... ...
526 GOLDAMGBD228NLBM Macroeconomic Indices 12
527 IOER Macroeconomic Indices 12
528 IORR Macroeconomic Indices 12
529 THREEFY1 Macroeconomic Indices 12
530 THREEFY5 Macroeconomic Indices 12

531 rows × 3 columns

# Attach the numeric sector index to every node of both graphs under the
# node attribute 'sec_idx'.
sector_index_by_symbol = df_sector.set_index('symbol')['sec_idx']
for graph in (G_pos, G_neg):
    nx.set_node_attributes(graph, sector_index_by_symbol, 'sec_idx')

3.2. Setting threshold on weights

def set_threshold(G,threshold):
    """Return a copy of G without the edges whose |weight| is below threshold.

    G itself is not modified. Edges are read through
    ``G.edges.data('weight')``; an edge whose absolute weight is strictly
    less than ``threshold`` is removed from the copy, all nodes are kept.
    """
    weak_edges = [
        (u, v)
        for u, v, weight in G.edges.data('weight')
        if abs(weight) < threshold
    ]
    pruned = G.copy()
    pruned.remove_edges_from(weak_edges)
    return pruned
# First pass: drop every edge whose weight is below 0.5 in both graphs and
# peek at the first few surviving edges.
base_threshold = 0.5
H_pos = set_threshold(G_pos, base_threshold)
H_neg = set_threshold(G_neg, base_threshold)
list(H_pos.edges(data=True))[:5], list(H_neg.edges(data=True))[:5]
([('A', 'AAPL', {'weight': 0.9368465656977406}),
  ('A', 'ABBV', {'weight': 0.9011464914155253}),
  ('A', 'ABC', {'weight': 0.8666593549791352}),
  ('A', 'ABT', {'weight': 0.932064482315709}),
  ('A', 'ACN', {'weight': 0.916574717968791})],
 [('A', 'AAL', {'weight': 0.5122106576033231}),
  ('A', 'BA', {'weight': 0.5064885797862092}),
  ('A', 'BXP', {'weight': 0.522527131570438}),
  ('A', 'CCL', {'weight': 0.5559250497113872}),
  ('A', 'DEXCAUS', {'weight': 0.6382900166453468})])

3.3. Community Detection

  • with Louvain Algorithm
# grid search
# for positive weights
# For each candidate threshold: prune the positive graph, run Louvain and
# report the communities that have at least 10 members.
for t, cor_thresold in enumerate(np.linspace(0.8, 0.85, 20)):
    # Re-seed before every run so the result for a given threshold does not
    # depend on how many runs came before it and can be reproduced by a
    # single run with the same seed.
    # NOTE(review): this relies on best_partition drawing from NumPy's global
    # RNG when no random_state is passed - confirm for the installed
    # python-louvain version.
    np.random.seed(2021)
    H_pos = set_threshold(G_pos, cor_thresold)
    partition = louvain.best_partition(H_pos)
    # Count members per community id in a single pass over the partition.
    sizes = {}
    for comm_id in partition.values():
        sizes[comm_id] = sizes.get(comm_id, 0) + 1
    print("{}th Total number of Communities = {}".format(t, len(sizes)))
    k = 0
    for i in range(len(sizes)):
        n_members = sizes.get(i, 0)
        if n_members >= 10:
            k += 1
            print('community {}th: '.format(i), n_members)
    print('n_big_communities: ', k)
# best partition with 5 big communities at cor_threshold = np.linspace(0.8,0.85,20)[8] 
...
5th Total number of Communities = 36
community 0th:  162
community 1th:  188
community 4th:  103
community 6th:  35
n_big_communities:  4
6th Total number of Communities = 36
community 0th:  172
community 1th:  180
community 3th:  113
community 4th:  25
n_big_communities:  4
7th Total number of Communities = 36
community 0th:  152
community 3th:  112
community 4th:  46
community 6th:  180
n_big_communities:  4
8th Total number of Communities = 41
community 2th:  77
community 4th:  107
community 5th:  157
community 7th:  37
community 9th:  108
n_big_communities:  5
9th Total number of Communities = 41
community 0th:  153
community 4th:  103
community 6th:  43
community 7th:  79
community 9th:  108
n_big_communities:  5
10th Total number of Communities = 41
community 0th:  146
community 2th:  84
community 4th:  109
community 6th:  41
community 9th:  106
n_big_communities:  5
11th Total number of Communities = 42
community 0th:  145
community 2th:  178
community 4th:  126
community 8th:  35
n_big_communities:  4
...
# Re-run Louvain on the positive graph at the threshold picked from the grid
# search and summarise the resulting communities.
np.random.seed(2021)
cor_thresold = np.linspace(0.8,0.85,20)[8]
H_pos = set_threshold(G_pos,cor_thresold)
partition = louvain.best_partition(H_pos)
values = [partition.get(node) for node in H_pos.nodes()]
# Group node names by community id, keeping the partition's node order.
members = {}
for node, comm_id in partition.items():
    members.setdefault(comm_id, []).append(node)
communities = [members.get(comm_id, []) for comm_id in range(len(set(values)))]
sum_comm_nodes = sum(len(nodes) for nodes in communities)
# "Big" communities are those with at least 10 members.
big_communities = [
    (comm_id, len(nodes))
    for comm_id, nodes in enumerate(communities)
    if len(nodes) >= 10
]
k = len(big_communities)
print("Total number of Communities = {}".format(len(communities)))
for comm_id, size in big_communities:
    print('community {}th: '.format(comm_id), size)
print('n_big_communities: ',k)
Total number of Communities = 41
community 0th:  164
community 2th:  79
community 4th:  105
community 7th:  30
community 9th:  108
n_big_communities:  5
# Store each node's community id on the graph as the node attribute
# 'community'.
nx.set_node_attributes(H_pos,partition,'community')
# Community ids in node order; used as the node colours below.
values = [partition.get(node) for node in H_pos.nodes()]
plt.figure(figsize=(10,10))
# Spring layout, unlabeled nodes coloured by community with the 'jet' colormap.
nx.draw_spring(H_pos, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)

png

# for negative weights
# For each candidate threshold: prune the negative graph, run Louvain and
# report the communities that have at least 10 members.
for t, cor_thresold in enumerate(np.linspace(0.6, 0.65, 20)):
    # Re-seed before every run so the result for a given threshold does not
    # depend on how many runs came before it and can be reproduced by a
    # single run with the same seed.
    # NOTE(review): this relies on best_partition drawing from NumPy's global
    # RNG when no random_state is passed - confirm for the installed
    # python-louvain version.
    np.random.seed(2021)
    H_neg = set_threshold(G_neg, cor_thresold)
    partition = louvain.best_partition(H_neg)
    # Count members per community id in a single pass over the partition.
    sizes = {}
    for comm_id in partition.values():
        sizes[comm_id] = sizes.get(comm_id, 0) + 1
    print("{}th Total number of Communities = {}".format(t, len(sizes)))
    k = 0
    for i in range(len(sizes)):
        n_members = sizes.get(i, 0)
        if n_members >= 10:
            k += 1
            print('community {}th: '.format(i), n_members)
    print('n_big_communities: ', k)
# best partition with 5 big communities at cor_threshold = np.linspace(0.6,0.65,20)[11] 
...
9th Total number of Communities = 37
community 0th:  104
community 1th:  83
community 2th:  177
community 5th:  119
community 6th:  16
n_big_communities:  5
10th Total number of Communities = 40
community 0th:  103
community 1th:  83
community 2th:  177
community 3th:  14
community 6th:  119
n_big_communities:  5
11th Total number of Communities = 41
community 0th:  104
community 1th:  83
community 2th:  176
community 3th:  13
community 6th:  119
n_big_communities:  5
12th Total number of Communities = 42
community 0th:  103
community 1th:  81
community 3th:  13
community 6th:  130
community 11th:  167
n_big_communities:  5
13th Total number of Communities = 42
community 0th:  106
community 1th:  81
community 2th:  176
community 6th:  118
community 7th:  13
n_big_communities:  5
14th Total number of Communities = 43
community 0th:  204
community 1th:  165
community 2th:  18
community 5th:  105
n_big_communities:  4
...
# Re-run Louvain on the negative graph at the threshold picked from the grid
# search and summarise the resulting communities.
np.random.seed(2021)
cor_thresold = np.linspace(0.6,0.65,20)[11] 
H_neg_otim = set_threshold(G_neg,cor_thresold)
# The following cells annotate, draw and export H_neg. Rebind it to the graph
# built at the chosen threshold; otherwise H_neg would still be the graph
# left over from the last grid-search iteration.
H_neg = H_neg_otim
partition = louvain.best_partition(H_neg)
modularity = louvain.modularity(partition, H_neg)
values = [partition.get(node) for node in H_neg.nodes()]
# Group node names by community id, keeping the partition's node order.
members = {}
for node, comm_id in partition.items():
    members.setdefault(comm_id, []).append(node)
communities = [members.get(comm_id, []) for comm_id in range(len(set(values)))]
# "Big" communities are those with at least 10 members.
big_communities = [
    (comm_id, len(nodes))
    for comm_id, nodes in enumerate(communities)
    if len(nodes) >= 10
]
k = len(big_communities)
print("Total number of Communities = {}".format(len(communities)))
for comm_id, size in big_communities:
    print('community {}th: '.format(comm_id), size)
print('n_big_communities: ',k)
Total number of Communities = 41
community 0th:  102
community 1th:  83
community 3th:  13
community 6th:  130
community 11th:  167
n_big_communities:  5
# Store each node's community id on the graph as the node attribute
# 'community'.
# NOTE(review): H_neg is whichever graph the name currently points to, while
# the partition was computed from H_neg_otim - confirm both refer to the same
# thresholded graph.
nx.set_node_attributes(H_neg, partition,'community')
# Community ids in node order; used as the node colours below.
values = [partition.get(node) for node in H_neg.nodes()]
plt.figure(figsize=(10,10))
# Spring layout, unlabeled nodes coloured by community with the 'jet' colormap.
nx.draw_spring(H_neg, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False)

png

3.4. Visualization with Gephi

# Export both thresholded graphs, including their node attributes, as GraphML
# files in the working directory.
for graph, path in ((H_pos, "model_H_pos.graphml"), (H_neg, "model_H_neg.graphml")):
    nx.write_graphml(graph, path)

png