EBA stress-test dataset

All data are downloaded from EBA website. EBA owns the copyright on these data, but authorizes their reproduction, provided the source is acknowledged, which I am doing right now.

Up to now, there are three years available with stress test data: 2011 / 2014 / 2016, and others with transparency exercise which are not considered here. The aim of this dataset curation is to build edgelist of BANK –> ASSET CLASS exposures to be used in fire-sales spillover systemic risk analysis.

2011 data are quite different from 2014 and 2016 (which have a very similare structure, but they are not exactly identical as well). In particular all the Asset Exposures have only one single value, meaning that there is no EAD (Exposure at default) distinction. In 2014 and 2016 instead, three EAD types are considered:

and they must be summed over to get the total exposures. In 2016 this procedure is even trickier, since the Exposure selection is different for IRB and SA, resulting in one category of SA which has non null intersection with many IRB categories. I decided to leave that specific category as a separate asset class. Here 2011 and 2016 cases are presented, 2014 following roughly the 2016 scheme.

2011 stress-test data

data reading

REMARK: the file eba2011_cleaned.csv (which is the actual 2011 dataset) is not the original file you find in the website. The original file is really dirty. Here I tried to remove all the spurious entries and to keep information as well.

The files code_* are derived from an excel spreadsheet on the website that has been translated into several csv’s.

setwd('~/Documents/SoBigData/eba/')
rm(list=ls())
library(data.table) # i'm using data.table: more flexibility
eba11 = fread('./2011/eba2011_cleaned.csv',dec='.')
code_bank11 = fread('./2011/code_bank.csv')
code_country11 = fread('./2011/code_country.csv')
code_information11 = fread('./2011/code_information.csv')
code_measure11 = fread('./2011/code_measure.csv')
dim(eba11)  # 3219 97
## [1] 3219   97
head(eba11) # to get an idea
##                    WORKSHEET INFORMATION_CODE INFO_DATE_CODE
## 1: 1 - Aggregate information            30010       20101231
## 2: 1 - Aggregate information            30010       20111231
## 3: 1 - Aggregate information            30010       20111231
## 4: 1 - Aggregate information            30010       20121231
## 5: 1 - Aggregate information            30010       20121231
## 6: 1 - Aggregate information            30011       20101231
##    C_COUNTRY_CODE SCENARIO_CODE MATURITY_CODE MEASURE_CODE    AT001
## 1:                           NA            NA          120 120539.3
## 2:                          100            NA          120 124631.7
## 3:                          105            NA          120 128470.4
## 4:                          100            NA          120 126483.3
## 5:                          105            NA          120 133084.6
## 6:                           NA            NA          120   9283.0
##         AT002     AT003     BE004     BE005     CY006    CY007    DE017
## 1:  94810.568 27509.444 140834.63 111921.94 27626.888 26277.42 346607.6
## 2:  98268.454 28762.289 141985.90 124532.97 27776.767 26277.09 422045.6
## 3: 100974.240 31097.902 142202.40 127979.50 28117.884 26290.30 467396.2
## 4:  97537.045 29746.450 151898.23 125586.13 27894.578 26277.09 435101.0
## 5:  99967.654 33812.358 166713.25 135836.87 28569.585 26290.30 499896.5
## 6:   5891.284   764.609  17002.28  11351.55  2014.969  2134.01  30361.0
##       DE018    DE019  DE020    DE021     DE022    DE023     DE024    DE025
## 1: 267500.0 120697.4  88689 123849.5  86850.50 19487.17 48615.000 41388.00
## 2: 290007.4 127153.0  96386 126981.7  95548.19 20766.96 49819.992 40596.92
## 3: 318105.4 136483.0 108230 131835.1 104890.65 24282.79 59763.764 44569.21
## 4: 297382.9 126216.0 100924 127848.8 100274.86 21064.29 50073.532 40918.13
## 5: 346664.8 140718.0 124052 134536.3 107861.02 23710.92 67969.621 71503.64
## 6:  10300.0   9838.0   7299   8954.0   3974.00  4936.00  1218.878  4434.00
##       DE027    DE028    DE029  DK008     DK009     DK010 DK011     ES059
## 1: 35256.66 25770.20 17690.86 145907 14090.840  9890.119 75486 594284.08
## 2: 38213.64 27768.48 18104.61 144867 13973.490 10005.526 75418 613278.69
## 3: 48833.90 32704.20 21045.75 144035 14469.865 10048.092 75370 626920.98
## 4: 38883.32 28512.44 18731.56 144093 13895.254 10003.997 75427 622571.14
## 5: 48789.59 35967.28 22704.96 142285 14928.681 10233.461 75351 650979.10
## 6:  5162.00  3359.00  1904.00  11085  1699.068  1230.965  6633  41997.61
##        ES060     ES061    ES062     ES063    ES064     ES065    ES066
## 1: 313327.22 200508.29 162711.3 32096.925 94520.59 56488.147 48595.24
## 2: 316735.97 205824.47 164602.2 32151.467 94798.00 56504.918 48783.74
## 3: 319547.28 210324.75 166631.0 32396.181 94973.39 56503.284 49828.44
## 4: 320087.13 206676.44 164726.5 32196.663 95058.24 56517.573 48916.54
## 5: 322743.55 216318.50 164622.0 32711.230 95396.37 56503.284 49754.31
## 6:  24939.07   9398.95  11109.0  2656.204  6698.78  3507.003  1854.00
##        ES067     ES068    ES069     ES070     ES071    ES072     ES073
## 1: 54734.875 39794.300 30962.09 25250.763 46090.200 23697.61 20068.150
## 2: 54760.565 39857.549 31353.73 25257.853 46202.371 23715.06 20075.407
## 3: 54844.237 40143.995 31776.95 25265.864 46355.303 23849.31 20100.938
## 4: 54773.243 39918.257 31456.77 25259.712 46299.004 23732.95 20079.278
## 5: 54957.563 40483.803 32258.73 25275.112 46657.481 24018.45 20135.869
## 6:  1686.523  2389.324  1920.12  1551.243  2710.565  2299.38  2501.485
##       ES074     ES075     ES076     ES077     ES078    ES079    ES080
## 1: 18339.05 29277.700 16867.675 14679.175 13465.138 9517.400 6171.113
## 2: 18348.63 29283.290 16909.825 14679.175 13491.167 9563.955 6185.034
## 3: 18399.40 29290.283 17053.216 14679.175 13644.779 9521.200 6205.934
## 4: 18355.03 29284.279 16948.517 14679.175 13514.636 9546.441 6199.236
## 5: 18461.22 29299.431 17231.856 14679.175 13855.996 9521.200 6272.445
## 6:  1395.40  2981.506   685.244  1934.505  1163.924 2117.000  771.983
##       ES081    ES082     ES083    FI012     FR013    FR014     FR015
## 1: 640.1250 181.8375 48250.963 42723.95 601271.22 561637.4 407316.20
## 2: 642.8722 181.8375 48292.404 43889.22 660993.35 558570.7 426349.13
## 3: 652.4236 181.8375 48292.404 45875.48 686948.94 557395.4 465831.56
## 4: 645.6512 181.8375 48292.404 44148.15 673782.25 555989.5 440569.82
## 5: 666.2420 181.8375 48292.404 46202.31 723305.15 553361.6 512503.55
## 6:  56.9080  20.4070  1842.825  5232.48  55351.75  46277.2  31943.21
##       FR016     GB088      GB089     GB090     GB091     GR030     GR031
## 1: 343861.5 607351.06  825559.80 461106.73 472113.88 47968.000 68301.000
## 2: 386204.8 622772.00  900631.14 527521.64 483303.71 48807.938 72095.000
## 3: 418651.1 667709.00 1002244.36 595738.80 520086.89 49597.549 72783.219
## 4: 394317.8 608905.00  909072.21 541910.64 484176.48 48753.510 70898.000
## 5: 445529.0 684744.00 1056964.69 657378.47 548299.81 49339.830 72258.513
## 6:  27824.0  58981.74   86900.16  46232.36  47983.74  3345.792  7802.668
##        GR032 GR033    GR034     GR035     HU036       IE037    IE038 IE039
## 1: 48960.677 37987 12636.09 6613.0000 26838.645  98768.0111 83869.92 15792
## 2: 49619.554 38346 12975.26 6706.8004 26982.774 108570.2028 82605.70 15792
## 3: 49691.832 37785 12946.73 6723.5837 26977.191 111267.0499 80705.30 15792
## 4: 50000.308 38642 13249.32 6759.8543 26992.734 110797.0966 78269.12 15792
## 5: 50207.302 39169 13311.56 6814.0966 26977.191 113783.6567 79889.41 15792
## 6:  4335.041  2669   117.00  998.2039  3309.892    273.0814  5220.00  1681
##        IT040     IT041      IT042    IT043     IT044 LU045    MT046
## 1: 332132.11 454849.66 109238.283 94877.76 94360.912 12970 3366.620
## 2: 337133.05 491974.54 109856.750 95742.27 95055.250 12970 3366.620
## 3: 353062.01 514409.03 111281.392 96560.40 96225.200 12970 3366.620
## 4: 333209.62 497596.02 110057.885 96190.29 95758.553 12970 3366.620
## 5: 359112.52 529846.87 113072.445 97925.60 97534.901 12970 3366.620
## 6:  26159.34  35701.96   4400.807  4024.47  6558.697  1551  354.373
##        NL047     NL048     NL049     NL050      NO051     PL052     PT053
## 1: 321102.96 219567.92 116328.00 21302.189 117776.756 35540.207 76989.230
## 2: 335420.64 231563.47 118501.41 21827.412 117981.558 35672.960 78265.960
## 3: 366922.05 247092.52 120618.57 23056.670 118078.164 35672.960 78810.085
## 4: 338905.31 241169.46 118627.48 22221.091 118360.332 35672.960 79186.650
## 5: 391281.87 273059.76 120900.74 25093.218 117695.414 35672.960 80160.228
## 6:  28394.78  27724.61  11573.51  1625.648   9745.982  4201.795  6510.127
##       PT054     PT055     PT056     SE084     SE085      SE086    SE087
## 1: 59561.76 71121.755 26035.817 214760.10 86635.394 106358.929 84346.78
## 2: 62184.35 72882.033 26066.169 213513.52 86635.394 106216.811 84573.24
## 3: 63618.00 74542.694 26838.282 212930.49 89200.473 105712.236 84557.89
## 4: 64799.81 72990.763 26130.946 212396.12 86635.394 106056.317 84206.86
## 5: 68883.61 76712.305 26884.908 210518.83 94846.431 105292.937 83965.86
## 6:  3520.52  4519.686  2132.567  19102.63  9603.561   8208.964  7352.28
##       SI057    SI058
## 1: 15632.81 4946.000
## 2: 15678.43 4952.415
## 3: 15680.50 4950.772
## 4: 15677.17 4987.151
## 5: 15678.29 4951.247
## 6:   812.80  366.000

asset class choice

Here we choose, inside eba11 only some Asset Class types. The choice is done based on code_information11, on the paper by Greenwood and others on Vulnerable Banks and on conversation with Eric.

We also need Capital of single banks. We choose Tier1.

Then, we shall need the declared total amount of exposures of the banks. Candidate are total.exposures11 and total.asset11.

## illiquid asset classes:
## [1] "Non-defaulted exposures--Retail (excluding commercial real estate)--of which Residential mortgages"                                                         
## [2] "Non-defaulted exposures--Retail (excluding commercial real estate)--of which Revolving"                                                                     
## [3] "Non-defaulted exposures--Retail (excluding commercial real estate)--of which SME"                                                                           
## [4] "Non-defaulted exposures--Retail (excluding commercial real estate)--of which other"                                                                         
## [5] "Non-defaulted exposures--Commercial Real Estate"                                                                                                            
## [6] "Defaulted exposures (excluding sovereign)"                                                                                                                  
## [7] "INDIRECT SOVEREIGN EXPOSURES IN THE TRADING BOOK--Net position at fair values (Derivatives with positive fair value + Derivatives with negative fair value)"
## liquid asset classes:
## [1] "Non-defaulted exposures--Institutions"                                                                                                                   
## [2] "Non-defaulted exposures--Corporate (excluding commercial real estate)"                                                                                   
## [3] "NET DIRECT POSITIONS (gross exposures (long) net of cash short position of sovereign debt to other counterparties only where there is maturity matching)"
## [4] "DIRECT SOVEREIGN EXPOSURES IN DERIVATIVES--Net position at fair values (Derivatives with positive fair value + Derivatives with negative fair value)"
## capital:
## [1] "Situation at December 2010--E) Core Tier 1 including exist…"
## total asset:
## [1] "C. Results of t...--Capital adequacy--Total assets after the effects of mandatory restructuring plans publicly announced and fully committed and equity raised and fully committed by 30 April 2011"
## [2] "Total exposures"

country selection

We choose the counterparty countries. We have to choose since there are redundancies in the dataset: for instance, there is a country TO which is the TOTAL country, thus should contain the aggregate value across all the countries. When creating the edgelist we need to remove these redundancies.

## number of different countries:
## [1] 40
## selected countries are:
## EEA countries:
##  [1] "Austria"        "Belgium"        "Bulgaria"       "Cyprus"        
##  [5] "Czech Republic" "Denmark"        "Estonia"        "Finland"       
##  [9] "France"         "Germany"        "Greece"         "Hungary"       
## [13] "Ireland"        "Italy"          "Latvia"         "Lithuania"     
## [17] "Luxembourg"     "Malta"          "Netherlands"    "Poland"        
## [21] "Portugal"       "Romania"        "Slovakia"       "Slovenia"      
## [25] "Spain"          "Sweden"         "United Kingdom" "Norway"        
## [29] "Iceland"        "Liechtenstein"
## american countries:
## [1] "U.S."                     "Middle and South America"
## asian countries:
## [1] "Hong Kong"    "China"        "Japan"        "India"       
## [5] "Rest of Asia"
## other non EEA countries:
## [1] "Other non EEA non Emerging countries"
## [2] "Eastern Europe non EEA"
## and:
## [1] "Rest of the world"

capital dataset building

We simply build a dataframe with capital for each bank

#capital
tier1.11 = eba11[INFORMATION_CODE %in% capital11 & MEASURE_CODE == 120 & !(SCENARIO_CODE %in% c(100,105))] # select Tier1 Capital and MEASURE_CODE => Amount. I cannot use data11 because there I already selected counterparty country (which excludes capital info) 
tier1.11=tier1.11[,(1:7):=NULL] # remove useless columns, remain with one column per bank
dim(tier1.11)
## [1]  1 90
#if you want a DT with banks on columns do
tier1.11 = data.table(BANK=colnames(tier1.11), mlnEuro=as.numeric(tier1.11[1]))

# insert full bank names and countries
tier1.11 = merge(tier1.11, code_bank11, by='BANK')
setcolorder(tier1.11,c('BANK','Bank_country','Bank_name','mlnEuro'))
#write.csv(file='./2011/tier1_11.csv',tier1.11,row.names = F)

head(tier1.11)
##     BANK Bank_country                           Bank_name   mlnEuro
## 1: AT001      Austria              ERSTE BANK GROUP (EBG) 10507.000
## 2: AT002      Austria RAIFFEISEN BANK INTERNATIONAL (RBI)  7641.284
## 3: AT003      Austria       OESTERREICHISCHE VOLKSBANK AG  1764.609
## 4: BE004      Belgium                               DEXIA 17002.281
## 5: BE005      Belgium                            KBC BANK 11705.400
## 6: CY006       Cyprus   MARFIN POPULAR BANK PUBLIC CO LTD  2014.969

data filtering

We implement into the dataset the asset and country selection. We also filter the column SCENARIO_CODE, since values 100 and 105 correspond to stress test scenarios and not to actual values.

# first filtering 
data11 = eba11[C_COUNTRY_CODE %in% country11 & !(SCENARIO_CODE %in% c(100,105))]
data11 = data11[,c('SCENARIO_CODE','WORKSHEET','INFO_DATE_CODE','MATURITY_CODE','MEASURE_CODE'):=NULL] # remove useless columns

data11 = merge(data11,code_information11[,.(INFORMATION_CODE,INFORMATION_DESC)], by='INFORMATION_CODE',all.x=T) # add full exposure name

# asset filtering
data11 = data11[INFORMATION_CODE %in% c(liquid.asset11,illiquid.asset11),] 


names(data11)
##  [1] "INFORMATION_CODE" "C_COUNTRY_CODE"   "AT001"           
##  [4] "AT002"            "AT003"            "BE004"           
##  [7] "BE005"            "CY006"            "CY007"           
## [10] "DE017"            "DE018"            "DE019"           
## [13] "DE020"            "DE021"            "DE022"           
## [16] "DE023"            "DE024"            "DE025"           
## [19] "DE027"            "DE028"            "DE029"           
## [22] "DK008"            "DK009"            "DK010"           
## [25] "DK011"            "ES059"            "ES060"           
## [28] "ES061"            "ES062"            "ES063"           
## [31] "ES064"            "ES065"            "ES066"           
## [34] "ES067"            "ES068"            "ES069"           
## [37] "ES070"            "ES071"            "ES072"           
## [40] "ES073"            "ES074"            "ES075"           
## [43] "ES076"            "ES077"            "ES078"           
## [46] "ES079"            "ES080"            "ES081"           
## [49] "ES082"            "ES083"            "FI012"           
## [52] "FR013"            "FR014"            "FR015"           
## [55] "FR016"            "GB088"            "GB089"           
## [58] "GB090"            "GB091"            "GR030"           
## [61] "GR031"            "GR032"            "GR033"           
## [64] "GR034"            "GR035"            "HU036"           
## [67] "IE037"            "IE038"            "IE039"           
## [70] "IT040"            "IT041"            "IT042"           
## [73] "IT043"            "IT044"            "LU045"           
## [76] "MT046"            "NL047"            "NL048"           
## [79] "NL049"            "NL050"            "NO051"           
## [82] "PL052"            "PT053"            "PT054"           
## [85] "PT055"            "PT056"            "SE084"           
## [88] "SE085"            "SE086"            "SE087"           
## [91] "SI057"            "SI058"            "INFORMATION_DESC"

build edgelist with banks -> asset

Now we build the actual edgelist. Differently from 2014 and 2016 datasets, here the structure of the table is not the right one, namely we have one column for each bank. Thus we need to put banks in rows. Moreover we add an extra column, which is the Country_asset column, and corresponds to the acutal Asset class one considers when doing systemic risk analysis.

Here we choose to keep Country different only for Sovereign-like exposures, while all the other exposures will be considered on aggregate figures across all counterparty countries

# country_asset for all Countries and all assets
data11 = cbind(data.table('COUNTRY_ASSET'=paste(data11$C_COUNTRY_CODE,data11$INFORMATION_DESC,sep='_')),data11)

# select only Sovereign-like assets
sov11= c(33010,
         34012,
         34016
)


data11 = data11[!(INFORMATION_CODE %in% sov11), COUNTRY_ASSET:=INFORMATION_DESC]
#unique(data11$COUNTRY_ASSET)


# build edgelist with banks on rows
library(doMC)
registerDoMC(cores=detectCores())#-ceiling(detectCores()/2))
edgelist11 = foreach(i=4:(dim(data11)[2]-1), .combine=function(x,y) rbindlist(list(x,y))) %dopar% {
  col = data11[,c(1:3,dim(data11)[2],i),with=F]
  col = col[get(names(data11)[i]) != 0] # use get for data.table needs the *variable* and not the (quoted) name
  melt.data.table(col, id.vars = c('COUNTRY_ASSET','INFORMATION_CODE','INFORMATION_DESC','C_COUNTRY_CODE'), variable.name = 'BANK', value.name = 'mlnEuro', variable.factor=F)
}
setcolorder(edgelist11,c('BANK','COUNTRY_ASSET','INFORMATION_CODE','INFORMATION_DESC','C_COUNTRY_CODE','mlnEuro'))
head(edgelist11)
##     BANK                            COUNTRY_ASSET INFORMATION_CODE
## 1: AT001 AT_Non-defaulted exposures--Institutions            33010
## 2: AT001 CZ_Non-defaulted exposures--Institutions            33010
## 3: AT001 DE_Non-defaulted exposures--Institutions            33010
## 4: AT001 ES_Non-defaulted exposures--Institutions            33010
## 5: AT001 FR_Non-defaulted exposures--Institutions            33010
## 6: AT001 GB_Non-defaulted exposures--Institutions            33010
##                         INFORMATION_DESC C_COUNTRY_CODE  mlnEuro
## 1: Non-defaulted exposures--Institutions             AT 4552.000
## 2: Non-defaulted exposures--Institutions             CZ 3350.557
## 3: Non-defaulted exposures--Institutions             DE 2699.000
## 4: Non-defaulted exposures--Institutions             ES  437.000
## 5: Non-defaulted exposures--Institutions             FR  691.000
## 6: Non-defaulted exposures--Institutions             GB 2653.000

check for total asset matching

Since in the dataset total amount of exposures is declared, we need to check if the sum of the exposures in edgelist11 by Bank, actually matches the declared total exposure for each bank

asset.exp = total.exposures11
# try with counter-country = TOTAL
check.total.asset = eba11[INFORMATION_CODE == asset.exp & C_COUNTRY_CODE == 'TO']  
# with country TOTAL there are some banks resulting with 0 total exposure
check.total.asset = check.total.asset[,(c(1:7)):=NULL,with=F]

tot.exp.0 = data.table(BANK=colnames(check.total.asset), total.asset = as.numeric(check.total.asset[1]))
tot.exp.sum = edgelist11[,sum(mlnEuro),by=.(BANK)]
diffe = merge(tot.exp.0,tot.exp.sum, by='BANK')

head(diffe)
##     BANK total.asset        V1
## 1: AT001   222289.92 237060.42
## 2: AT002   164670.59 174658.29
## 3: AT003    45406.01  42234.39
## 4: BE004   516289.57 481837.39
## 5: BE005        0.00 282029.52
## 6: CY006    42864.70  46179.00

As you see, there is no matching. Moreover, bank BE005 (KBC Belgium) has a declared value of 0. For that specific bank we take as total exposure the sum of edgelist11.

# Bank BE005 has 0 tot.exp.0 (also Eric notes this)
# per questa banca assumo tot.exp.0 := tot.exp.sum

diffe=diffe[BANK == 'BE005',total.asset := V1]

# calculate difference and relative difference
diffe = diffe[,correction:=total.asset - V1][,correction.rel := correction/V1*100]

# plotting the differences
library(ggplot2)
library(reshape2)
diffe.long = melt(diffe, id='BANK')

gr=ggplot(data=diffe.long, aes(x=BANK, y=value, color=variable)) 
gr + geom_line(aes(group=variable),data=diffe.long[variable!='correction.rel']) + geom_point(size=3,data=diffe.long[variable!='correction.rel'])  + theme_bw()# + geom_point(size=10, data=asset.long[bank == 'DE025' & !(variable %in% c('diff.perc','diff'))])

gr + geom_line(data=diffe.long[variable=='correction.rel'],aes(group=variable)) + geom_point(size=3,data=diffe.long[variable=='correction.rel']) + theme_bw()

diffe[abs(correction.rel)>10]
##      BANK total.asset         V1  correction correction.rel
##  1: CY007    37587.80   48443.98  -10856.181      -22.40976
##  2: DE017  1098914.58  954631.56  144283.018       15.11400
##  3: DE021   255909.00  331457.00  -75547.998      -22.79270
##  4: DE022   246849.49  294344.68  -47495.191      -16.13591
##  5: DE023   214902.10  142136.27   72765.834       51.19442
##  6: DE025   168304.88   58173.64  110131.245      189.31470
##  7: DE029    89720.95  112251.47  -22530.524      -20.07147
##  8: ES060   545335.98  669094.32 -123758.344      -18.49640
##  9: ES061   278523.48  335797.50  -57274.018      -17.05612
## 10: ES070    44018.02   51647.50   -7629.479      -14.77221
## 11: FR014  1296021.56 1003491.48  292530.080       29.15123
## 12: FR015   791231.93  688051.22  103180.709       14.99608
## 13: GB091  1044712.62  916701.00  128011.618       13.96438
## 14: GR030    79888.83   89100.38   -9211.557      -10.33840
## 15: GR033    47205.48   55278.99   -8073.510      -14.60502
## 16: GR034    26799.86   33157.65   -6357.786      -19.17442
## 17: GR035    15094.80   20084.07   -4989.272      -24.84193
## 18: IE037   147923.77  125484.39   22439.371       17.88220
## 19: IT040   586863.41  653917.58  -67054.167      -10.25422
## 20: IT042   205346.50  244971.58  -39625.084      -16.17538
## 21: IT043   128139.75  145101.11  -16961.360      -11.68934
## 22: NL049   288941.00  248049.20   40891.796       16.48536
## 23: NL050    65007.03   79655.26  -14648.230      -18.38953
## 24: PL052    43684.20   50846.92   -7162.720      -14.08683
## 25: SE085   181970.29  220835.64  -38865.348      -17.59922
## 26: SI057    20488.00   23188.62   -2700.621      -11.64632
##      BANK total.asset         V1  correction correction.rel
nrow(diffe)
## [1] 90
# REMARK: 26 on 90 banks have a relative correction above 10%. BE005 is 100% (tot.exp.0=0) and DE025 is 189%

diffe[correction.rel<0, mean(correction.rel)] # -9%
## [1] -9.152091
diffe[correction.rel>0 & correction.rel < 50, mean(correction.rel)]  # 7.5%
## [1] 7.571485
diffe[,mean(abs(correction.rel))] # 11%
## [1] 11.04516

Here we see that the average correction is 11% of total sum. In 26 cases is above 10%. We decide to introduce a new Asset class, called Correction and with code 7000 (and country 1000), in order to deal with this inconsistencies.

REMARK: inconsistencies like this one are there also for 2014 and 2016. But the peculiarity of 2011 is that inconsitencies are in both directions, while in 2014 and 2016 declared total exposures always dominate sum of exposures. Thus, for 2011, we are going to have an edgelist with negative values as well. (By the way, in the dataset –not considering the correction– there are still (very small) negative exposures, as you notice from the following plot)

We add the correction term as a new asset class and we adjust names and order of columns

## adding the correction to 
corr = data.table(diffe[,.(BANK,correction)],COUNTRY_ASSET = 'Correction',INFORMATION_DESC='Correction',INFORMATION_CODE=7000, C_COUNTRY_CODE='Correction')
setnames(corr,'correction','mlnEuro')
setcolorder(corr,names(edgelist11))
edgelist11 = rbindlist(list(edgelist11,corr))
# add total exposure column (via sum, now that correction is taken into account, sum and tot.exp.0 match)
edgelist11 = edgelist11[,tot.exp := sum(mlnEuro), by=BANK][]
edgelist11 = edgelist11[mlnEuro!=0]


# metto i nomi interi delle banche e la nazionalità secondo 'code_bank11' e elimino BANK

edgelist11 = merge(edgelist11, code_bank11, by='BANK')
names(edgelist11)
## [1] "BANK"             "COUNTRY_ASSET"    "INFORMATION_CODE"
## [4] "INFORMATION_DESC" "C_COUNTRY_CODE"   "mlnEuro"         
## [7] "tot.exp"          "Bank_country"     "Bank_name"
setcolorder(edgelist11,c('BANK','Bank_country','Bank_name','C_COUNTRY_CODE','INFORMATION_CODE','INFORMATION_DESC','COUNTRY_ASSET','mlnEuro','tot.exp'))

# cambio i nomi per avere un po' di omogeneità con 2014 2015
setnames(edgelist11,c('BANK','COUNTRY_ASSET','INFORMATION_CODE','INFORMATION_DESC','C_COUNTRY_CODE'),c('Bank_code','Country_Asset','Exposure','Exposure.label','Country.label'))

# check
merge(tot.exp.0,unique(edgelist11[,.(Bank_code,tot.exp)]),by.y='Bank_code',by.x='BANK')
##      BANK  total.asset      tot.exp
##  1: AT001  222289.9205  222289.9205
##  2: AT002  164670.5921  164670.5921
##  3: AT003   45406.0115   45406.0115
##  4: BE004  516289.5695  516289.5695
##  5: BE005       0.0000  282029.5179
##  6: CY006   42864.6967   42864.6967
##  7: CY007   37587.8025   37587.8025
##  8: DE017 1098914.5830 1098914.5830
##  9: DE018  623652.4000  623652.4000
## 10: DE019  360856.6000  360856.6000
## 11: DE020  314422.3000  314422.3000
## 12: DE021  255909.0000  255909.0000
## 13: DE022  246849.4903  246849.4903
## 14: DE023  214902.1030  214902.1030
## 15: DE024  174953.5433  174953.5433
## 16: DE025  168304.8818  168304.8818
## 17: DE027  123331.3333  123331.3333
## 18: DE028   83163.3748   83163.3748
## 19: DE029   89720.9464   89720.9464
## 20: DK008  418642.4316  418642.4316
## 21: DK009   28565.9019   28565.9019
## 22: DK010   17890.0000   17890.0000
## 23: DK011  160095.0000  160095.0000
## 24: ES059 1007719.1151 1007719.1151
## 25: ES060  545335.9780  545335.9780
## 26: ES061  278523.4806  278523.4806
## 27: ES062  259731.4161  259731.4161
## 28: ES063   45624.1919   45624.1919
## 29: ES064  130654.0000  130654.0000
## 30: ES065   92107.5623   92107.5623
## 31: ES066   65285.2714   65285.2714
## 32: ES067   66751.1316   66751.1316
## 33: ES068   64072.2018   64072.2018
## 34: ES069   50239.3723   50239.3723
## 35: ES070   44018.0249   44018.0249
## 36: ES071   66545.1663   66545.1663
## 37: ES072   40287.2940   40287.2940
## 38: ES073   31156.5674   31156.5674
## 39: ES074   29267.0000   29267.0000
## 40: ES075   41709.9781   41709.9781
## 41: ES076   24653.0282   24653.0282
## 42: ES077   19343.3668   19343.3668
## 43: ES078   19068.0507   19068.0507
## 44: ES079    9656.0000    9656.0000
## 45: ES080    8054.8427    8054.8427
## 46: ES081     893.0187     893.0187
## 47: ES082     338.7597     338.7597
## 48: ES083   66350.7613   66350.7613
## 49: FI012   75418.9083   75418.9083
## 50: FR013 1307151.6579 1307151.6579
## 51: FR014 1296021.5609 1296021.5609
## 52: FR015  791231.9260  791231.9260
## 53: FR016  711131.8836  711131.8836
## 54: GB088 1121686.5695 1121686.5695
## 55: GB089 1444276.2734 1444276.2734
## 56: GB090  856348.7038  856348.7038
## 57: GB091 1044712.6154 1044712.6154
## 58: GR030   79888.8256   79888.8256
## 59: GR031  106460.6566  106460.6566
## 60: GR032   63793.1144   63793.1144
## 61: GR033   47205.4840   47205.4840
## 62: GR034   26799.8619   26799.8619
## 63: GR035   15094.8000   15094.8000
## 64: HU036   38659.8188   38659.8188
## 65: IE037  147923.7656  147923.7656
## 66: IE038  158491.3905  158491.3905
## 67: IE039   47638.0000   47638.0000
## 68: IT040  586863.4088  586863.4088
## 69: IT041  834979.2012  834979.2012
## 70: IT042  205346.4956  205346.4956
## 71: IT043  128139.7518  128139.7518
## 72: IT044  140146.2366  140146.2366
## 73: LU045   40808.7030   40808.7030
## 74: MT046    5601.6680    5601.6680
## 75: NL047  907443.7202  907443.7202
## 76: NL048  621466.5523  621466.5523
## 77: NL049  288941.0000  288941.0000
## 78: NL050   65007.0303   65007.0303
## 79: NO051  213015.0537  213015.0537
## 80: PL052   43684.1991   43684.1991
## 81: PT053  117687.2643  117687.2643
## 82: PT054   99125.2271   99125.2271
## 83: PT055   83414.7563   83414.7563
## 84: PT056   49268.4400   49268.4400
## 85: SE084  454432.9407  454432.9407
## 86: SE085  181970.2883  181970.2883
## 87: SE086  225400.3088  225400.3088
## 88: SE087  175354.5211  175354.5211
## 89: SI057   20488.0000   20488.0000
## 90: SI058    6013.0000    6013.0000
##      BANK  total.asset      tot.exp
# ok, they are the same (a part for BE005)


# now edgelist is 'complete'

#write.csv(edgelist11,file='./2011/edgelist11.csv',row.names=F)

head(edgelist11)
##    Bank_code Bank_country              Bank_name Country.label Exposure
## 1:     AT001      Austria ERSTE BANK GROUP (EBG)            AT    33010
## 2:     AT001      Austria ERSTE BANK GROUP (EBG)            CZ    33010
## 3:     AT001      Austria ERSTE BANK GROUP (EBG)            DE    33010
## 4:     AT001      Austria ERSTE BANK GROUP (EBG)            ES    33010
## 5:     AT001      Austria ERSTE BANK GROUP (EBG)            FR    33010
## 6:     AT001      Austria ERSTE BANK GROUP (EBG)            GB    33010
##                           Exposure.label
## 1: Non-defaulted exposures--Institutions
## 2: Non-defaulted exposures--Institutions
## 3: Non-defaulted exposures--Institutions
## 4: Non-defaulted exposures--Institutions
## 5: Non-defaulted exposures--Institutions
## 6: Non-defaulted exposures--Institutions
##                               Country_Asset  mlnEuro  tot.exp
## 1: AT_Non-defaulted exposures--Institutions 4552.000 222289.9
## 2: CZ_Non-defaulted exposures--Institutions 3350.557 222289.9
## 3: DE_Non-defaulted exposures--Institutions 2699.000 222289.9
## 4: ES_Non-defaulted exposures--Institutions  437.000 222289.9
## 5: FR_Non-defaulted exposures--Institutions  691.000 222289.9
## 6: GB_Non-defaulted exposures--Institutions 2653.000 222289.9

bipartite graph plotting

We plot the bipartite graph BANKS –> COUNTRY_ASSET. The weights are calculated (following Greenwood and others) as percentage of that particular asset class exposure over the total exposures of that bank. Thus, total strength per bank sum to 100 (roughly, due to rounding to integer values for weights).

The size of the nodes is proportional to the degree of the nodes. The vertical order is: + for Banks: descreasing in total exposure (the upper the bigger) + for Asset classes: increasing in total strength (the upper the smaller)

Bank nodes are colored according to their country.

#net plot
library(igraph)

# calculate weigths for country_asset / tot.exp

# calculate weigths for country_asset / tot.exp
edgelist = edgelist11[,sum(mlnEuro), by=.(Bank_name,Country_Asset,tot.exp)] 
edgelist[,weight:=ifelse(tot.exp,round(100*V1/tot.exp,digits=0),0), by=.(Bank_name,Country_Asset)]
edgelist$V1=NULL
head(edgelist[,sum(weight),by=Bank_name]) # should sum to 100, up to rounding issues
hist(edgelist[,sum(weight),by=Bank_name][,V1], main='bank total strength distribution')

edgelist = edgelist[weight!=0]
#names(edgelist)

g = graph_from_data_frame(edgelist,directed=F)
V(g)$type = V(g)$name %in% edgelist$Bank_name
summary(g)

V(g)$shape = 'circle'


V(g)$size[V(g)$type] = 10+30*degree(g,V(g)[V(g)$type])/max(degree(g,V(g)[V(g)$type]))
V(g)$size[!V(g)$type] = 10+30*degree(g,V(g)[!V(g)$type])/max(degree(g,V(g)[!V(g)$type]))

# build bipartite layout
exp = unique(edgelist[,.(Bank_name,tot.exp)])
country.bank = unique(edgelist11[,.(Bank_name,Country.label)])
V(g)$exposure[V(g)$type] = exp[match(V(g)$name[V(g)$type],Bank_name),tot.exp]
V(g)$country[V(g)$type] = country.bank[match(V(g)$name[V(g)$type],Bank_name),Country.label]

banks.deg = order(V(g)$exposure[V(g)$type])
asset.deg = order(strength(g,V(g)[!V(g)$type]),decreasing = TRUE)
len = length(V(g)[V(g)$type])
height.bank = 10/(len-1)
height.asset = 10/(vcount(g)-len-1)
l.bank = data.frame(x=rep(0,len), y=seq(from=0,by=height.bank,to=10)[order(banks.deg)])
l.asset = data.frame(x=rep(5,(vcount(g)-len)), y=seq(from=0,by=height.asset,to=10)[order(asset.deg)])
l = rbind(l.bank,l.asset)


col = rainbow(length(unique(V(g)$country))+1, alpha=0.5)
aa = V(g)$country[V(g)$type]
V(g)$color[V(g)$type] = col[match(aa,unique(aa))]
V(g)$color[!V(g)$type] = col[length(col)]
plot(g,
     layout=l,
     vertex.label.font=1, 
     vertex.label.cex=.7,
     vertex.label.color='black',
     vertex.frame.color='white',
     #vertex.shape = 'none',
     edge.width = 1+E(g)$weight/max(E(g)$weight),
     edge.color = 'grey',
     rescale=F,
     edge.curved=0,
     main=paste('2011 exposure network'),
     #axes=T,
     asp=1,
     xlim=range(l$x),
     ylim=range(l$y)
)

2016 stress-test data

data reading

rm(list=ls())
library(data.table)
setwd('~/Documents/SoBigData/eba/')
# read all datasets
eba16_cred= fread('./2016/TRA_CR.csv', dec='.',encoding = "Latin-1")
eba16_sov= fread('./2016/TRA_SOV.csv',dec='.')
eba16_cap = fread('./2016/TRA_OTH.csv',dec='.',encoding='Latin-1')
code_item16 = fread('./2016/code_item.csv',header=T) # header=T perché il primo nome è un '3'
code_country16 = fread('./2016/code_country.csv')
code_exposure16 = fread('./2016/code_exposure.csv')

# in 2016 non c'è un code_bank perché il Nome delle banche è dentro ai dataframe insieme al LEI, quindi, molto semplicemente:

code_bank16 = unique(eba16_cred[,.(LEI_code,Bank_name)])
#write.csv(code_bank16, file='./2016/code_bank16.csv',row.names = F)

head(eba16_cred)
##                LEI_code Country_code           Bank_name Period    Item
## 1: PQOH26KWDF7CG10L6792           AT Erste Group Bank AG 201512 1690201
## 2: PQOH26KWDF7CG10L6792           AT Erste Group Bank AG 201512 1690201
## 3: PQOH26KWDF7CG10L6792           AT Erste Group Bank AG 201512 1690201
## 4: PQOH26KWDF7CG10L6792           AT Erste Group Bank AG 201512 1690201
## 5: PQOH26KWDF7CG10L6792           AT Erste Group Bank AG 201512 1690201
## 6: PQOH26KWDF7CG10L6792           AT Erste Group Bank AG 201512 1690201
##    Scenario Portfolio Country Country_rank Exposure Status Perf_Status
## 1:        1         4       0            0     1100      1           0
## 2:        1         4       0            0     2000      1           0
## 3:        1         4       0            0     3000      1           0
## 4:        1         4       0            0     3100      1           0
## 5:        1         4       0            0     3200      1           0
## 6:        1         4       0            0     4000      1           0
##      Amount
## 1:     0.00
## 2:     0.00
## 3:     0.00
## 4:     0.00
## 5:     0.00
## 6: 61288.93

filtering data

Here we choose Scenario=1 meaning take actual figures and not stress scenarios. Then we select the three Items corresponing to EAD values in the dataset, three since there are the three paradigms SA, A-IRB and F-IRB (the difference actually resides only in the way risk-related figures are calculated: for exposure values there is no consequence on numbers, but still, in the dataset they are distinct)

## [1] "Exposure values - (SA)"
## [1] "Exposure values - (IRB)"

Then we select counterparty countries (we actually remove only Country=0, which is the TOTAL country)

country16 = c(1:249) # uguale al 2014. dopo 244 (fino a 249) sono ridondanti (credo). tra 44 e 50 non c'è nulla. 0 = tot
# ho controllato 245:249 hanno zero entries in eba16_cred

data16.country = data16[Country %in% country16]
#data16.country = data16[Country == 0]
data16.country = merge(data16.country,code_country16,by='Country',all.x=T) # attaching counterparty country names
setnames(data16.country,'Label','Country.label')
# data divided in countries (not containing the TOTAL country values)

asset class selection

Here we select asset classes. As before, we choose liquid and illiquid asset classes. As I said in the beginning, here we have a problem with the EAD type: SA and IRB EADs –which must be summed for each bank– are divided into different asset classes. This creates a problem when wanting to select one set of asset classes.

## liquid asset classes
##  [1] "Central banks and central governments"                            
##  [2] "Regional governments or local authorities"                        
##  [3] "Public sector entities"                                           
##  [4] "Multilateral Development Banks"                                   
##  [5] "International Organisations"                                      
##  [6] "Central banks"                                                    
##  [7] "General governments"                                              
##  [8] "Institutions"                                                     
##  [9] "Credit institutions"                                              
## [10] "Other financial corporations"                                     
## [11] "Corporates"                                                       
## [12] "Equity"                                                           
## [13] "Securitisation"                                                   
## [14] "Covered bonds"                                                    
## [15] "Items associated with particularly high risk"                     
## [16] "Claims on institutions and corporates with a ST credit assessment"
## [17] "Collective investments undertakings (CIU)"
## illiquid asset classes
## [1] "Retail"                                    
## [2] "Households"                                
## [3] "Secured by mortgages on immovable property"
## [4] "Other non-credit obligation assets"

in the EBA file Methodological_note available on the website, they show that there are two different clustering of exposures (namley, asset classes) depending on which type of method (SA or IRB) the bank is using for a specific exposure. In any case, it is apparent that the IRB clustering is the reference, and they show the following IRB – SA ‘dictionary’

  • 1100 – 1100,1200 Central gov and banks
  • 2000 – 1300,1400,1500,(1600,1700),2000,6500 institutions
  • 3000 – 3000,5000 (corporate share),6400,6600,6700 corporates
  • 3100 – -SME
  • 3200 – -non-SME
  • 4000 – 4000,5000 (retail share) Retail
  • 4100 – 5000 (retail share) -Secured by real-estate
  • 4110 – 5000 (retail SME share) –SME
  • 4120 – 5000 (retail non-SME share) –non-SME
  • 4200 – 5000 (qualifying revolving share) -qualifying revolving
  • 4300 – 5000 (non-qualifying revolving share) -others
  • 4310 – 5000 (non-qualifying revolving share SME) –SME
  • 4320 – 5000 (non-qualifying revolving share non-SME) –non-SME
  • 6100 – 6100 equity
  • 6200 – 6200 securitization
  • 6300 – 6300 others

The problem is in the SA 5000 Secured by mortgages on immovable property Asset class, which is horizontal to IRB. I decide to keep 5000 as a separate asset class.

## selected asset classes
## [1] "Central banks and central governments"     
## [2] "Institutions"                              
## [3] "Corporates"                                
## [4] "Retail"                                    
## [5] "Secured by mortgages on immovable property"
## [6] "Equity"                                    
## [7] "Securitisation"                            
## [8] "Other non-credit obligation assets"

capital

## capital
## [1] "COMMON EQUITY TIER 1 CAPITAL (net of deductions and after applying transitional adjustments)"
##                LEI_code                            Bank_name   mlnEuro
## 1: PQOH26KWDF7CG10L6792                  Erste Group Bank AG 12135.725
## 2: 529900JP9C734S1LE008 Raiffeisen-Landesbanken-Holding GmbH  7538.001
## 3: A5GWLFH3KM7YV2SFQL84                    Belfius Banque SA  7478.840
## 4: 213800X3Q9LSAKRUWY91                         KBC Group NV 13244.497
## 5: 7LTWFZYICNSX8D621K86                     Deutsche Bank AG 52429.453
## 6: 851WYGNLUQLFZBSYGB56                       Commerzbank AG 27303.396

total exposure calculation and check

Check the difference between total exposures calculated by summing over all countries and all asset classes and total exposures declared by banks (i.e. labeled by Country=0 and Exposure=0)

# we sum over Country and Exposure 
total.exposure.df = data16.country.asset[,sum(mlnEuro),by=.(LEI_code,Country_code,Bank_name)]
setnames(total.exposure.df,'V1','total.exposure') 

# here we take instead Country==0 Exposure==0, to check differences
total.exposure.2df = eba16_cred[Country==0 & Scenario==1 & Exposure == 0 & Item %in% c(sa,irb) & Portfolio %in% c(1,3,4)]
total.exposure.2df = total.exposure.2df[,c('Item','Portfolio'):=NULL]
total.exposure.2df = total.exposure.2df[,sum(Amount),by=.(LEI_code,Country_code,Bank_name)]
setnames(total.exposure.2df,'V1','total.exposure') 

diff.DT = merge(total.exposure.df,total.exposure.2df,by=c('LEI_code','Country_code','Bank_name'))
## Warning in bmerge(i, x, leftcols, rightcols, io, xo, roll, rollends,
## nomatch, : A known encoding (latin1 or UTF-8) was detected in a join
## column. data.table compares the bytes currently, so doesn't support *mixed*
## encodings well; i.e., using both latin1 and UTF-8, or if any unknown
## encodings are non-ascii and some of those are marked known and others
## not. But if either latin1 or UTF-8 is used exclusively, and all unknown
## encodings are ascii, then the result should be ok. In future we will check
## for you and avoid this warning if everything is ok. The tricky part is
## doing this without impacting performance for ascii-only cases.
setnames(diff.DT, c('total.exposure.x','total.exposure.y'), c('tot.exp.sum','tot.exp.0'))

head(diff.DT)
##                LEI_code Country_code
## 1: 0W2PZJM8XOY22M4GG883           DE
## 2: 2138005O9XJIJN4JPN90           GB
## 3: 213800X3Q9LSAKRUWY91           BE
## 4: 2W8N8UU78PMDQKZENC08           IT
## 5: 3M5E1GQGKL17HI6CPN30           DK
## 6: 3U8WV1YX2VMUHH7Z1Q21           IE
##                                                  Bank_name tot.exp.sum
## 1:                          DekaBank Deutsche Girozentrale    64225.12
## 2: The Royal Bank of Scotland Group Public Limited Company   721015.88
## 3:                                            KBC Group NV   216117.64
## 4:                                  Intesa Sanpaolo S.p.A.   485762.89
## 5:                                              Jyske Bank    62897.48
## 6:                                  Allied Irish Banks plc   120261.35
##    tot.exp.0
## 1:  70970.01
## 2: 794123.35
## 3: 235729.81
## 4: 536135.96
## 5:  67541.48
## 6: 127177.49

As you can see, (and as in 2011 and 2014) there is no matching: declared total exposures do not match the sum of single declared exposures. In this case, total exposures always dominate exposures sum

library(ggplot2)
library(reshape2)

diff.DT2 = diff.DT[,tot.exp.0 - tot.exp.sum,by=.(LEI_code,Bank_name,tot.exp.sum)][]; setnames(diff.DT2,'V1','correction')
diff.DT2[,correction.rel:=correction/tot.exp.sum*100]
diff.DT2 = melt(diff.DT2,id.vars = c('LEI_code','Bank_name'))

gr=ggplot(data=diff.DT2, aes(x=Bank_name, y=value, color=variable)) 
gr + geom_line(aes(group=variable),data=diff.DT2[variable!='correction.rel']) + geom_point(size=3,data=diff.DT2[variable!='correction.rel'])  + theme_bw()# + geom_point(size=10, data=asset.long[bank == 'DE025' & !(variable %in% c('diff.perc','diff'))])

gr + geom_line(data=diff.DT2[variable=='correction.rel'],aes(group=variable)) + geom_point(size=3,data=diff.DT2[variable=='correction.rel']) + theme_bw()

As you can see, there are significant differences, in average 10.498822 percent. Thus, ass done before, we add a new Asset Class Correction in order to account for this discrepancies.

building final edgelist, introducing correction term and Asset_Class column

As before, we build an edgelist with a rationale BANK –> ASSET CALSS, where asset classes are the ones identified above, but with Sovereign Exposures (Central Banks and Institutions) divided by Country as well

edgelist16 = data16.country.asset


# correction introduction based on diff.DT
# country 1000 means no country, Exposure 7000 is correction
diff.DT[,mlnEuro := tot.exp.0-tot.exp.sum,by=.(LEI_code,Country_code,Bank_name)]
diff.DT[,Country:=1000][,Country.label:='Correction'][,Exposure:=7000][,Exposure.label:='Correction'][,tot.exp.0:=NULL][,tot.exp.sum:=NULL]
setcolorder(diff.DT,c('LEI_code','Country_code','Bank_name','Country','Country.label','Exposure','Exposure.label','mlnEuro'))
names(diff.DT)
edgelist16 =rbindlist(list(edgelist16,diff.DT)) 

# remove 0 value exposure
edgelist16 = edgelist16[mlnEuro != 0]


# creating country_asset variable

edgelist16[,tot.exp:=sum(mlnEuro), by=.(LEI_code,Country_code,Bank_name)]

# producing a data.table with banks -> country asset, for all countries and all assets
edgelist16 = cbind(data.table('Country_Asset'=paste(edgelist16$Country.label,edgelist16$Exposure.label,sep='_')),edgelist16)
setcolorder(edgelist16, c('LEI_code','Country_code','Bank_name','Country','Country.label','Exposure','Exposure.label','Country_Asset','mlnEuro','tot.exp'))


# producing a data.table with banks -> country asset, but countries are differentiated only for Exposure = Central banks and central gov and Institutions. Of course, also corresponding weights are going to change
sov.asset16 = c(1100,1200,1300,1400,1500,2000)
edgelist16[!(Exposure %in% sov.asset16), Country_Asset:=Exposure.label]


head(edgelist16)

We end up with an edgelist of dimensions 3013, 10. The number of asset classes is 222 while the number of banks 51

bipartite graph plotting

As above, we plot the bipartite graph BANKS –> COUNTRY_ASSET. The weights are calculated (following Greenwood and others) as percentage of that particular asset class exposure over the total exposures of that bank. Thus, total strength per bank sum to 100 (roughly, due to rounding to integer values for weights).

The size of the nodes is proportional to the degree of the nodes. The vertical order is:

  • for Banks: descreasing in total exposure (the upper the bigger)
  • for Asset classes: increasing in total strength (the upper the smaller)

Bank nodes are colored according to their country.

library(igraph)

# calculate weigths for country_asset / tot.exp
edgelist = edgelist16[,sum(mlnEuro), by=.(Bank_name,Country_Asset,tot.exp)] 
edgelist[,weight:=ifelse(tot.exp,round(100*V1/tot.exp,digits=0),0), by=.(Bank_name,Country_Asset)]
edgelist$V1=NULL
head(edgelist[,sum(weight),by=Bank_name]) # should sum to 100, up to rounding issues
hist(edgelist[,sum(weight),by=Bank_name][,V1], main='bank total strength distribution')

edgelist = edgelist[weight!=0]
#names(edgelist)

g = graph_from_data_frame(edgelist,directed=F)
V(g)$type = V(g)$name %in% edgelist$Bank_name
summary(g)

V(g)$shape = 'circle'


V(g)$size[V(g)$type] = 10+30*degree(g,V(g)[V(g)$type])/max(degree(g,V(g)[V(g)$type]))
V(g)$size[!V(g)$type] = 10+30*degree(g,V(g)[!V(g)$type])/max(degree(g,V(g)[!V(g)$type]))

# build bipartite layout
exp = unique(edgelist[,.(Bank_name,tot.exp)])
country.bank = unique(edgelist16[,.(Bank_name,Country_code)])
V(g)$exposure[V(g)$type] = exp[match(V(g)$name[V(g)$type],Bank_name),tot.exp]
V(g)$country[V(g)$type] = country.bank[match(V(g)$name[V(g)$type],Bank_name),Country_code]

banks.deg = order(V(g)$exposure[V(g)$type])
asset.deg = order(strength(g,V(g)[!V(g)$type]),decreasing = TRUE)
len = length(V(g)[V(g)$type])
height.bank = 10/(len-1)
height.asset = 10/(vcount(g)-len-1)
l.bank = data.frame(x=rep(0,len), y=seq(from=0,by=height.bank,to=10)[order(banks.deg)])
l.asset = data.frame(x=rep(5,(vcount(g)-len)), y=seq(from=0,by=height.asset,to=10)[order(asset.deg)])
l = rbind(l.bank,l.asset)


col = rainbow(length(unique(V(g)$country))+1, alpha=0.5)
aa = V(g)$country[V(g)$type]
V(g)$color[V(g)$type] = col[match(aa,unique(aa))]
V(g)$color[!V(g)$type] = col[length(col)]