我正在使用R中的SNA(statnet 套件)和igraph开始描述性网络分析。我想知道使用哪个套件来研究我的网络的不同属性,因为它们的功能略有不同,这使得它们不能完全互换。
不幸的是,我注意到 2 包返回不同的结果(例如,SNA 识别的最大 clique 大小为 8,而 igraph 达到 17!。我从同一个边缘列表开始构建两个对象,存储为数据框。我可以使用 intergraph 得出相同的结论,但我如何定义哪个包创建了正确的网络?
编辑
我被要求提供可重现的例子。提供数据将毫无用处,因为我无法为它们创建有意义的子样本。不幸的是,它们是私人数据,我无法传播它们。只是为了给出一个想法,下面我提供了我的data.frame的头部,其中包含在代码'fdi.edge.2003'中命名的edgelist,属于'data.frame'类
> dput(head(fdi.edge.2003, 10))
structure(list(iso_o = c("ARE", "ARE", "ARE", "ARE", "ARE", "ARE",
"ARE", "ARE", "ARE", "ARE"), iso_d = c("AUS", "AUT", "BGD", "BHR",
"CHE", "CHN", "EGY", "ESP", "FIN", "GBR"), year = c(2003, 2003,
2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003), all_num = c(1L,
2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 1L), all_inv_tot = c(200, 298.6,
10, 21.1, 16.1, 30, 36.5, 5.6, 24.6, 8), all_inv_avg = c(200,
149.3, 10, 7.03333333333333, 16.1, 15, 12.1666666666667, 5.6,
24.6, 8), all_job_tot = c(438, 789, 81, 358, 18, 173, 67, 70,
40, 77), all_job_avg = c(438, 394.5, 81, 119.333335876465, 18,
86.5, 22.3333339691162, 70, 40, 77), greenf_num = c(1, 2, 0,
3, 1, 2, 3, 1, 0, 1), greenf_inv_tot = c(200, 298.6, 0, 21.1,
16.1, 30, 36.5, 5.6, 0, 8), greenf_inv_avg = c(200, 149.300003051758,
0, 7.03333330154419, 16.1000003814697, 15, 12.1666669845581,
5.59999990463257, 0, 8), greenf_job_tot = c(438, 789, 0, 358,
18, 173, 67, 70, 0, 77), greenf_job_avg = c(438, 394.5, 0, 119.333335876465, 18, 86.5, 22.3333339691162, 70, 0, 77), coloc_num = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0), coloc_inv_tot = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), coloc_inv_avg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), coloc_job_tot = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), coloc_job_avg = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), expan_num = c(0, 0, 1, 0, 0, 0, 0, 0, 1, 0),
expan_inv_tot = c(0, 0, 10, 0, 0, 0, 0, 0, 24.6, 0), expan_inv_avg = c(0,
0, 10, 0, 0, 0, 0, 0, 24.6000003814697, 0), expan_job_tot = c(0,
0, 81, 0, 0, 0, 0, 0, 40, 0), expan_job_avg = c(0, 0, 81,
0, 0, 0, 0, 0, 40, 0), no_flow = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), grav_noinv = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0), dist = c(12092.1591796875, 4245.841796875, 3643.56079101562,
426.657623291016, 4830.7373046875, 5966.42724609375, 2370.06079101562,
5634.66748046875, 4570.3173828125, 5483.74462890625), distw = c(11543.8054501625,
4324.89105693665, 3561.5309768755, 490.192080768776, 4831.20286380844,
5975.25361008682, 2447.29938508511, 5605.06845714264, 4638.03680032616,
5594.32698249168), contig = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), comrelig = c(0.00378700019791722, 0.0094410004094243,
0.815205037593842, 0.901609003543854, 0.00625500036403537,
0.0227760020643473, 0.776296019554138, 0.00387900019995868,
0.00279700011014938, 0.0142930001020432), comlang_off = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), comlang_ethno = c(0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L), comcol = c(0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L), colony = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L), comleg_pretrans = c(1L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 1L), comcur = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), tdiff = c(5.35000038146973, 3, 2, 1, 3,
4, 2, 3.5, 2, 4), conflict = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0L), smctry = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), subg = c(3, 3, 4, 4, 3, 4, 4, 3, 3, 3), same_reg = c(0,
0, 0, 1, 0, 0, 1, 0, 0, 0), same_inc = c(1, 1, 0, 1, 1, 0,
0, 1, 1, 1), simil_inc = c(1, 1, 0, 1, 1, 0, 0, 1, 1, 1)), datalabel = "fDIMarket data aggregati per origine-destinazione-anno", time.stamp = " 6 Dec 2017 16:07", formats = c("%9s",
"%44s", "%9s", "%44s", "%9.0g", "%9.0g", "%10.0g", "%10.0g",
"%10.0g", "%10.0g", "%8.0g", "%10.0g", "%9.0g", "%10.0g", "%9.0g",
"%8.0g", "%10.0g", "%9.0g", "%10.0g", "%9.0g", "%8.0g", "%10.0g",
"%9.0g", "%10.0g", "%9.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%12.0g", "%9.0g", "%9.0g", "%9.0g",
"%12.0g", "%8.0g", "%9.0g", "%8.0g", "%8.0g", "%8.0g", "%9.0g",
"%8.0g", "%9.0g", "%9.0g", "%8.0g", "%9.0g", "%8.0g", "%8.0g",
"%9s", "%8.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g", "%8.0g",
"%8.0g", "%9.0g", "%8.0g", "%8.0g", "%8.0g", "%9s", "%9s", "%9s",
"%9s", "%8.0g", "%8.0g", "%9s", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9s", "%10.0g", "%9.0g", "%9.0g", "%9s", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9s", "%10.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g",
"%10.0g", "%10.0g", "%9.0g", "%9.0g", "%9.0g", "%9.0g", "%40.0g",
"%40.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g", "%10.0g",
"%9.0g", "%9.0g", "%8.0g", "%8.0g", "%9.0g", "%9.0g", "%9.0g",
"%9.0g", "%9.0g", "%9.0g", "%9.0g"), types = c(3L, 44L, 3L, 44L,
65527L, 65528L, 65526L, 65526L, 65526L, 65527L, 65526L, 65526L,
65527L, 65526L, 65527L, 65526L, 65526L, 65527L, 65526L, 65527L,
65526L, 65526L, 65527L, 65526L, 65527L, 65530L, 65527L, 65527L,
65526L, 65526L, 65526L, 65527L, 65528L, 65526L, 65526L, 65527L,
65528L, 65530L, 65527L, 65530L, 65530L, 65530L, 65530L, 65530L,
65530L, 65530L, 65530L, 65527L, 65530L, 65529L, 3L, 65530L, 65530L,
65530L, 65530L, 65530L, 65530L, 65530L, 65527L, 65530L, 65530L,
65530L, 2L, 2L, 2L, 2L, 65530L, 65530L, 2L, 65527L, 65527L, 65527L,
65527L, 65527L, 65527L, 3L, 65530L, 65527L, 65527L, 2L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 3L, 65530L, 65527L, 65527L,
65527L, 65526L, 65530L, 65530L, 65530L, 65529L, 65530L, 65530L,
65530L, 65530L, 65530L, 65530L, 65530L, 65526L, 65526L, 65530L,
65530L, 65526L, 65526L, 65527L, 65527L, 65530L, 65530L, 65527L,
65527L, 65527L, 65527L, 65527L, 65527L, 65527L), val.labels = structure(c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "gspflag", "gspflag", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), .Names = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "gspflag", "gspflag", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "")), var.labels = c("iso_3",
"Parent Country (Name)", "iso_3", "Host Country (Name)", "Reference year",
"Total # of Investments by origin in destination, in reference year",
"Total value of investments by origin in destination, in reference year",
"Average value of investments flown by origin in destination, in reference year",
"Total # of jobs created by year-origin-dest", "Average # of jobs created by investments by origin in destination, in reference ",
"Greenfield - Number of Projects in reference year year", "Greenfield - Total value by origin in destination, in reference year",
"Yearly average value of greenfield projects in the channel",
"Greenfield - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by greenfield projects", "Co-location - Number of Projects in reference year year",
"Co-location - Total value by origin in destination, in reference year",
"Yearly average value of co-location projects in the channel",
"Co-location - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by co-location projects", "Greenfield - Number of Projects in reference year year",
"Expansion - Total value by origin in destination, in reference year",
"Yearly average value of expansion projects in the channel",
"Expansion - Total # of jobs by origin in destination, in reference year",
"Average # of Jobs created by expansion projects", "Dummy = 1 if no investment in the year-cty_pair observation were recorded",
"Only gravity information - no investments. kept for merge with migration",
"simple distance (most populated cities, km)", "weighted distance (pop-wt, km)",
"Population, total in mn", "GDP (current US$)", "GDP per cap (current US$)",
"Area in sq. kms", "Population, total in mn", "GDP (current US$)",
"GDP per cap (current US$)", "Area in sq. kms", "1=Contiguity",
"1=Common religion", "1=Common official or primary language",
"1=Language is spoken by at least 9% of the population", "1=Common colonizer post 1945",
"1=Pair ever in colonial relationship", "1=Pair in colonial relationship post 1945",
"1=Common legal origins before transition", "1=Common legal origins after transition",
"1=Common currency", "nb of hours difference between ex and im",
"1=War", "Independence date if colony=1", "hegemon if cursib==1",
"1=Origin is current or former hegemon of destination", "1=Destination is current or former hegemon of origin",
"1=Trade from heg_o to colony", "1=Trade from colony to heg_d",
"1=Pair currently in colonial relationship", "1=Origin is GATT/WTO member",
"1=Destination is GATT/WTO member", "= 1 if a BIT ever existed between o and d",
"1=RTA (Source: WTO, 2015)", "1=FTA;2=Cust. Union;3=Common Market;4=Economic union (Baier & Bergstrand, 2009)",
"1=FTA (Source: Head, Mayer and Ries, 2010)", "origin legal system before transition",
"destination legal system before transition", "origin legal system after transition",
"destination legal system after transition", "1=ACP to EU", "1=EU to ACP",
"Income Category. H=high L=Low etc", "1 if income category = H",
"1 if income category = L", "1 if income category = LM", "1 if income category = UM",
"1 if fragile state", "1 if small state", "Regional 3-char Code - WB definition",
"1 if member of OECD", "1 if country is included among least developed countries",
"1 if no information on income level / region is available for FDI parent country",
"Income Category. H=high L=Low etc", "1 if income category = H",
"1 if income category = L", "1 if income category = LM", "1 if income category = UM",
"1 if fragile state", "1 if small state", "Regional 3-char Code - WB definition",
"1 if member of OECD", "1 if country is included among least developed countries",
"1 if no information on income level / region is available for FDI host country",
"simple distance between capitals (capitals, km)", "weighted distance (pop-wt, km) CES distances with theta=-1",
"1 if countries were or are the same country", "1=Pair ever in sibling relationship",
"1=Pair currently in sibling relationship", "severance year for pairs if sibling == 1",
"1=Pair ever in sibling relationship and conflict", "1=Common legal origin changed since transition",
"1=Non-reciprocal PTA ; 2=PTA (Source: Baier & Bergstrand, 2009)",
"1=Origin is donator", "1=Destination is donator", "report of changes in Rose data",
"report of changes in Rose data", "Cost of business start-up procedures (% of GNI per capita)",
"Cost of business start-up procedures (% of GNI per capita)",
"Start-up procedures to register a business (number)", "Start-up procedures to register a business (number)",
"Time required to start a business (days)", "Time required to start a business (days)",
"Days+Procs to start a business", "Days+Procs to start a business",
"1=Origin is a EU member", "1=Destination is a EU member", "group(iso_o iso_d)",
"", "", "", "", "", ""), version = 118L, label.table = structure(list(gspflag = structure(0:3, .Names = c("no gsp recorded in Rose",
"data directly from Rose", "changes in data from Rose", "Assumption that gsp continues after 1999"))), .Names = "gspflag"), expansion.fields = list(c("no_region_o", "note1", "Following small countries have no information because not independent countries: Anguilla; Christmas Island; Cocos and Keeling Islands; Cook Islands; Falkland Islands; French Guiana; Guadeloupe; Martinique; Montserrat; Netherland Antilles; Niue; Norfolk Island; Pitcairn; Reunion; Saint Helena, Ascension and Tristan d..; Saint Pierre and Miquelon; Tokelau; Wallis and Futuna; Western Sahara. notes no_region_o: Serbia and Montenegro has no information Beacuse is treated as separated countries. see variables no_flow and grav_noinv for details"), c("_dta", "note11", "Adesione OECD da fonte OECD "http://www.oecd.org/about/membersandpartners/list-oecd-member-countries.htm\""), c("_dta", "note10", "Classificazione per reddito e definizione fragile-small region Ufficiale World Bank - \"https://datahelpdesk.worldbank.org/knowledgebase/articles/906519-world-bank-country-and-lending-groups\""
), c("_dta", "note9", "Classificazione reddito e regioni come da WB World Development Indicators"), c("bit", "note1", "It is a dummy that states whether in a certain year there was a BIT between o and d and vice versa."), c("bit", "note0", "1"), c("_dta", "note8", "non ci sono duplicati nel dataset BIT perchè li ho eliminati. In questo modo, per ogni combinazione anno-cty_pair ho una sola osservazione. Mi serve per creare la variabile BIT nel do file principale"), c("_dta", "note7", "South sudan da problemi, è solo come destination country"), c("_dta", "note6", "I seguenti paesi hanno dati gravity ma non di investimenti (vedi variabile invYN e nota connessa). Li possiamo mantenere come cancellare. I paesi in questione sono: Anguilla - Netherland Antilles - Cocos and Keeling Islands - Cook Islands - Christmas Islands - Western Sahara - Falkland Islands - Faeroe Islands - Gibraltar French Guiana Kiribati - Marshall Islands - Northern Mariana Islands - Montserrat - Norfolk Islands - Niue - Nauru - Pitcairn - Palau - Saint Helena... - San Marino - Saint Pierre et Miquelon Tokelau - Tonga - Tuvalu - British Virgin Islands - Vanuatu - Wallis and Futuna."), c("_dta", "note5", "geo_dist database scaricato da \"http://www.cepii.fr/CEPII/en/bdd_modele/presentation.asp?id=6\" il 28 febbraio 2017. Updated 10 febbraio 2017"), c("_dta", "note4", "gravity database scaricato da \"http://www.cepii.fr/CEPII/en/bdd_modele/presentation.asp?id=8\" il 28 febbraio 2017. Updated 10 febbraio 2017"), c("gdp_d", "destring", "Characters removed were:"), c("pop_d","destring", "Characters removed were:"), c("gdp_o", "destring", "Characters removed were:"), c("pop_o", "destring", "Characters removed were:"), c("year", "destring", "Characters removed were:"), c("_dta","__JVarLab", "Indicator Name"), c("_dta", "ReS_i", "iso3"), c("_dta", "ReS_ver", "v.2"), c("_dta", "ReS_j", "iso_o"), c("_dta", "ReS_str", "1"), c("_dta", "ReS_Xij", "p"), c("fta_bb", "destring", "Characters removed were:"), c("_dta", "_TSitrvl", "1"), c("_dta", "_TSpanel", "ccode"),
c("_dta", "_TStvar", "year"), c("_dta", "_dta", "st"), c("_dta",
"st_id", "leadnew"), c("_dta", "st_d", "leadgone"), c("_dta",
"st_t0", "time0"), c("_dta", "st_t", "tenplus"), c("_dta",
"_lang_list", "default"), c("_dta", "_lang_c", "default"),
c("_dta", "st_bs", "1"), c("_dta", "st_s", "1"), c("_dta",
"st_o", "0"), c("_dta", "st_bd", "failure"), c("_dta", "st_bt",
"dur8099"), c("_dta", "st_ver", "2"), c("_dta", "tis", "year"
), c("_dta", "iis", "shcode56"), c("_dta", "__xi__Vars__To__Drop__",
"_Icontnt1_2 _Icontnt1_3 _Icontnt1_4 _Icontnt1_5 _Icontnt1_6 _Icontnt1_7"
), c("_dta", "__xi__Vars__Prefix__", "_I _I _I _I _I _I"),
c("no_flow", "note1", "Questa variabile mi dice se ho investimenti (=0) nell'osservazione year-cty_pair oppure se ho solo la coppia paese."
), c("no_flow", "note0", "1"), c("_dta", "note3", "codici ISO (versione 3166 da International Organisation for Standardization - \"https://www.iso.org\""
), c("_dta", "note2", "Investimenti FdiMarket from FDIIntelligence - \"http://www.fdiintelligence.com/\", FDIMarket Database"
), c("_dta", "note1", "Dataset Disaggregato a livello di singola impresa. Creato da Filippo Santi - Versione: 1 - Data: 9 Nov 2017."
), c("_dta", "note0", "11"), c("no_region_o", "note0", "2"
), c("no_region_o", "note2", "Following small countries have no information because not independent countries: Anguilla; Christmas Island; Cocos and Keeling Islands; Cook Islands; Falkland Islands; French Guiana; Guadeloupe; Martinique; Montserrat; Netherland Antilles; Niue; Norfolk Island; Pitcairn; Reunion; Saint Helena, Ascension and Tristan d..; Saint Pierre and Miquelon; Tokelau; Wallis and Futuna; Western Sahara. notes no_region_o: Serbia and Montenegro has no information Beacuse is treated as separated countries. see variables no_flow and grav_noinv for details"
), c("no_region_d", "note0", "1"), c("no_region_d", "note1",
"See notes to no_region_o")), byteorder = "LSF", .Names = c("iso_o",
"iso_d", "year", "all_num", "all_inv_tot", "all_inv_avg", "all_job_tot",
"all_job_avg", "greenf_num", "greenf_inv_tot", "greenf_inv_avg",
"greenf_job_tot", "greenf_job_avg", "coloc_num", "coloc_inv_tot",
"coloc_inv_avg", "coloc_job_tot", "coloc_job_avg", "expan_num",
"expan_inv_tot", "expan_inv_avg", "expan_job_tot", "expan_job_avg",
"no_flow", "grav_noinv", "dist", "distw", "contig", "comrelig",
"comlang_off", "comlang_ethno", "comcol", "colony", "comleg_pretrans",
"comcur", "tdiff", "conflict", "smctry", "subg", "same_reg",
"same_inc", "simil_inc"), n = 228L, row.names = c(NA, 10L), class = "data.frame")
下面,我正在使用的代码
# Create network (STATNET)
library(network)
library(sna)
fdi.net.2003 <- network(fdi.edge.2003, matrix.type = "edgelist",
directed = T,
ignore.eval = F)
# Computing DIAMETER and APL
# APL and DIAMETER computed in statnet require to compute the geodist
# inf.replace = 0 substitute distance in unconnected components to be 0
geod.2003 <- geodist(fdi.net.2015, inf.replace = NA)
# The length of the shortest path for all pairs of nodes.
geod.2003$gdist
# The number of shortest path for all pairs of nodes.
geod.2003$counts
# Shortest Path Matrix
dist_mat = geod.2003$gdist
hist(dist_mat)
diam <- max(dist_mat, na.rm = T) # 6
apl <- mean(dist_mat, na.rm = T)
# (Adapted from https://dnac.ssri.duke.edu/rlabs/2017/02_descriptive_statistics.php)
# REPLICATE WITH IGRAPH
detach(package:sna)
detach(package:network)
library(igraph)
fdi.graph.2003 <- graph_from_data_frame(fdi.edge.2003, directed = T, vertices = fdi.attr.2003)
diam2 <- diameter(fdi.graph.2003, directed = TRUE, unconnected = TRUE) #7
identical(diam, diam2) # FALSE
# Using intergraph
library(intergraph)
fdi.graph.2003.int <- asIgraph(fdi.net.2003)
class(fdi.graph.2003.int) # igraph
diam3 <- diameter(fdi.graph.2003.int, directed = TRUE, unconnected = TRUE) #6
identical(diam, diam3) # TRUE
这只是一个例子。在计算集团人口普查时出现了其他更相关的问题:根据“网络”类对象,我得到了大小为 8 的最大集团。相反,使用“igraph”,我得到了 17 的最大集团!
关于发生了什么的任何想法?