This notebook provides all the steps to replicate the results of our paper "Expanding the measurement of culture with a sample of two billion humans", published in the Journal of the Royal Society Interface 19:20220085 (2022).
This notebook contains the code to generate the panel plots of regional data. The required data files are allRegionsAndCountriesSimilarity_Jan19.csv and translation.csv.
First of all, define a function to load countries and regularize all column names.
# Load the region/country similarity table and expand two-letter codes.
#
# Reads "allRegionsAndCountriesSimilarity_Jan19.csv" (first column used as row
# names, header kept verbatim) and "translation.csv" (no header; column 1 is
# the lookup key, column 2 the replacement label) from `data_dir`.
# Every row/column label that is exactly two characters long is replaced by
# its entry in translation.csv; all other labels are left as they are.
#
# Args:
#   data_dir: directory holding both CSV files. Defaults to the location the
#             notebook has always used, so `loadCountries()` is unchanged.
#
# Returns:
#   A data.frame with the translated row and column names.
loadCountries <- function(data_dir = "./data/RegionalAnalysis") {
  allCountries <- read.csv(
    file.path(data_dir, "allRegionsAndCountriesSimilarity_Jan19.csv"),
    row.names = 1, check.names = FALSE
  )
  # na.strings = character(0): otherwise a literal "NA" code in the
  # translation table would be parsed as a missing value and never match.
  country_codes <- read.csv(
    file.path(data_dir, "translation.csv"),
    header = FALSE, na.strings = character(0)
  )
  cc <- as.character(country_codes$V2)
  names(cc) <- as.character(country_codes$V1)

  # Translate all two-character labels in one vectorised step. A code with no
  # entry in the translation table keeps its original label instead of
  # turning into NA (which would break the row names and blank the column).
  expand_codes <- function(labels) {
    translated <- unname(cc[labels])
    replace_at <- nchar(labels) == 2 & !is.na(translated)
    labels[replace_at] <- translated[replace_at]
    labels
  }

  colnames(allCountries) <- expand_codes(colnames(allCountries))
  row.names(allCountries) <- expand_codes(row.names(allCountries))
  allCountries
}
For simplicity, store the plot colors in variables.
# Fill colours for the boxplots; the prefixes follow the two-letter codes
# used for COUNTRY in the panels below (es, de, it, fr).
escol <- "red"
decol <- "orange"
itcol <- "darkgreen"
frcol <- "blue"
# Colour reserved for anything else.
othercol <- "white"
In principle, the four cells below could be condensed into a single function that takes the country code as input. However, with only four countries, and given that each panel needs some manual tweaking (especially for margins and label names), I found it easier to simply replicate the code.
Each of the following cells is basically the same, except for a few country-specific values. Each one loads the list of regions and neighbouring countries for one country, selects the corresponding rows and columns of the data, and then draws the boxplot, shortening a label where it is too long.
# Panel written to Figure-4-A.pdf: boxplots for COUNTRY = "ES".
COUNTRY <- "ES"
# Earlier margin setting, kept for reference: par(mar = c(8.5, 3, 0.2, 0.2))
par(mar = c(8.8, 3, 0.2, 0.2))  # large bottom margin for the rotated labels
allCountries <- loadCountries()

# <COUNTRY>.csv supplies the columns to plot and <COUNTRY>_reg.csv the rows to
# keep; in both files the names are taken from the second column.
items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, ".csv")),
  header = FALSE, stringsAsFactors = FALSE
)
reduced_items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, "_reg.csv")),
  header = FALSE, stringsAsFactors = FALSE
)
regionsAndCountries <- as.character(items$V2)
regions <- as.character(reduced_items$V2)

# Sanity check (auto-printed): requested columns that are missing from the
# loaded table. Expected to be empty.
regionsAndCountries[!regionsAndCountries %in% colnames(allCountries)]

allCountries <- allCountries[regions, regionsAndCountries]
# Sort the columns by their median value, ascending.
allCountries <- allCountries[order(vapply(allCountries, median, numeric(1)))]
# Shorten the label of the 13th sorted column.
# NOTE(review): this is positional, so it depends on the ordering produced by
# the current data files - confirm if the data ever changes.
colnames(allCountries)[13] <- "Valencia"
# Plot 1 - value (presumably similarity -> dissimilarity, going by the name of
# the input file).
allCountries <- 1 - allCountries

# One fill colour per box, in sorted column order: three single-colour boxes
# followed by 17 in the "es" colour.
# NOTE(review): assumes the first three sorted columns are the neighbouring
# countries in this order - confirm against the data.
colors <- c(decol, itcol, frcol, rep(escol, 17))

# To draw straight into a sized PDF instead of copying the screen device:
# pdf(file = paste0("./data/RegionalAnalysis/", COUNTRY, ".pdf"), width = 15, height = 10)

# Draw the boxes without the default x axis, then add rotated labels by hand
# just below the plotting region (xpd = TRUE allows drawing in the margin).
boxplot(allCountries, las = 2, outline = FALSE, xaxt = "n", col = colors,
        ylim = c(0, 0.15))
text(seq_along(allCountries), -0.01, srt = 60, adj = 1, xpd = TRUE,
     labels = colnames(allCountries), cex = 1.25)

# Copy the current plot to a PDF device, then close that device.
dev.copy(pdf, "./data/RegionalAnalysis/Figure-4-A.pdf")
dev.off()
This cell is basically the same as the previous one, except for a few country-specific values. It loads the list of regions and neighbouring countries for France, selects the corresponding rows and columns of the data, and then draws the boxplot, shortening a label that is too long.
# Panel written to Figure-4-B.pdf: boxplots for COUNTRY = "FR".
COUNTRY <- "FR"
# Earlier margin setting, kept for reference: par(mar = c(8.5, 3, 0.2, 0.2))
par(mar = c(10, 3, 0.2, 0.2))  # large bottom margin for the rotated labels
allCountries <- loadCountries()

# <COUNTRY>.csv supplies the columns to plot and <COUNTRY>_reg.csv the rows to
# keep; in both files the names are taken from the second column.
items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, ".csv")),
  header = FALSE, stringsAsFactors = FALSE
)
reduced_items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, "_reg.csv")),
  header = FALSE, stringsAsFactors = FALSE
)
regionsAndCountries <- as.character(items$V2)
regions <- as.character(reduced_items$V2)

# Sanity check (auto-printed): requested columns that are missing from the
# loaded table. Expected to be empty.
regionsAndCountries[!regionsAndCountries %in% colnames(allCountries)]

allCountries <- allCountries[regions, regionsAndCountries]
# Sort the columns by their median value, ascending.
allCountries <- allCountries[order(vapply(allCountries, median, numeric(1)))]
# Shorten the label of the 12th sorted column.
# NOTE(review): this is positional, so it depends on the ordering produced by
# the current data files - confirm if the data ever changes.
colnames(allCountries)[12] <- "Provence-Alpes"
# Plot 1 - value (presumably similarity -> dissimilarity, going by the name of
# the input file).
allCountries <- 1 - allCountries

# To draw straight into a sized PDF instead of copying the screen device:
# pdf(file = paste0("./data/RegionalAnalysis/", COUNTRY, ".pdf"), width = 30, height = 20)

# One fill colour per box, in sorted column order: three single-colour boxes
# followed by 21 in the "fr" colour.
# NOTE(review): assumes the first three sorted columns are the neighbouring
# countries in this order - confirm against the data.
colors <- c(escol, itcol, decol, rep(frcol, 21))

# Draw the boxes without the default x axis, then add rotated labels by hand
# just below the plotting region (xpd = TRUE allows drawing in the margin).
boxplot(allCountries, las = 2, outline = FALSE, xaxt = "n", col = colors,
        ylim = c(0, 0.15))
text(seq_along(allCountries), -0.01, srt = 60, adj = 1, xpd = TRUE,
     labels = colnames(allCountries), cex = 1.25)

# Copy the current plot to a PDF device, then close that device.
dev.copy(pdf, "./data/RegionalAnalysis/Figure-4-B.pdf")
dev.off()
This cell is basically the same as the previous ones, except for a few country-specific values. It loads the list of regions and neighbouring countries for Germany, selects the corresponding rows and columns of the data, and then draws the boxplot, shortening a label that is too long.
# Panel written to Figure-4-C.pdf: boxplots for COUNTRY = "DE".
COUNTRY <- "DE"
par(mar = c(9.2, 3, 0.2, 0.2))  # large bottom margin for the rotated labels
allCountries <- loadCountries()

# <COUNTRY>.csv supplies the columns to plot and <COUNTRY>_reg.csv the rows to
# keep; in both files the names are taken from the second column.
items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, ".csv")),
  header = FALSE, stringsAsFactors = FALSE
)
reduced_items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, "_reg.csv")),
  header = FALSE, stringsAsFactors = FALSE
)
regionsAndCountries <- as.character(items$V2)
regions <- as.character(reduced_items$V2)

# Sanity check (auto-printed): requested columns that are missing from the
# loaded table. Expected to be empty.
regionsAndCountries[!regionsAndCountries %in% colnames(allCountries)]

allCountries <- allCountries[regions, regionsAndCountries]
# Sort the columns by their median value, ascending.
allCountries <- allCountries[order(vapply(allCountries, median, numeric(1)))]
# Shorten the label of the 13th sorted column.
# NOTE(review): this is positional, so it depends on the ordering produced by
# the current data files - confirm if the data ever changes.
colnames(allCountries)[13] <- "Mecklenburg"
# Plot 1 - value (presumably similarity -> dissimilarity, going by the name of
# the input file).
allCountries <- 1 - allCountries

# To draw straight into a sized PDF instead of copying the screen device:
# pdf(file = paste0("./data/RegionalAnalysis/", COUNTRY, ".pdf"), width = 30, height = 20)

# One fill colour per box, in sorted column order: three single-colour boxes
# followed by 13 in the "de" colour.
# NOTE(review): boxplot() recycles `col`, so if the table has more than 16
# columns the extra boxes silently reuse the first colours - confirm the
# column count matches.
colors <- c(escol, itcol, frcol, rep(decol, 13))

# Draw the boxes without the default x axis, then add rotated labels by hand
# just below the plotting region (xpd = TRUE allows drawing in the margin).
boxplot(allCountries, las = 2, outline = FALSE, xaxt = "n", col = colors,
        ylim = c(0, 0.15))
text(seq_along(allCountries), -0.01, srt = 60, adj = 1, xpd = TRUE,
     labels = colnames(allCountries), cex = 1.25)

# Copy the current plot to a PDF device, then close that device.
dev.copy(pdf, "./data/RegionalAnalysis/Figure-4-C.pdf")
dev.off()
This cell is basically the same as the previous ones, except for a few country-specific values. It loads the list of regions and neighbouring countries for Italy, selects the corresponding rows and columns of the data, and then draws the boxplot, shortening a label that is too long. The final table is displayed below the cell.
# Panel written to Figure-4-D.pdf: boxplots for COUNTRY = "IT".
COUNTRY <- "IT"
par(mar = c(9, 3, 0.2, 0.2))  # large bottom margin for the rotated labels
allCountries <- loadCountries()

# <COUNTRY>.csv supplies the columns to plot and <COUNTRY>_reg.csv the rows to
# keep; in both files the names are taken from the second column.
items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, ".csv")),
  header = FALSE, stringsAsFactors = FALSE
)
reduced_items <- read.csv(
  file.path("./data/RegionalAnalysis", paste0(COUNTRY, "_reg.csv")),
  header = FALSE, stringsAsFactors = FALSE
)
regionsAndCountries <- as.character(items$V2)
regions <- as.character(reduced_items$V2)

# Sanity check (auto-printed): requested columns that are missing from the
# loaded table. Expected to be empty.
regionsAndCountries[!regionsAndCountries %in% colnames(allCountries)]

allCountries <- allCountries[regions, regionsAndCountries]
# Sort the columns by their median value, ascending.
allCountries <- allCountries[order(vapply(allCountries, median, numeric(1)))]
# Shorten the label of the 4th sorted column.
# NOTE(review): this is positional, so it depends on the ordering produced by
# the current data files - confirm if the data ever changes.
colnames(allCountries)[4] <- "Trentino-Alto Adige"
# Plot 1 - value (presumably similarity -> dissimilarity, going by the name of
# the input file).
allCountries <- 1 - allCountries

# To draw straight into a sized PDF instead of copying the screen device:
# pdf(file = paste0("./data/RegionalAnalysis/", COUNTRY, ".pdf"), width = 30, height = 20)

# One fill colour per box, in sorted column order: three single-colour boxes
# followed by 20 in the "it" colour.
# NOTE(review): assumes the first three sorted columns are the neighbouring
# countries in this order - confirm against the data.
colors <- c(decol, escol, frcol, rep(itcol, 20))

# Draw the boxes without the default x axis, then add rotated labels by hand
# just below the plotting region (xpd = TRUE allows drawing in the margin).
boxplot(allCountries, las = 2, outline = FALSE, xaxt = "n", col = colors,
        ylim = c(0, 0.15))
text(seq_along(allCountries), -0.01, srt = 60, adj = 1, xpd = TRUE,
     labels = colnames(allCountries), cex = 1.25)

# Copy the current plot to a PDF device, then close that device.
dev.copy(pdf, "./data/RegionalAnalysis/Figure-4-D.pdf")
dev.off()

# Display the table that was plotted.
allCountries
Germany | Spain | France | Trentino-Alto Adige | Sardinia | Aosta Valley | Basilicata | Calabria | Campania | Friuli-Venezia Giulia | ⋯ | Puglia | Piedmont | Lombardy | Marche | Abruzzo | Liguria | Molise | Lazio | Tuscany | Emilia-Romagna | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | ⋯ | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | <dbl> | |
Abruzzo | 0.1347267 | 0.1270803 | 0.11725046 | 0.03589109 | 0.02467482 | 2.397060e-02 | 0.01905254 | 1.682574e-02 | 0.01709562 | 0.019593189 | ⋯ | 1.592834e-02 | 1.958136e-02 | 0.018220760 | 0.01150486 | 0.000000000 | 0.017011574 | 8.310756e-03 | 0.01244958 | 1.577268e-02 | 0.014425156 |
Basilicata | 0.1401469 | 0.1359114 | 0.12347153 | 0.04471418 | 0.02898712 | 3.115010e-02 | 0.00000000 | 1.174849e-02 | 0.01680952 | 0.027321719 | ⋯ | 1.073725e-02 | 2.445594e-02 | 0.024411669 | 0.02249022 | 0.019052536 | 0.023804459 | 1.201171e-02 | 0.01959685 | 2.290132e-02 | 0.022421668 |
Calabria | 0.1391626 | 0.1328995 | 0.12106929 | 0.04340594 | 0.02551703 | 2.862164e-02 | 0.01174849 | -2.220446e-16 | 0.01432843 | 0.025009546 | ⋯ | 1.249001e-02 | 2.166542e-02 | 0.021626144 | 0.02012993 | 0.016825745 | 0.020325105 | 1.120440e-02 | 0.01638727 | 2.003677e-02 | 0.019803237 |
Campania | 0.1365881 | 0.1310480 | 0.11861095 | 0.04433392 | 0.02759758 | 3.060026e-02 | 0.01680952 | 1.432843e-02 | 0.00000000 | 0.025680303 | ⋯ | 1.358006e-02 | 2.340673e-02 | 0.021717128 | 0.01972597 | 0.017095625 | 0.020822442 | 1.269779e-02 | 0.01517949 | 1.982860e-02 | 0.019269948 |
Emilia-Romagna | 0.1147544 | 0.1144313 | 0.09595028 | 0.02512653 | 0.02569175 | 1.947058e-02 | 0.02242167 | 1.980324e-02 | 0.01926995 | 0.010918741 | ⋯ | 1.863475e-02 | 1.108416e-02 | 0.006765260 | 0.01040783 | 0.014425156 | 0.010105320 | 1.612702e-02 | 0.01041940 | 7.518949e-03 | 0.000000000 |
Friuli-Venezia Giulia | 0.1154077 | 0.1154178 | 0.09728576 | 0.02474157 | 0.02844711 | 2.156249e-02 | 0.02732172 | 2.500955e-02 | 0.02568030 | 0.000000000 | ⋯ | 2.411911e-02 | 1.568098e-02 | 0.012355725 | 0.01700387 | 0.019593189 | 0.014245253 | 2.139431e-02 | 0.01625195 | 1.343407e-02 | 0.010918741 |
Lazio | 0.1203519 | 0.1187581 | 0.10130931 | 0.03315718 | 0.02500293 | 2.463916e-02 | 0.01959685 | 1.638727e-02 | 0.01517949 | 0.016251947 | ⋯ | 1.624563e-02 | 1.484282e-02 | 0.011675130 | 0.01407143 | 0.012449582 | 0.013488082 | 1.293437e-02 | 0.00000000 | 1.060186e-02 | 0.010419400 |
Liguria | 0.1213935 | 0.1164764 | 0.10084349 | 0.02901360 | 0.02381289 | 1.851069e-02 | 0.02380446 | 2.032510e-02 | 0.02082244 | 0.014245253 | ⋯ | 1.961558e-02 | 9.680176e-03 | 0.010679652 | 0.01477391 | 0.017011574 | 0.000000000 | 1.751608e-02 | 0.01348808 | 1.011197e-02 | 0.010105320 |
Lombardy | 0.1094344 | 0.1133450 | 0.09062378 | 0.02495309 | 0.02959137 | 1.918659e-02 | 0.02441167 | 2.162614e-02 | 0.02171713 | 0.012355725 | ⋯ | 2.132084e-02 | 8.680780e-03 | 0.000000000 | 0.01580111 | 0.018220760 | 0.010679652 | 1.799446e-02 | 0.01167513 | 9.656538e-03 | 0.006765260 |
Marche | 0.1300642 | 0.1240861 | 0.11189905 | 0.03235137 | 0.02493026 | 2.318224e-02 | 0.02249022 | 2.012993e-02 | 0.01972597 | 0.017003867 | ⋯ | 1.860827e-02 | 1.766078e-02 | 0.015801115 | 0.00000000 | 0.011504861 | 0.014773913 | 1.510252e-02 | 0.01407143 | 1.310673e-02 | 0.010407827 |
Molise | 0.1337406 | 0.1292696 | 0.11768676 | 0.03805822 | 0.02381872 | 2.538339e-02 | 0.01201171 | 1.120440e-02 | 0.01269779 | 0.021394305 | ⋯ | 1.127148e-02 | 1.881812e-02 | 0.017994464 | 0.01510252 | 0.008310756 | 0.017516082 | -2.220446e-16 | 0.01293437 | 1.632405e-02 | 0.016127022 |
Piedmont | 0.1190401 | 0.1195127 | 0.09930861 | 0.03100220 | 0.02838905 | 1.683699e-02 | 0.02445594 | 2.166542e-02 | 0.02340673 | 0.015680979 | ⋯ | 2.192065e-02 | 2.220446e-16 | 0.008680780 | 0.01766078 | 0.019581362 | 0.009680176 | 1.881812e-02 | 0.01484282 | 1.277622e-02 | 0.011084162 |
Puglia | 0.1393894 | 0.1321885 | 0.12173232 | 0.04315117 | 0.02502833 | 3.001241e-02 | 0.01073725 | 1.249001e-02 | 0.01358006 | 0.024119107 | ⋯ | 1.110223e-16 | 2.192065e-02 | 0.021320835 | 0.01860827 | 0.015928343 | 0.019615577 | 1.127148e-02 | 0.01624563 | 1.978506e-02 | 0.018634755 |
Sardinia | 0.1488370 | 0.1367466 | 0.13069482 | 0.04665596 | 0.00000000 | 3.376342e-02 | 0.02898712 | 2.551703e-02 | 0.02759758 | 0.028447110 | ⋯ | 2.502833e-02 | 2.838905e-02 | 0.029591365 | 0.02493026 | 0.024674821 | 0.023812889 | 2.381872e-02 | 0.02500293 | 2.608084e-02 | 0.025691750 |
Sicilia | 0.1310814 | 0.1266463 | 0.11349104 | 0.04214461 | 0.02693343 | 2.986087e-02 | 0.01807277 | 1.148358e-02 | 0.01544297 | 0.023635039 | ⋯ | 1.515385e-02 | 2.032675e-02 | 0.019397981 | 0.02053152 | 0.018575477 | 0.019020758 | 1.462043e-02 | 0.01565551 | 1.864705e-02 | 0.018321760 |
Tuscany | 0.1163302 | 0.1152229 | 0.09622377 | 0.02872400 | 0.02608084 | 2.143748e-02 | 0.02290132 | 2.003677e-02 | 0.01982860 | 0.013434066 | ⋯ | 1.978506e-02 | 1.277622e-02 | 0.009656538 | 0.01310673 | 0.015772680 | 0.010111966 | 1.632405e-02 | 0.01060186 | -2.220446e-16 | 0.007518949 |
Trentino-Alto Adige/Sudtirol | 0.1077870 | 0.1210134 | 0.10138782 | 0.00000000 | 0.04665596 | 3.019813e-02 | 0.04471418 | 4.340594e-02 | 0.04433392 | 0.024741574 | ⋯ | 4.315117e-02 | 3.100220e-02 | 0.024953089 | 0.03235137 | 0.035891087 | 0.029013604 | 3.805822e-02 | 0.03315718 | 2.872400e-02 | 0.025126530 |
Umbria | 0.1309651 | 0.1255135 | 0.11258011 | 0.03510692 | 0.02627352 | 2.512650e-02 | 0.02165091 | 1.919347e-02 | 0.02069916 | 0.019039971 | ⋯ | 1.893842e-02 | 1.874126e-02 | 0.017072130 | 0.01167681 | 0.014551052 | 0.016509115 | 1.564977e-02 | 0.01232837 | 1.252888e-02 | 0.012977717 |
Aosta Valley | 0.1288756 | 0.1245036 | 0.10922829 | 0.03019813 | 0.03376342 | 1.110223e-16 | 0.03115010 | 2.862164e-02 | 0.03060026 | 0.021562493 | ⋯ | 3.001241e-02 | 1.683699e-02 | 0.019186585 | 0.02318224 | 0.023970596 | 0.018510686 | 2.538339e-02 | 0.02463916 | 2.143748e-02 | 0.019470577 |
Veneto | 0.1165959 | 0.1167465 | 0.09830711 | 0.02192779 | 0.03071633 | 2.270953e-02 | 0.02780147 | 2.573526e-02 | 0.02599011 | 0.008403153 | ⋯ | 2.466874e-02 | 1.592200e-02 | 0.010447942 | 0.01745415 | 0.020564286 | 0.014884642 | 2.152257e-02 | 0.01676578 | 1.314311e-02 | 0.009799833 |