# Network analysis
# Second session
# Harald Waxenecker
#
# Walk-through script: import the Florentine families node and edge lists,
# build an undirected igraph network, compute general network measures and
# node centralities, plot the network, and save the workspace.

library(igraph)

### 1. Set the working directory
# using setwd()
setwd("/Users/macbookpro/Documents/")
# or: Session -> Set Working Directory -> Choose Directory

# Sample data: Florentine families
# https://methods-sagepub-com.ezproxy.muni.cz/dataset/intro-networks-in-florentine-1994
# https://sites.google.com/site/ucinetsoftware/datasets/padgett-florentine-families

### 2. Import data from different formats:
# .csv = comma separated values
# .xlsx = Excel file
# .Rdata = data format for use in R

# 2.1 Open Rdata with a click
# (here it is necessary to convert the names of the data objects)
# NOTE(review): the renames below only make sense after the matching .Rdata
# file has been opened; presumably each file stores its object under the name
# `data` -- confirm. Without that, `data` resolves to the utils::data function.
# Open the nodes .Rdata file (by click) before running this line:
nodes <- data
# The edges file is loaded first and renamed afterwards.
load("_binaries_dataset-florentine-1994-subset1-edges_dataset-florentine-1994-subset1-edges.Rdata")
edges <- data
# V(G)$name <- trimws(V(G)$name)

# 2.2 Import data from .csv
list.files()
nodes <- read.csv("_binaries_dataset-florentine-1994-subset1-nodes_dataset-florentine-1994-subset1-nodes.csv")
edges <- read.csv("_binaries_dataset-florentine-1994-subset1-edges_dataset-florentine-1994-subset1-edges.csv")
class(nodes)

# 2.3 Import data from .xlsx
# Install readxl only when it is missing, so re-running the script does not
# reinstall the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)
list.files()
nodes <- read_excel("_binaries_dataset-florentine-1994-subset1-nodes_dataset-florentine-1994-subset1-nodes.xlsx")
edges <- read_excel("_binaries_dataset-florentine-1994-subset1-edges_dataset-florentine-1994-subset1-edges.xlsx")

# In English, the Excel function "BUSCARV" is called "VLOOKUP".

### 3. Social network analysis
# igraph

# 3.1 The graph or network
florentine_network <- graph_from_data_frame(
  d = edges,
  vertices = nodes,
  directed = FALSE
)
class(florentine_network)
plot(florentine_network)

# node attributes:
# Replace the vertex names with the labels from the node list, so that
# vertices can be addressed by label below.
V(florentine_network)$name <- as.character(nodes$label)
V(florentine_network)$wealth
V(florentine_network)$totalties

# 3.2 General network characteristics
# one-mode network
# undirected network
edge_density(florentine_network)
ecount(florentine_network)
vcount(florentine_network)
diameter(florentine_network)
mean_distance(florentine_network)
distances(florentine_network, v = "Medici", to = "Peruzzi")
distances(florentine_network, v = "Pazzi", to = "Lamberteschi")
distances(florentine_network, v = "Ginori", to = "Strozzi")

# 3.3 Node centralities
# Compute each centrality measure once; the results are shown here and
# reused for the data frame and the plots below.
deg <- degree(florentine_network)
btw <- betweenness(florentine_network, normalized = TRUE)
clo <- closeness(florentine_network, normalized = TRUE)
deg
btw
clo

# Extract all vertex attributes into a data frame
node_centralities <- igraph::as_data_frame(florentine_network, what = "vertices")

# Add the computed centrality measures to the data frame
node_centralities$degree <- deg
node_centralities$betweenness <- btw
node_centralities$closeness <- clo

# 3.4 Improving the plot
# Plot the network with improved aesthetics
plot(
  florentine_network,
  layout = layout_with_fr(florentine_network),
  vertex.color = "gold",
  vertex.size = deg * 3 + 2,
  # floor at 0.5 so that zero-degree vertices do not get a label size of 0
  vertex.label.cex = pmax(deg * 0.5, 0.5),
  main = "Florentine family network: nodesize = degree"
)

plot(
  florentine_network,
  layout = layout_with_fr(florentine_network), # using a different algorithm for node distribution
  vertex.label.cex = 1.2,                      # adjust label size
  vertex.size = deg * 5 + 1,                   # define vertex size
  vertex.color = "gold",                       # define vertex color
  main = "Florentine family network: "         # add a title
)

save.image(file = "exercise2_florentine_data.RData")

# Exercise:
# Create an edgelist and a nodelist in Excel. (>20 nodes)
# Import the data into R
# Create your network
# Do some node centrality analysis
# Plot your network