Online University
Bhupendra Mishra
Thursday, November 12, 2015
- Load the dataset called studentNetwork.RData. Read the datasets negative-words.txt and positive-words.txt into R. Here is a link: studentNetwork.RDataView in a new window It is also in the Files section
#dir <- "C:/Users/Bhupendra Mishra/Desktop/donotbackup/"
#setwd(dir)
load("C:/Users/Bhupendra Mishra/Desktop/donotbackup/studentNetwork.RData")
#load("studentNetwork.RData")
- Make a plot of the network
library(igraph)
## Warning: package 'igraph' was built under R version 3.2.2
##
## Attaching package: 'igraph'
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
library(statnet)
## Warning: package 'statnet' was built under R version 3.2.2
## Loading required package: network
## Warning: package 'network' was built under R version 3.2.2
## network: Classes for Relational Data
## Version 1.13.0 created on 2015-08-31.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
##
## Attaching package: 'network'
##
## The following objects are masked from 'package:igraph':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
##
## Loading required package: networkDynamic
## Warning: package 'networkDynamic' was built under R version 3.2.2
##
## networkDynamic: version 0.8.1, created on 2015-10-06
## Copyright (c) 2015, Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll, University of Washington
## with contributions from
## Zack Almquist, University of California -- Irvine
## David R. Hunter, Penn State University
## Li Wang
## Kirk Li, University of Washington
## Steven M. Goodreau, University of Washington
## Jeffrey Horner
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("networkDynamic").
##
## Loading required package: ergm
## Warning: package 'ergm' was built under R version 3.2.2
## Loading required package: statnet.common
## Warning: package 'statnet.common' was built under R version 3.2.2
##
## ergm: version 3.5.1, created on 2015-10-18
## Copyright (c) 2015, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Martina Morris, University of Washington
## with contributions from
## Li Wang
## Kirk Li, University of Washington
## Skye Bender-deMoll, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm").
##
## NOTE: If you use custom ERGM terms based on 'ergm.userterms'
## version prior to 3.1, you will need to perform a one-time update
## of the package boilerplate files (the files that you did not write
## or modify) from 'ergm.userterms' 3.1 or later. See
## help('eut-upgrade') for instructions.
##
## Loading required package: sna
## Warning: package 'sna' was built under R version 3.2.2
## sna: Tools for Social Network Analysis
## Version 2.3-2 created on 2014-01-13.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
##
##
## Attaching package: 'sna'
##
## The following object is masked from 'package:network':
##
## %c%
##
## The following objects are masked from 'package:igraph':
##
## %c%, betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
##
## Loading required package: tergm
## Warning: package 'tergm' was built under R version 3.2.2
##
## tergm: version 3.3.1, created on 2015-10-25
## Copyright (c) 2015, Pavel N. Krivitsky, University of Wollongong
## Mark S. Handcock, University of California -- Los Angeles
## with contributions from
## David R. Hunter, Penn State University
## Steven M. Goodreau, University of Washington
## Martina Morris, University of Washington
## Nicole Bohme Carnegie, New York University
## Carter T. Butts, University of California -- Irvine
## Ayn Leslie-Cook, University of Washington
## Skye Bender-deMoll
## Li Wang
## Kirk Li, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("tergm").
##
## Loading required package: ergm.count
## Warning: package 'ergm.count' was built under R version 3.2.2
##
## ergm.count: version 3.2.0, created on 2015-06-18
## Copyright (c) 2015, Pavel N. Krivitsky, University of Wollongong
## with contributions from
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("ergm.count").
##
## NOTE: The form of the term 'CMP' has been changed in version 3.2
## of 'ergm.count'. See the news or help('CMP') for more information.
##
##
## statnet: version 2015.11.0, created on 2015-11-04
## Copyright (c) 2015, Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Carter T. Butts, University of California -- Irvine
## Steven M. Goodreau, University of Washington
## Pavel N. Krivitsky, University of Wollongong
## Skye Bender-deMoll
## Martina Morris, University of Washington
## Based on "statnet" project software (statnet.org).
## For license and citation information see statnet.org/attribution
## or type citation("statnet").
##
## unable to reach CRAN
plot(studentNetwork, main = "Student NEtwork")
- How many nodes and edges are there in studentNetwork?
summary(studentNetwork)
## Network attributes:
## vertices = 205
## directed = FALSE
## hyper = FALSE
## loops = FALSE
## multiple = FALSE
## bipartite = FALSE
## total edges = 203
## missing edges = 0
## non-missing edges = 203
## density = 0.009708274
##
## Vertex attributes:
##
## Course_of_Study:
## character valued attribute
## attribute summary:
## Business Fine_Arts Liberal_Arts Physical_Sciences
## 109 4 68 6
## Technology
## 18
##
## Sex:
## character valued attribute
## attribute summary:
## F M
## 99 106
##
## StudentID:
## integer valued attribute
## 205 values
##
## Tweets:
## character valued attribute
## attribute summary:
## the 10 most common values are:
## abnormal|arbitrary|better-than-expected|dirt-cheap|foolish|lawful|lonesome|pretty|supremely|trump|unconditional|unthinkable
## 1
## abominably|affably|benefit|enchant|enraptured|finagle|fugitive|gleeful|ingenious|nourish|premier|priceless|rapturously|vexingly|wasteful
## 1
## abominate|adventuresome|affluent|blatantly|conveniently|dummy-proof|hedonistic|idol|improvement|irking|laudable|refresh|rumbling|silent|sweetness|titillatingly
## 1
## abort|altruistically|barbarously|disgruntle|faith|imaginative|indebted|ingenious|unwatchable
## 1
## abound|amenable|anomalous|baffling|dominates|drab|enjoy|flawlessly|happily|humorous|illness|reaffirm|shiny|stupendously|taboo|thoughtfulness|treasure|well-being|well-educated
## 1
## abound|clouding|comfortable|expansive|glorious|impartial|principled|reforming|statuesque|troubled|woo
## 1
## absurdly|aspiration|brainwash|clear|ergonomical|eye-catch|immaculate|inevitable|nurturing|punk|rumbling
## 1
## abundant|entranced|hoodwink|outperforms|regress|solemn|thriving|upseting
## 1
## acclaimed|accomplishment|believable|boisterous|breach|flawlessly|fondness|frail|hooray|idolized|peerless|randomly|spew|temptingly
## 1
## accolade|calming|calumniation|cure|effusively|offending|saint|stupendous
## 1
##
## Year:
## numeric valued attribute
## attribute summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 3.000 2.732 4.000 6.000
##
## No edge attributes
##
## Network edgelist matrix:
## [,1] [,2]
## [1,] 1 25
## [2,] 1 52
## [3,] 1 58
## [4,] 1 70
## [5,] 1 87
## [6,] 1 92
## [7,] 1 96
## [8,] 1 100
## [9,] 1 110
## [10,] 1 127
## [11,] 1 151
## [12,] 1 161
## [13,] 1 174
## [14,] 2 52
## [15,] 2 100
## [16,] 2 134
## [17,] 2 190
## [18,] 5 204
## [19,] 8 30
## [20,] 8 104
## [21,] 8 160
## [22,] 9 19
## [23,] 9 54
## [24,] 9 115
## [25,] 9 205
## [26,] 11 44
## [27,] 11 74
## [28,] 13 61
## [29,] 13 153
## [30,] 14 182
## [31,] 15 22
## [32,] 15 55
## [33,] 15 76
## [34,] 15 189
## [35,] 16 40
## [36,] 16 140
## [37,] 17 122
## [38,] 18 63
## [39,] 18 129
## [40,] 18 158
## [41,] 18 195
## [42,] 21 59
## [43,] 21 102
## [44,] 21 140
## [45,] 22 55
## [46,] 22 64
## [47,] 22 76
## [48,] 22 123
## [49,] 22 189
## [50,] 23 36
## [51,] 25 43
## [52,] 25 60
## [53,] 25 77
## [54,] 25 87
## [55,] 25 92
## [56,] 25 147
## [57,] 27 68
## [58,] 27 93
## [59,] 29 51
## [60,] 29 55
## [61,] 30 54
## [62,] 30 104
## [63,] 30 160
## [64,] 31 114
## [65,] 31 185
## [66,] 32 178
## [67,] 33 140
## [68,] 34 187
## [69,] 34 200
## [70,] 36 97
## [71,] 36 167
## [72,] 38 155
## [73,] 43 60
## [74,] 43 77
## [75,] 44 74
## [76,] 44 136
## [77,] 47 74
## [78,] 47 79
## [79,] 47 102
## [80,] 47 105
## [81,] 47 139
## [82,] 47 153
## [83,] 47 189
## [84,] 47 191
## [85,] 47 201
## [86,] 51 55
## [87,] 51 61
## [88,] 52 127
## [89,] 52 190
## [90,] 53 83
## [91,] 53 136
## [92,] 54 115
## [93,] 55 61
## [94,] 55 66
## [95,] 55 86
## [96,] 55 123
## [97,] 55 157
## [98,] 56 71
## [99,] 56 129
## [100,] 57 133
## [101,] 58 149
## [102,] 59 65
## [103,] 59 104
## [104,] 63 198
## [105,] 64 66
## [106,] 64 123
## [107,] 64 139
## [108,] 64 157
## [109,] 65 104
## [110,] 66 82
## [111,] 66 157
## [112,] 70 158
## [113,] 70 195
## [114,] 74 136
## [115,] 74 176
## [116,] 75 187
## [117,] 75 204
## [118,] 78 137
## [119,] 79 99
## [120,] 79 108
## [121,] 79 164
## [122,] 79 173
## [123,] 81 131
## [124,] 83 136
## [125,] 87 88
## [126,] 87 92
## [127,] 87 96
## [128,] 87 110
## [129,] 87 127
## [130,] 87 156
## [131,] 87 179
## [132,] 87 183
## [133,] 88 183
## [134,] 89 111
## [135,] 89 131
## [136,] 90 117
## [137,] 91 187
## [138,] 92 110
## [139,] 96 100
## [140,] 96 110
## [141,] 96 137
## [142,] 96 150
## [143,] 96 179
## [144,] 98 192
## [145,] 99 164
## [146,] 99 173
## [147,] 100 137
## [148,] 100 150
## [149,] 101 108
## [150,] 102 189
## [151,] 102 201
## [152,] 103 128
## [153,] 103 141
## [154,] 104 160
## [155,] 105 139
## [156,] 108 173
## [157,] 109 121
## [158,] 109 142
## [159,] 110 134
## [160,] 112 185
## [161,] 114 138
## [162,] 114 185
## [163,] 115 144
## [164,] 123 139
## [165,] 123 157
## [166,] 123 178
## [167,] 123 189
## [168,] 124 142
## [169,] 124 160
## [170,] 124 161
## [171,] 125 204
## [172,] 127 150
## [173,] 127 151
## [174,] 129 158
## [175,] 132 150
## [176,] 132 185
## [177,] 134 196
## [178,] 136 202
## [179,] 137 179
## [180,] 138 180
## [181,] 138 185
## [182,] 139 189
## [183,] 139 193
## [184,] 140 160
## [185,] 140 194
## [186,] 142 160
## [187,] 146 192
## [188,] 148 194
## [189,] 149 165
## [190,] 149 186
## [191,] 153 170
## [192,] 158 195
## [193,] 160 166
## [194,] 160 194
## [195,] 161 190
## [196,] 164 173
## [197,] 165 178
## [198,] 165 187
## [199,] 165 199
## [200,] 179 196
## [201,] 181 182
## [202,] 183 190
## [203,] 189 191
Answer: We have total 205 nodes and 203 edges- What are the attributes of this network object? What do they each contain?
5, What proportion of the students are Studying Business? What proportion are in their 2nd Year?
symbol = c(6,5,3,7,4,2)
student.sex = studentNetwork%v%"Sex"
summary(student.sex)
## F M
## 99 106
barplot(table(student.sex), main = "Sex of student", col=symbol)
student.course_study=studentNetwork%v%"Course_of_Study"
summary(student.course_study)
## Business Fine_Arts Liberal_Arts Physical_Sciences
## 109 4 68 6
## Technology
## 18
barplot(table(student.course_study), main="course of study of student", col=symbol)
student.year=studentNetwork%v%"Year"
#View(student.year)
summary(as.character(student.year))
## 1 2 3 4 5 6
## 62 40 42 25 24 12
barplot(table(student.year), main="Year of study of student", col=symbol)
#Ceate legend for network graph
symbol.sex=c(4,12) [match(student.sex, c("M","F"))]
symbol.course_study=c(1,2,3,4,5)[match(student.course_study,c("Business", "Fine_Arts", "Liberal_Arts", "Physical_Sciences", "Technology"))]
plot(studentNetwork, vertex.sides = symbol.sex, vertex.rot = 45, vertex.cex = 2, vertex.col = symbol[student.year], edge.lwd = 2, cex.main = 1, displayisolates = TRUE, main = "Network Diagram - Student Year")
legend("bottomright", c("Year1", "Year2", "Year3", "Year4", "Year5", "Year6"), fill = symbol, cex=0.6)
Reference: https://en.wikipedia.org/wiki/Adjacency_matrix
#Creating Adjacency Matrix
student.matrix=studentNetwork[,]
Degree: A Node’s degree in an undirected network is defined as its number of edges to other nodesstudent.degree<-degree(student.matrix)
student.degree
## [1] 26 8 0 0 2 0 0 6 8 0 4 0 4 2 8 4 2 8 2 0 6 12 2
## [24] 0 14 0 4 0 4 8 4 2 2 4 0 6 0 2 0 2 0 0 6 6 0 0
## [47] 18 0 0 0 6 8 4 6 18 4 2 4 6 4 6 0 4 10 4 8 0 2 0
## [70] 6 2 0 0 10 4 4 4 2 10 0 2 2 4 0 0 2 20 4 4 2 2 8
## [93] 2 0 0 14 2 2 6 10 2 8 4 10 4 0 0 6 4 10 2 2 0 6 6
## [116] 0 2 0 0 0 2 2 14 6 2 0 10 2 6 0 4 4 2 6 0 10 8 6
## [139] 12 10 2 6 0 2 0 2 2 2 6 8 4 0 6 0 2 2 8 8 0 16 6
## [162] 0 0 6 8 2 2 0 0 2 0 0 8 2 0 2 0 6 8 2 2 4 6 0
## [185] 10 2 8 0 14 8 4 4 2 6 6 4 0 2 2 2 4 2 0 6 2
- Plot a histogram of the scores. What does it indicate?
hist(student.degree, col=symbol, main="Distribution of Nodes' Degree", ylab="Number of Students", xlab="Numbder of Connections")
student.betweenness <- betweenness(student.matrix)
plot(student.betweenness, col="green", main="Betweenness Centrality", ylab="Betweenness")
n.words <- read.table("C:/Users/Bhupendra Mishra/Desktop/donotbackup//negative-words.txt", header=TRUE, quote="\"")
#View(n.words)
p.words <-read.table("C:/Users/Bhupendra Mishra/Desktop/donotbackup//positive-words.txt", header=TRUE, quote="\"")
#View(p.words)
library(parallel)
library(foreach)
## Warning: package 'foreach' was built under R version 3.2.2
student.tweets=studentNetwork%v%"Tweets"
#student.tweets
tweet.score <- foreach(i=1:205, .combine='rbind') %dopar%
{
words<-unlist(strsplit(student.tweets[i], split ='\\|'))
# View(words)
pos.matches = match(words, unlist(p.words))
# View(pos.matches)
neg.matches = match(words, unlist(n.words))
#View(neg.matches)
pos.matches = !is.na(pos.matches)
neg.matches = !is.na(neg.matches)
score = sum(pos.matches) - sum(neg.matches)
}
## Warning: executing %dopar% sequentially: no parallel backend registered
hist(tweet.score, main="Sentiment analysis of the Students",xlab="Tweet Score",col=symbol)
#View(tweet.score)
- Do the distribution between 2nd Year and 4th Year students look different? How about between those studying Business and those studying Technology?
#Histogram of Tweet Scores for year 1 and year 2 respectively
par(mfrow = c(1,2))
hist(tweet.score[student.year==2], main="Sentiment - Year 2", xlab="Tweet Score", col=symbol)
hist(tweet.score[student.year==4], main="Sentiment - Year 4", xlab="Tweet Score", col=symbol)
#Histogram of Tweets Scorces for Business and Technolgy Students respectively
hist(tweet.score[student.course_study=="Business"], main="Sentiment - Business", xlab="Tweet Score", col=symbol)
hist(tweet.score[student.course_study=="Technology"], main= "Sentiment - Technology", xlab="Tweet Score", col=symbol)
No comments:
Post a Comment