# This script produces the graphs in Appendix D

# script for preparing IMC survey graphs

# install needed packages
# install.packages(c("ggplot2", "plyr", "reshape", "MASS", "stargazer", "mnormt", "Rmisc", "lubridate", "grid", "doBy", "xtable"))

# clear every object from the current workspace so the script starts clean
# NOTE(review): this also deletes anything the caller had defined before
# sourcing the script
rm(list=ls())
# fix the random-number seed so the random draws made later in the script
# (runif jitter, rmnorm simulations) are reproducible
set.seed(123456)
# attach the packages used below; several of them export functions with the
# same name, so the attach order decides which one a bare call resolves to
library(ggplot2)
library(plyr)
library(reshape)
library(MASS)
library(stargazer)
library(mnormt)
library(Rmisc)
library(lubridate)
library(grid)
library(doBy)
library(xtable)
library(gridExtra)

# read in the survey data; dat.o keeps an untouched copy of the raw file
dat <- read.csv("IMC_data_final.csv")
dat.o <- dat

# exclude the "Giant Possum" guy.
# The comparison is made NA-safe: `NA != "Giant Possum"` evaluates to NA and
# subset() drops rows whose condition is NA, which would silently remove every
# respondent with a missing positionOther instead of just this one.
dat <- subset(dat.o, is.na(positionOther) | positionOther != "Giant Possum")

# exclude anyone who didn't respond to any questions:
# count the missing cells in each row and drop rows with 51 or more of them
dat$na.sum <- rowSums(is.na(dat))
dat.excluded <- subset(dat, na.sum >= 51)
dat <- subset(dat, na.sum < 51)

# report the remaining sample size, and how many rows sit just under the cutoff
dim(dat)
dim(subset(dat, na.sum == 50))

# keep only rows with imcViews == 1 (rows with a missing imcViews are dropped
# too, because subset() discards NA conditions)
# NOTE(review): presumably imcViews == 1 codes "did not participate in the
# IMC" -- confirm against the codebook
dat <- subset(dat, imcViews == 1)

# drop the extraneous na.sum variable
dat$na.sum <- NULL



#################
# Figure 14a
#################

##
# how many people did we survey in each job position (excluding missings)?
##

# create collapsed variable for job/position

#note that i can break on assistant versus tenured
#note that i rearranged the order to make the plot look better
# Recode the raw `position` codes into six display groups. %in% never returns
# NA, so rows with a missing position are only touched by the is.na() line.
position.group <- rep(NA_real_, nrow(dat))
position.group[dat$position %in% c(3, 4)] <- 1
position.group[dat$position %in% 1] <- 2
position.group[dat$position %in% 2] <- 3
position.group[dat$position %in% 5] <- 4
position.group[dat$position %in% c(6, 7)] <- 5
position.group[is.na(dat$position)] <- 6

# levels are given explicitly so each label stays attached to its code even if
# a group happens to be empty (without `levels`, factor() fails when the
# number of observed codes differs from the number of labels)
dat$altposition <- factor(position.group, levels = 1:6
                          , labels = c("TT academic", "grad student", "non-TT academic"
                                       , "emeritus", "industry", "missing"))
rm(position.group)

# create the plot
# the "missing" group is excluded by filtering on the recoded variable itself
# (the raw `position` column never contains the string "missing")
a1 <- ggplot(subset(dat, altposition != "missing"), aes(altposition)) +
  geom_bar() + xlab("Job Position") + ylab("# of respondents") +
  scale_y_continuous(limits = c(0, 500)) +
  theme(axis.text = element_text(size = 10, color = "black", angle = 30, hjust = 1)
        , axis.title = element_text(size = 12))

# print out the figure
a1
ggsave(filename = "position-noimc.eps", plot = a1, device = "eps", width = 2.9, height = 2.9)



#################
# Figure 14b
#################

##
# what proportion of the sample indicated interest in each subfield?
##

# create a ggplot-friendly format for the field variable
# note: sums will exceed 100% due to multiple interests

# total of each subfield indicator column, in the same order as field.lab
field.cols <- c("american", "IR", "CP", "politicalTheory", "methods", "publicPolicy")
field <- unlist(lapply(field.cols, function(v) sum(dat[[v]])), use.names = FALSE)
rm(field.cols)

field.lab <- c("American", "IR", "Comparative", "Theory", "Methods", "Policy")

# one row per (respondent, subfield) mention, so geom_bar can simply count rows
field.obs <- rep(field.lab, times = field)
dat.field <- data.frame(field.obs)
dat.field$field.obs1 <- factor(
  field.obs,
  levels = c("Comparative", "American", "IR", "Methods", "Policy", "Theory")
)

# create the plot
# note: sums will exceed 100% due to multiple interests
# NOTE(review): 911 is a hard-coded denominator, presumably the number of
# respondents in `dat` -- confirm it matches nrow(dat) for this subsample
a2 <- ggplot(dat.field, aes(x = field.obs1)) +
  geom_bar(aes(y = (..count..) / 911 )) +
  xlab("Subfield") +
  ylab("% of respondents") +
  scale_y_continuous(labels = scales::percent) +
  theme(
    axis.text = element_text(size = 10, color = "black", angle = 45, hjust = 1),
    axis.title = element_text(size = 12)
  )

# print out the figure
a2
ggsave(filename = "field-noimc.eps", plot = a2, device = "eps", width = 2.9, height = 2.9)



#################
# Figure 14c
#################

##
# what was the distribution of age in our sample (excluding missings)?
##

# create the plot
# histogram of age in 2-year bins; rows with a missing age are dropped
# silently (na.rm) and the x axis is restricted to ages 20-90
a3 <- ggplot(dat, aes(age)) +
  geom_histogram(binwidth = 2, na.rm = TRUE) +
  scale_x_continuous(
    "Age",
    breaks = seq(from = 20, to = 90, by = 10),
    limits = c(20, 90)
  ) +
  scale_y_continuous(name = "# of Respondents") +
  theme(
    axis.text = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 12)
  )

# print out the figure
a3
ggsave(filename = "age-noimc.eps", plot = a3, device = "eps", width = 3, height = 3)


#################
# Figure 14d
#################

##
# what was the distribution of gender in our sample (excluding missings)?
##

# keep only respondents with a recorded gender
dat.gender.factor <- dat[!is.na(dat[["gender"]]), ]

# label the codes: 1 = Male, 2 = Female (any other code becomes NA)
dat.gender.factor$gender.factor <- factor(
  dat.gender.factor$gender,
  levels = c(1, 2),
  labels = c("Male", "Female")
)

# bar chart of counts by gender, grey fill, no legend
a4 <- ggplot(data = dat.gender.factor, aes(gender.factor)) +
  geom_bar(aes(fill = gender.factor)) +
  scale_fill_grey(start = 0.3, end = 0.7, guide = FALSE) +
  scale_x_discrete("Gender") +
  scale_y_continuous(name = "# of Respondents", limits = c(0, 600)) +
  theme(
    axis.text = element_text(size = 10, color = "black"),
    axis.title = element_text(size = 12)
  )
ggsave(filename = "gender-noimc.eps", plot = a4, device = "eps", width = 3, height = 3)

# percentage men
sum(dat.gender.factor$gender == 1) / nrow(dat.gender.factor)

########################################
# Figure 16: Where Do People Get Ideas?
########################################


##
#where do ideas come from?
##

#invert and convert to factors
# For every "ideas" item create two new columns:
#   ideas<Item>inv        the reverse-coded rating (6 - raw value)
#   ideas<Item>invfactor  the same rating as a labelled factor, where
#                         1 = "Not at all imp." and 5 = "Extremely imp."
# `levels = 1:5` is given explicitly so a label can never slide onto the wrong
# value when some rating is unobserved for an item (without `levels`, factor()
# numbers the labels over whatever values happen to occur, or fails outright).
# The columns are appended in the order <Item>inv, <Item>invfactor, item by
# item; keep that order, since code elsewhere in the script may refer to
# columns of `dat` by position.
importance.labels <- c("1: Not at all imp.", "2: Slightly imp.", "3: Somewhat imp."
                       , "4: Important", "5: Extremely imp.")

for (idea.src in c("Blog", "Twitter", "Group", "Facebook", "Journal"
                   , "Conference", "Search", "Student", "Webinar")) {
  inv.col <- paste0("ideas", idea.src, "inv")
  dat[[inv.col]] <- 6 - dat[[paste0("ideas", idea.src)]]
  dat[[paste0(inv.col, "factor")]] <- factor(dat[[inv.col]], levels = 1:5
                                             , labels = importance.labels)
}

# drop the loop helpers so they do not linger in the workspace
rm(importance.labels, idea.src, inv.col)


#prepare for unified plot
#thanks to http://stackoverflow.com/questions/18158461/grouped-bar-plot-in-ggplot

#create names (facet titles, in the same order as the columns selected below)
names <- c("Webinar", "Blog", "Facebook",
           "Twitter", "Small Groups", "Journal",
           "Conference", "Search Engines", "Students")

#take only the needed information
plot.dat <- dat[, paste0("ideas"
                         , c("Webinar", "Blog", "Facebook", "Twitter", "Group"
                             , "Journal", "Conference", "Search", "Student")
                         , "invfactor")]

#create a frequency table
# cross-tabulates "which column" against "which label": one row per item,
# one column per response label (labels sort alphabetically, i.e. 1..5)
freq <- table(col(plot.dat), as.matrix(plot.dat))

#bind the frequency table with the names from above
plot.dat <- data.frame(cbind(freq), names, check.names = FALSE)

#reorder for plotting: names first, then the response columns 3, 4, 5, 2, 1
plot.dat <- plot.dat[, c(6, 3, 4, 5, 2, 1)]

#turn counts into proportions of each item's total number of ratings
row.total <- rowSums(plot.dat[, 2:6])
for (i in 2:6) {
  plot.dat[[i]] <- plot.dat[[i]] / row.total
}
rm(row.total)

#melt to get the grouping compatible with ggplot2
data.m <- melt(plot.dat, id.vars = 'names')

#unifying factor across facets
data.m$all <- ""

#setting up nice labels
# the dots are stripped first; factor() then sorts the resulting strings
# alphabetically (1..5) and the labels are attached in that order
data.m$variable <- gsub(".", " ", data.m$variable, fixed = TRUE)
data.m$Response <- factor(
  data.m$variable,
  labels = c("1: Not at all imp.", "2: Slightly imp.",
             "3: Somewhat imp.", "4: Important",
             "5: Extremely imp.")
)

#initial plot: one facet per source, dodged bars per response, axes flipped
p <- ggplot(data.m, aes(x = all, y = value)) +
  facet_wrap(~names, nrow = 3) +
  geom_bar(aes(fill = Response), stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  xlab("") +
  ylab("% of Ratings") +
  coord_flip(xlim = c(1, 1.15), ylim = c(0, .6)) +
  scale_fill_grey(guide = guide_legend(reverse = TRUE)) +
  theme_bw() +
  theme(
    panel.margin.x = unit(0.75, "lines"),
    axis.text = element_text(size = 10, color = "black", angle = 45, hjust = 1),
    strip.text.x = element_text(size = 10, face = "bold")
  )

#extract means for plotting
# The reverse-coded rating columns are selected BY NAME, in the same order as
# `namelist` below, so mean i always belongs to facet i. (Selecting them by
# column position silently mispairs means and facets as soon as the column
# layout of `dat` changes.)
varlist <- dat[c("ideasBloginv", "ideasTwitterinv", "ideasGroupinv"
                 , "ideasFacebookinv", "ideasJournalinv", "ideasConferenceinv"
                 , "ideasSearchinv", "ideasStudentinv", "ideasWebinarinv")]
meanlist <- lapply(varlist, FUN = mean, na.rm = TRUE)

#mean labeling
#thanks to http://stackoverflow.com/questions/11889625/annotating-text-on-individual-facet-in-ggplot2
# and http://stackoverflow.com/questions/19209604/creating-a-multiple-data-frames-in-for-loop

#facet titles, one per entry of meanlist (same order)
namelist <- c("Blog", "Twitter", "Small Groups", "Facebook", "Journal", "Conference"
              , "Search Engines", "Students", "Webinar")

#one row per facet: where the label goes (value = .3 on the flipped y axis)
#and which facet it belongs to
tester <- data.frame(value = .3, all = ""
                     , names = factor(namelist, levels = namelist))

#one geom_label layer per facet, showing that facet's mean rating
mean.layers <- lapply(seq_along(namelist), function(k) {
  geom_label(data = tester[k, ]
             , label = paste("mean: ", round(meanlist[[k]], digits = 2))
             , fontface = "bold", nudge_x = 0.61)
})

#combine original layer with the 9 facet-specific layers
pplot <- Reduce(`+`, mean.layers, p)
pplot

# output the plot
ggsave(filename = "ideas-noimc.eps", plot = pplot, device = "eps", width = 6, height = 6)

#drop the extra labeling elements
rm(mean.layers)

#################
# Figure 17
#################

##
#what do people want from web tools?
##

#invert and convert to factors
# For every "interest" item create two new columns:
#   interest<Item>inv        the reverse-coded rating (6 - raw value)
#   interest<Item>invfactor  the same rating as a labelled factor (1..5)
# `levels = 1:5` is given explicitly so a label can never slide onto the wrong
# value when some rating is unobserved for an item.
# The labels read "imp." here; the plot further down attaches its own "int."
# labels to the tabulated categories.
# The columns are appended in the order <Item>inv, <Item>invfactor, item by
# item; keep that order, since code elsewhere in the script may refer to
# columns of `dat` by position.
interest.labels <- c("1: Not at all imp.", "2: Slightly imp.", "3: Somewhat imp."
                     , "4: Important", "5: Extremely imp.")

for (interest.item in c("Software", "Research", "Model", "Collaborate"
                        , "Teach", "Feedback", "Debate")) {
  inv.col <- paste0("interest", interest.item, "inv")
  dat[[inv.col]] <- 6 - dat[[paste0("interest", interest.item)]]
  dat[[paste0(inv.col, "factor")]] <- factor(dat[[inv.col]], levels = 1:5
                                             , labels = interest.labels)
}

# drop the loop helpers so they do not linger in the workspace
rm(interest.labels, interest.item, inv.col)

#prepare for unified plot

#create names (facet titles, in the same order as the columns selected below)
names <- c("Software", "Research", "Model",
           "Collaborate", "Teach", "Feedback",
           "Debate")

#take only the needed information
plot.dat.2 <- dat[, paste0("interest"
                           , c("Software", "Research", "Model", "Collaborate"
                               , "Teach", "Feedback", "Debate")
                           , "invfactor")]

#create a frequency table: one row per item, one column per response label
freq.2 <- table(col(plot.dat.2), as.matrix(plot.dat.2))

#bind the frequency table with the names from above
# (no check.names = FALSE here, so the label columns get syntactic names)
plot.dat.2 <- data.frame(cbind(freq.2), names)

#reorder for plotting: names first, then the response columns 3, 4, 5, 2, 1
plot.dat.2 <- plot.dat.2[, c(6, 3, 4, 5, 2, 1)]

#turn counts into proportions of each item's total number of ratings
row.total.2 <- rowSums(plot.dat.2[, 2:6])
for (i in 2:6) {
  plot.dat.2[[i]] <- plot.dat.2[[i]] / row.total.2
}
rm(row.total.2)

#melt to get the grouping compatible with ggplot2
data.m.2 <- melt(plot.dat.2, id.vars = 'names')

#unifying factor
data.m.2$all <- ""

#setting up nice labels
# the dots are stripped first; factor() then sorts the resulting strings
# alphabetically and the labels are attached in that order
data.m.2$variable <- gsub(".", " ", data.m.2$variable, fixed = TRUE)
data.m.2$Response <- factor(
  data.m.2$variable,
  labels = c("1: Not at all int.", "2: Slightly int.",
             "3: Somewhat int.", "4: Interested",
             "5: Extremely int.")
)

#plot: one facet per tool category, dodged bars per response, axes flipped
p <- ggplot(data.m.2, aes(x = all, y = value)) +
  facet_wrap(~names, ncol = 3) +
  geom_bar(aes(fill = Response), stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  coord_flip(xlim = c(1, 1.15), ylim = c(0, .5)) +
  scale_fill_grey(guide = guide_legend(reverse = TRUE)) +
  theme_bw() +
  xlab("") +
  ylab("Reported Interest in Using On-Line Tools in Each Category") +
  theme(
    panel.margin.x = unit(0.75, "lines"),
    axis.text = element_text(size = 10, color = "black", angle = 45, hjust = 1),
    strip.text.x = element_text(size = 10, face = "bold")
  )



#mean labeling
#thanks to http://stackoverflow.com/questions/11889625/annotating-text-on-individual-facet-in-ggplot2
# and http://stackoverflow.com/questions/19209604/creating-a-multiple-data-frames-in-for-loop

#get a list of names for grouping
# The facet titles are written out and the matching reverse-coded columns are
# selected BY NAME from them, so mean i always belongs to facet i. (Picking
# both by column position silently mispairs means and facets as soon as the
# column layout of `dat` changes.)
namelist <- c("Software", "Research", "Model", "Collaborate"
              , "Teach", "Feedback", "Debate")

#extract means for plotting
varlist <- dat[paste0("interest", namelist, "inv")]
meanlist <- lapply(varlist, FUN = mean, na.rm = TRUE)

#one row per facet: where the label goes (value = .25 on the flipped y axis)
#and which facet it belongs to
tester <- data.frame(value = .25, all = ""
                     , names = factor(namelist
                                      , levels = c("Collaborate", "Debate", "Feedback", "Model"
                                                   , "Research", "Software", "Teach")))

#plot the individual components into layers grouped to their facet with the thing to plot being the related mean
# the layers are stored as lab1 ... lab7 (and the loop index is left in `i`)
# because the output step further down refers to those objects by name
for (i in seq_along(namelist)) {
  assign(paste0("lab", i)
         , geom_label(data = tester[i, ]
                      , label = paste("mean: ", round(meanlist[[i]], digits = 2))
                      , fontface = "bold", nudge_x = 0.61))
}

#For plotting a facet_wrap with nice axes
#MANY thanks to http://stackoverflow.com/questions/13297155/add-floating-axis-labels-in-facet-wrap-plot

library(grid)
# pos - where to add new labels
# newpage, vp - see ?print.ggplot
# Draw a facet_wrap plot whose last row is incomplete, giving the panels that
# sit directly above the empty cells an x-axis of their own.
#
# x        the ggplot object to draw
# pos      "up" (default, first choice of match.arg) leaves the copied axes
#          where they are; "down" moves them to the row of the last panel's
#          axis
# newpage  start a new grid page before drawing (default: only when no
#          viewport is given)
# vp       NULL, a viewport name (character) to seek, or a viewport object to
#          push before drawing
#
# Returns the built plot (result of ggplot_build) invisibly; the drawing
# itself is the side effect.
#
# NOTE(review): the function reaches into ggplot2 internals
# (ggplot2:::set_last_plot, p$panel$layout, grob names "axis_b<k>") --
# presumably tied to the ggplot2 version this script was written against;
# verify before running under a different version.
facetAdjust <- function(x, pos = c("up", "down"), 
                        newpage = is.null(vp), vp = NULL)
{
  # part of print.ggplot
  ggplot2:::set_last_plot(x)
  if(newpage)
    grid.newpage()
  pos <- match.arg(pos)
  p <- ggplot_build(x)
  gtable <- ggplot_gtable(p)
  # finding dimensions
  # column-wise maximum of layout columns 2 and 3
  # (presumably the ROW and COL indices of the panels -- TODO confirm)
  dims <- apply(p$panel$layout[2:3], 2, max)
  nrow <- dims[1]
  ncol <- dims[2]
  # number of panels in the plot
  panels <- sum(grepl("panel", names(gtable$grobs)))
  space <- ncol * nrow
  # missing panels
  n <- space - panels
  # checking whether modifications are needed
  if(panels != space){
    # indices of panels to fix
    # i.e. the last n cells of the second-to-last row of the grid
    idx <- (space - ncol - n + 1):(space - ncol)
    # copying x-axis of the last existing panel to the chosen panels 
    # in the row above
    gtable$grobs[paste0("axis_b",idx)] <- list(gtable$grobs[[paste0("axis_b",panels)]])
    if(pos == "down"){
      # if pos == down then shifting labels down to the same level as 
      # the x-axis of last panel
      # NOTE(review): the pattern uses a [first-last] character class, which
      # matches one character at a time, so it only covers single-digit
      # panel indices
      rows <- grep(paste0("axis_b\\-[", idx[1], "-", idx[n], "]"), 
                   gtable$layout$name)
      lastAxis <- grep(paste0("axis_b\\-", panels), gtable$layout$name)
      gtable$layout[rows, c("t","b")] <- gtable$layout[lastAxis, c("t")]
    }
  }
  # again part of print.ggplot, plotting adjusted version
  if(is.null(vp)){
    grid.draw(gtable)
  }
  else{
    # draw inside the requested viewport, then step back out of it
    if (is.character(vp)) 
      seekViewport(vp)
    else pushViewport(vp)
    grid.draw(gtable)
    upViewport()
  }
  invisible(p)
}

#combine original layer with the 7 facet-specific layers
pplot <- Reduce(`+`, list(lab1, lab2, lab3, lab4, lab5, lab6, lab7), p)

# output the plot
# facetAdjust() draws straight onto the active device, so the EPS device is
# opened and closed by hand here
setEPS()
postscript("interest-noimc.eps", width = 6, height = 6)
facetAdjust(pplot)
#pplot
dev.off()

#drop the loop index and the extra labeling elements
rm(list = c("i", paste0("lab", 1:7)))




###########################################
# Figure 15: How Often do People Use Tools?
###########################################


##
#how often do people use each online tool?
##

#convert to factors
# For every usage item create use<Item>factor, the raw 1-6 frequency code as a
# labelled factor. `levels = 1:6` is given explicitly for ALL items (originally
# only usePresent had it) so a label can never slide onto the wrong code, and
# factor() cannot fail, when some frequency is unobserved for an item.
# The columns are appended to `dat` in the order listed below.
usage.labels <- c("1: Never", "2: Rarely", "3: A Few Times Per Year",
                  "4: Once a Month", "5: 2-3 Times a Month", "6: Once a Week or More")

for (use.item in c("Collaborate", "Guest", "Write", "Social", "Present",
                   "Class", "Learn", "Listen", "Read")) {
  dat[[paste0("use", use.item, "factor")]] <- factor(dat[[paste0("use", use.item)]]
                                                     , levels = 1:6
                                                     , labels = usage.labels)
}

# drop the loop helpers so they do not linger in the workspace
rm(usage.labels, use.item)



#prepare for unified plot
#thanks to http://stackoverflow.com/questions/18158461/grouped-bar-plot-in-ggplot

#create names (facet titles, in the same order as the columns selected below)
names <- c("Collaborate with Coauthor", "Host Guest Lecture in Class", "Write a Blog Post",
           "Send Tweet/Facebook Post", "Present Web Seminar", "Online Video in Class",
           "Use Online Video to Learn", "Attend Web Seminar", "Read a Blog Post")

#take only the needed information
plot.dat <- dat[, paste0("use"
                         , c("Collaborate", "Guest", "Write", "Social", "Present"
                             , "Class", "Learn", "Listen", "Read")
                         , "factor")]

#create a frequency table
# cross-tabulates "which column" against "which label": one row per activity,
# one column per frequency label
freq <- table(col(plot.dat), as.matrix(plot.dat))

#bind the frequency table with the names from above
plot.dat <- data.frame(cbind(freq), names, check.names = FALSE)

#turn counts into proportions of each activity's total number of answers
row.total <- rowSums(plot.dat[, 1:6])
for (i in 1:6) {
  plot.dat[[i]] <- plot.dat[[i]] / row.total
}
rm(row.total)

#melt to get the grouping compatible with ggplot2
data.m <- melt(plot.dat, id.vars = 'names')

#unifying factor across facets
data.m$all <- ""

#setting up nice labels
# the dots are stripped first; factor() then sorts the resulting strings
# alphabetically (1..6) and the labels are attached in that order
data.m$variable <- gsub(".", " ", data.m$variable, fixed = TRUE)
data.m$Response <- factor(
  data.m$variable,
  labels = c("1: Never", "2: Rarely", "3: A Few Times Per Year",
             "4: Once a Month", "5: 2-3 Times a Month", "6: Once a Week or More")
)

#initial plot: one facet per activity, dodged bars per response, axes flipped
p <- ggplot(data.m, aes(x = all, y = value)) +
  facet_wrap(~names, nrow = 3) +
  geom_bar(aes(fill = Response), stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::percent) +
  xlab("") +
  ylab("% of Ratings") +
  coord_flip(xlim = c(1, 1.15), ylim = c(0, .8)) +
  scale_fill_grey(guide = guide_legend(reverse = TRUE)) +
  theme_bw() +
  theme(
    panel.margin.x = unit(0.75, "lines"),
    axis.text = element_text(size = 10, color = "black", angle = 45, hjust = 1),
    strip.text.x = element_text(size = 6, face = "bold")
  )

#extract means for plotting
# The raw usage columns are selected BY NAME, in the same order as `namelist`
# below, so mean i always belongs to facet i. (Selecting them by column
# position silently mispairs means and facets as soon as the column layout of
# `dat` changes.)
varlist <- dat[c("useCollaborate", "useGuest", "useWrite", "useSocial", "usePresent"
                 , "useClass", "useLearn", "useListen", "useRead")]
meanlist <- lapply(varlist, FUN = mean, na.rm = TRUE)

#mean labeling
#thanks to http://stackoverflow.com/questions/11889625/annotating-text-on-individual-facet-in-ggplot2
# and http://stackoverflow.com/questions/19209604/creating-a-multiple-data-frames-in-for-loop

#facet titles, one per entry of meanlist (same order)
namelist <- c("Collaborate with Coauthor", "Host Guest Lecture in Class", "Write a Blog Post"
              , "Send Tweet/Facebook Post", "Present Web Seminar", "Online Video in Class"
              , "Use Online Video to Learn", "Attend Web Seminar", "Read a Blog Post")

#one row per facet: where the label goes (value = .4 on the flipped y axis)
#and which facet it belongs to
tester <- data.frame(value = .4, all = ""
                     , names = factor(namelist, levels = namelist))

#one geom_label layer per facet, showing that facet's mean response
mean.layers <- lapply(seq_along(namelist), function(k) {
  geom_label(data = tester[k, ]
             , label = paste("mean: ", round(meanlist[[k]], digits = 2))
             , fontface = "bold", nudge_x = 0.61)
})

#combine original layer with the 9 facet-specific layers
pplot <- Reduce(`+`, mean.layers, p)
pplot

# output the plot
ggsave(filename = "usage-noimc.eps", plot = pplot, device = "eps", width = 6.5, height = 6.5)

#drop the extra labeling elements
rm(mean.layers)





#################
# Table 3
#################

##
# What factors predict interest in certain venues?
##


# does gender influence the importance of Conferences?

# some code from: http://stackoverflow.com/questions/17368223/
#   ggplot2-multi-group-histogram-with-in-group-proportions-rather-than-frequency
# and: http://stackoverflow.com/questions/3253641/how-to-change-the-order-of-a-discrete-x-scale-in-ggplot

# convert gender to a factor with "female" as the first (reference) level:
# gender == 1 is recoded to 2 ("male"), every other non-missing code to 1
# ("female"); missing stays missing
# NOTE(review): any gender code other than 1 ends up labelled "female" here,
# whereas Figure 14d only labels codes 1 and 2 -- confirm gender takes no
# other values
dat$genderrec <- ifelse(dat$gender==1, 2, 1)
dat$genderfac <- factor(dat$genderrec, labels=c("female", "male"))

# convert the "missing" category of position to actual missing data
# (re-running factor() afterwards drops the now-unused "missing" level)
dat$altposition[which(dat$altposition=="missing")] <- NA
dat$newposition <- factor(dat$altposition)

# All models below are ordered probits (polr, method = "probit") of a
# reverse-coded importance rating on gender, the six subfield indicators,
# job position and timeTeach; Hess = TRUE stores the Hessian with the fit.

# predict importance of conferences to new ideas
ideas.conf.fit <- polr(as.factor(ideasConferenceinv) ~ age + genderfac + american + methods + publicPolicy 
                       + politicalTheory + CP + IR + newposition + timeTeach
                       , data = dat, method = "probit", Hess = TRUE)

# show results
summary(ideas.conf.fit)

# is age really negatively associated with the importance of conferences?
# scatter of the (jittered) rating against age with a linear fit of the
# un-jittered rating overlaid
dat$ideasConferenceinv.jit <- dat$ideasConferenceinv + runif(length(dat$ideasConferenceinv), min=-0.4, max=0.4)
plot(ideasConferenceinv.jit ~ age, dat)
abline(lm(ideasConferenceinv ~ age, dat))

# refit w/o age (position already plays a similar role)
# this overwrites the fit above; the age-free version is the one tabulated
ideas.conf.fit <- polr(as.factor(ideasConferenceinv) ~ genderfac + american + methods + publicPolicy 
                       + politicalTheory + CP + IR + newposition + timeTeach
                       , data = dat, method = "probit", Hess = TRUE)


# predict importance of blogs to new ideas
ideas.blog.fit <- polr(as.factor(ideasBloginv) ~ age + genderfac + american + methods + publicPolicy 
                       + politicalTheory + CP + IR + newposition + timeTeach 
                       , data = dat, method = "probit", Hess = TRUE)

# show results
summary(ideas.blog.fit)

# refit w/o age
ideas.blog.fit <- polr(as.factor(ideasBloginv) ~ genderfac + american + methods + publicPolicy 
                       + politicalTheory + CP + IR + newposition + timeTeach 
                       , data = dat, method = "probit", Hess = TRUE)

# predict importance of webinars to new ideas
ideas.web.fit <- polr(as.factor(ideasWebinarinv) ~ age + genderfac + american + methods + publicPolicy 
                      + politicalTheory + CP + IR + newposition + timeTeach
                      , data = dat, method = "probit", Hess = TRUE)

# show results
summary(ideas.web.fit)

# is age really positively associated with the importance of webinars?
dat$ideasWebinarinv.jit <- dat$ideasWebinarinv + runif(length(dat$ideasWebinarinv), min=-0.4, max=0.4)
plot(ideasWebinarinv.jit ~ age, dat)
abline(lm(ideasWebinarinv ~ age, dat))

# refit w/o age
ideas.web.fit <- polr(as.factor(ideasWebinarinv) ~ genderfac + american + methods + publicPolicy 
                      + politicalTheory + CP + IR + newposition + timeTeach
                      , data = dat, method = "probit", Hess = TRUE)



# predict importance of journals to new ideas (no age term)
ideas.journal.fit <- polr(as.factor(ideasJournalinv) ~ genderfac + american + methods + publicPolicy 
                      + politicalTheory + CP + IR + newposition + timeTeach
                      , data = dat, method = "probit", Hess = TRUE)



# predict importance of small groups to new ideas (no age term)
ideas.group.fit <- polr(as.factor(ideasGroupinv) ~ genderfac + american + methods + publicPolicy 
                          + politicalTheory + CP + IR + newposition + timeTeach
                          , data = dat, method = "probit", Hess = TRUE)

# predict importance of Facebook to new ideas (no age term)
ideas.facebook.fit <- polr(as.factor(ideasFacebookinv) ~ genderfac + american + methods + publicPolicy 
                        + politicalTheory + CP + IR + newposition + timeTeach
                        , data = dat, method = "probit", Hess = TRUE)

# predict importance of Twitter to new ideas (no age term)
ideas.twitter.fit <- polr(as.factor(ideasTwitterinv) ~ genderfac + american + methods + publicPolicy 
                        + politicalTheory + CP + IR + newposition + timeTeach
                        , data = dat, method = "probit", Hess = TRUE)

# predict importance of students to new ideas (no age term)
ideas.students.fit <- polr(as.factor(ideasStudentinv) ~ genderfac + american + methods + publicPolicy 
                        + politicalTheory + CP + IR + newposition + timeTeach
                        , data = dat, method = "probit", Hess = TRUE)

# print the eight age-free models side by side as a text table
# (the covariate labels are listed in the order of the model terms)
stargazer(ideas.conf.fit,ideas.blog.fit,ideas.web.fit, ideas.journal.fit, ideas.group.fit,
          ideas.facebook.fit, ideas.twitter.fit, ideas.students.fit,type="text", 
          dep.var.labels = c("Conference", "Blog", "Webinar", "Journal", "Small Group", "Facebook", "Twitter", "Students"),
          model.numbers = FALSE, label="tab:ideas-noimc", title="Predictors of Importance for Sources of New Ideas and Research Findings",
          font.size = "small", 
          covariate.labels=c("male", "field: American", "field: Methods", "field: Public Policy", "field: Political Theory", "field: Comparative", 
                             "field: International Relations", "position: graduate student", "position: academic, non-tenure-track",
                             "position: emeritus", "position: industry/government", "prop. of time spent teaching")
)

# same table again, written as LaTeX to importance-model-noimc.tex inside a
# sidewaystable float
stargazer(ideas.conf.fit,ideas.blog.fit,ideas.web.fit, ideas.journal.fit, ideas.group.fit,
          ideas.facebook.fit, ideas.twitter.fit, ideas.students.fit,
          type="latex",out="importance-model-noimc.tex", float=T, float.env="sidewaystable",
          dep.var.labels = c("Conference", "Blog", "Webinar", "Journal", "Small Group", "Facebook", "Twitter", "Students"),
          model.numbers = FALSE, label="tab:ideas-noimc", title="Predictors of Importance for Sources of New Ideas and Research Findings",
          font.size = "footnotesize", 
          covariate.labels=c("male", "field: American", "field: Methods", "field: Public Policy", "field: Political Theory", "field: Comparative", 
                             "field: International Relations", "position: graduate student", "position: academic, non-tenure-track",
                             "position: emeritus", "position: industry/government", "prop. of time spent teaching")
)


###################
# model predictions
# Figures 18 and 19
###################

#model results for webinars, conferences, and blogs
#
# For each fitted model, coefficients and cutpoints are drawn from a
# multivariate normal centred on the estimates (using vcov() of the fit).
# For a set of covariate profiles, the probability of each response category
# is computed with pnorm() for every draw and then averaged over the draws.
# NOTE(review): the fits are assumed to be probit polr() fits with 12 slope
# coefficients followed by 4 cutpoints -- confirm against the model calls.

# number of coefficient draws per model
n.sims <- 1000

# response categories, from the lowest cutpoint interval to the highest
response.labels <- c("Not at all imp.", "Slightly imp.", "Somewhat imp.",
                     "Important", "Extremely imp.")

# Covariate profiles. One row per profile; columns follow the coefficient
# order used throughout this section:
#   male | american, methods, public policy, theory, CP, IR |
#   grad student, non-TT, emeritus, industry | time teach
# Time spent teaching is held at 30 in every profile.
# NOTE(review): 30 is presumably a percentage -- confirm the scale of timeTeach.
pp.profiles <- list(
  # female (male = 0) vs. male (male = 1); no field, TT academic
  gender = list(
    design = cbind(c(0, 1), matrix(0, 2, 6), matrix(0, 2, 4), 30),
    labels = c("Female", "Male"),
    title.suffix = "Females and Males"
  ),
  # male, one field indicator switched on at a time, TT academic
  interest = list(
    design = cbind(1, diag(6), matrix(0, 6, 4), 30),
    labels = c("American", "Methods", "Policy", "Theory", "CP", "IR"),
    title.suffix = "Each Interest"
  ),
  # male, no field; first row is the omitted position category (TT academic),
  # then one position indicator switched on at a time
  position = list(
    design = cbind(1, matrix(0, 5, 6), rbind(0, diag(4)), 30),
    labels = c("TT Academic", "Graduate Student", "non-TT Academic",
               "Emeritus", "Industry"),
    title.suffix = "Each Position"
  )
)

# Draw n.sims parameter vectors (slopes followed by cutpoints) for one fit.
simulate_coefs <- function(fit) {
  pars <- as.matrix(c(fit$coefficients, fit$zeta))
  vce <- vcov(fit)
  rmnorm(n.sims, as.vector(pars), as.matrix(vce))
}

# Average response-category probabilities over the simulated draws.
#   sims           matrix of draws: slope columns first, then cutpoints
#   design         profile matrix, one row per profile, one column per slope
#   profile.labels label for each row of `design`
# Returns a data frame with columns `prob`, `row` (profile) and `Response`,
# ordered by response category and then by profile.
mean_response_probs <- function(sims, design, profile.labels) {
  n.beta <- ncol(design)
  n.cut <- ncol(sims) - n.beta
  n.prof <- nrow(design)
  # guard against a model whose parameters do not match the profile layout
  stopifnot(n.prof == length(profile.labels),
            n.cut == length(response.labels) - 1)

  # linear predictor: one row per draw, one column per profile
  eta <- sims[, seq_len(n.beta), drop = FALSE] %*% t(design)

  # mean cumulative probability of falling at or below each cutpoint
  cum <- vapply(seq_len(n.cut),
                function(j) colMeans(pnorm(sims[, n.beta + j] - eta)),
                numeric(n.prof))
  cum <- matrix(cum, nrow = n.prof)

  # category probabilities are differences of adjacent cumulative probabilities
  bounds <- cbind(0, cum, 1)
  probs <- bounds[, -1, drop = FALSE] - bounds[, -ncol(bounds), drop = FALSE]

  data.frame(
    prob = as.vector(probs),
    row = factor(rep(profile.labels, times = ncol(probs)), levels = profile.labels),
    Response = factor(rep(response.labels, each = n.prof), levels = response.labels)
  )
}

# Simulate from one fit and build the gender / interest / position plots.
# `source.name` is inserted into the plot titles (e.g. "Webinar").
# Returns a list of ggplot objects named gender, interest, position.
importance_plots <- function(fit, source.name) {
  sims <- simulate_coefs(fit)
  lapply(pp.profiles, function(p) {
    probs <- mean_response_probs(sims, p$design, p$labels)
    print(probs)
    ggplot(probs, aes(x = row, y = prob, fill = Response)) +
      geom_bar(position = "stack", stat = "identity") +
      xlab("") +
      ylab("Predicted Probability") +
      ggtitle(paste("Probability of Each", source.name,
                    "Importance Response for", p$title.suffix)) +
      coord_flip() +
      scale_fill_grey() +
      theme_bw()
  })
}

# The seed is set once; the three models are then simulated in this order.
set.seed(123456)

web.plots <- importance_plots(ideas.web.fit, "Webinar")
conf.plots <- importance_plots(ideas.conf.fit, "Conference")
blog.plots <- importance_plots(ideas.blog.fit, "Blog")

#gender
q1 <- web.plots$gender
q2 <- conf.plots$gender
q3 <- blog.plots$gender

#interest
r1 <- web.plots$interest
r2 <- conf.plots$interest
r3 <- blog.plots$interest

#position
s1 <- web.plots$position
s2 <- conf.plots$position
s3 <- blog.plots$position


# one EPS file per comparison, stacking webinar / conference / blog panels
setEPS()
postscript("ppgender-noimc.eps", width=8, height=7)
multiplot(q1,q2,q3)
dev.off()

setEPS()
postscript("ppinterest-noimc.eps", width=8, height=7)
multiplot(r1,r2,r3)
dev.off()

setEPS()
postscript("pppos-noimc.eps", width=8, height=7)
multiplot(s1,s2,s3)
dev.off()






# look at factors that might make you more likely to attend a webinar

# Labels for the 1-5 codes of the attend* survey items.
attend.labels <- c("Much more likely", "Somewhat more likely", "Neither more or less likely",
                   "Somewhat less likely", "Much less likely")

# Convert a 1-5 coded item to a labelled factor; other values become NA.
attend_factor <- function(x) {
  factor(x, levels = 1:5, labels = attend.labels)
}

# Bar chart of the non-missing responses to one attend* item.
#   response  factor produced by attend_factor()
#   title     plot title
# All five categories stay on the x axis even when a category has no responses.
attend_plot <- function(response, title) {
  answered <- data.frame(response = response[!is.na(response)])
  ggplot(answered, aes(response)) +  geom_bar(na.rm = FALSE) +
    xlab("More or less likely to attend webinar") + ylab("# of respondents") +
    theme(axis.text = element_text(size=8, color="black", angle = 30, hjust = 1), title = element_text(size=6),
          axis.title = element_text(size=8), plot.background = element_rect(color="black", fill=NA)) +
    scale_x_discrete(drop=FALSE) + labs(title = title)
}

########################################
# Factors to attend: famous
########################################

dat$attendFameFac <- attend_factor(dat$attendFame)
plotnine <- attend_plot(dat$attendFameFac, "Famous presenter")
plotnine

########################################
# Factors to attend: core interest
########################################

dat$attendRelevantFac <- attend_factor(dat$attendRelevant)
plotten <- attend_plot(dat$attendRelevantFac, "Topic relevant to your core interest")
plotten

########################################
# Factors to attend: new interest
########################################

dat$attendNewFac <- attend_factor(dat$attendNew)
ploteleven <- attend_plot(dat$attendNewFac, "Topic is new research outside your core area")
ploteleven

########################################
# Factors to attend: convenient time
########################################

dat$attendConvenientFac <- attend_factor(dat$attendConvenient)
plottwelve <- attend_plot(dat$attendConvenientFac, "Live broadcast at a convenient time")
plottwelve

########################################
# Factors to attend: availability of video
########################################

dat$attendRecordFac <- attend_factor(dat$attendRecord)
plotthirteen <- attend_plot(dat$attendRecordFac, "Recorded video available")
plotthirteen

########################################
# Factors to attend: ask questions
########################################

dat$attendQuestionFac <- attend_factor(dat$attendQuestion)
plotfourteen <- attend_plot(dat$attendQuestionFac, "Opportunity for Q&A")
plotfourteen

########################################
# Factors to attend: job relevant
########################################

dat$attendJobsFac <- attend_factor(dat$attendJobs)
plotfifteen <- attend_plot(dat$attendJobsFac, "Relevant to job prospects")
plotfifteen

########################################
# Factors to attend: teaches practical skills
########################################

dat$attendSkillsFac <- attend_factor(dat$attendSkills)
plotsixteen <- attend_plot(dat$attendSkillsFac, "Teaches applied/practical skills (e.g., software)")
plotsixteen

########################################
# Factors to attend: recommended by friend
########################################

dat$attendRecommendedFac <- attend_factor(dat$attendRecommended)
plotseventeen <- attend_plot(dat$attendRecommendedFac, "Recommended by friend or colleague")
plotseventeen

########################################
# Factors to attend: presenter outside political science
########################################

dat$attendOutsideFac <- attend_factor(dat$attendOutside)
ploteighteen <- attend_plot(dat$attendOutsideFac, "Presenter outside Political Science")
ploteighteen

########################################
# Factors to attend: relevant to teaching
########################################

dat$attendTeachingFac <- attend_factor(dat$attendTeaching)
plotnineteen <- attend_plot(dat$attendTeachingFac, "Topic relevant to course you are teaching")
plotnineteen



# write the eleven panels to three PDFs, laid out on 2 x 2 grids
pdf("imc-attend-more-noimc-one.pdf", width=6, height=6)
grid.arrange(plotnine, plotten, ploteleven, plottwelve, ncol=2, nrow=2)
dev.off()

pdf("imc-attend-more-noimc-two.pdf", width=6, height=6)
grid.arrange(plotthirteen, plotfourteen, plotfifteen, plotsixteen, ncol=2, nrow=2)
dev.off()

pdf("imc-attend-more-noimc-three.pdf", width=6, height=6)
grid.arrange(plotseventeen, ploteighteen, plotnineteen, ncol=2, nrow=2)
dev.off()




########################################
# Factors to attend: core interest
########################################

# Two-panel figure (core interest, recorded video). These plots use a larger
# title size and no plot border, unlike the multi-panel attend figures.

dat$attendRelevantFac <- factor(dat$attendRelevant, levels=1:5, 
                                labels=c("Much more likely", "Somewhat more likely", "Neither more or less likely",
                                         "Somewhat less likely", "Much less likely"))

# drop respondents with no answer; keep all five categories on the axis
coreintplot <- ggplot(subset(dat, !is.na(attendRelevantFac)), aes(attendRelevantFac)) +  geom_bar(na.rm = FALSE) +
  xlab("More or less likely to attend webinar") + ylab("# of respondents") +
  theme(axis.text = element_text(size=8, color="black", angle = 30, hjust = 1),
        axis.title = element_text(size=8), title = element_text(size=8)) + scale_x_discrete(drop=FALSE) + 
  labs(title="Topic relevant to your core interest")

#ggsave(filename="video-core-interest.eps", plot=coreintplot, device="eps", width=2.9, height=2.9)

########################################
# Factors to attend: availability of video
########################################

dat$attendRecordFac <- factor(dat$attendRecord, levels=1:5, 
                              labels=c("Much more likely", "Somewhat more likely", "Neither more or less likely",
                                       "Somewhat less likely", "Much less likely"))

# drop respondents with no answer; keep all five categories on the axis
videoplot <- ggplot(subset(dat, !is.na(attendRecordFac)), aes(attendRecordFac)) +  geom_bar(na.rm = FALSE) +
  xlab("More or less likely to attend webinar") + ylab("# of respondents") +
  theme(axis.text = element_text(size=8, color="black", angle = 30, hjust = 1),
        axis.title = element_text(size=8), title = element_text(size=8)) + scale_x_discrete(drop=FALSE) + 
  labs(title="Recorded video available")

#ggsave(filename="video-recording.eps", plot=videoplot, device="eps", width=2.9, height=2.9)

# both panels side by side in one PDF
pdf("video-interest-noimc.pdf", width=6, height=3.5)
grid.arrange(coreintplot, videoplot, ncol=2, nrow=1)
dev.off()