kmunger / BMS_chapter_vignette


Replication code for the Benoit, Munger, and Spirling book chapter

"Dumbing Down? Trends in the Complexity of Political Communicationy" by Kenneth Benoit, Kevin Munger, and Arthur Spirling. This book chapter appears in an SSRC-funded "Anxieties of Democracy" edited volume, published by Cambridge University ress. The code presented here can be used to replicate all of the figures in this manuscript, and takes advantage of the concurrently developed sophistication package.

Installing sophistication

Using the devtools package:

devtools::install_github("kbenoit/sophistication")
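
If devtools is not already installed, it is available from CRAN; a one-time setup step:

install.packages("devtools")  # only needed if devtools is missing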

Figure 1

library(quanteda)
## Package version: 1.1.1
## Parallel computing: 2 of 8 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View
library(sophistication)
## sophistication version 0.65
## Dropbox folder location set to: C:/Users/kevin/Dropbox/Benoit_Spirling_Readability/
library(stringr)
library(quanteda.corpora)
## compute readability and bootstrap the sentences
set.seed(20170308)
data(data_corpus_sotu, package = "quanteda.corpora")

results <- bootstrap_readability(data_corpus_sotu, n = 2, measure = "Flesch", verbose = FALSE)
## 1
##  2
bs_sd <- results$bs_sd[["Flesch"]]   # [[ ]] extracts plain numeric vectors
stat <- results$original[["Flesch"]]
year <- lubridate::year(docvars(data_corpus_sotu, "Date"))
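## bootstrap_readability returns the statistics computed on the original texts
## alongside bootstrap standard deviations; a quick structural check
## (a minimal inspection sketch):
str(results, max.level = 1)  # expect components including original and bs_sd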
## calculate statistics for SOTU
library(trend)
library(strucchange)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## 
## The following object is masked from 'package:stringr':
## 
##     boundary
sotu_mean <- mean(stat)
sotu_var <- var(stat)
sotu_cs_test <- cs.test(stat)  # Cox-Stuart trend test
sotu_ts <- as.ts(stat)
sotu_breaks <- breakpoints(sotu_ts ~ 1)
summary(sotu_breaks)
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = sotu_ts ~ 1)
## 
## Breakpoints at observation number:
##                          
## m = 1             145    
## m = 2         124     176
## m = 3      68     145 194
## m = 4      68 112 146 194
## m = 5   34 68 112 146 194
## 
## Corresponding to breakdates:
##                          
## m = 1             145    
## m = 2         124     176
## m = 3      68     145 194
## m = 4      68 112 146 194
## m = 5   34 68 112 146 194
## 
## Fit:
##                                        
## m   0     1     2     3     4     5    
## RSS 40144 14775 11547  9719  9264  9230
## BIC  1865  1644  1598  1569  1568  1578




#3 breakpoints fit the data best: BIC is essentially flat between m = 3 and m = 4, so take the more parsimonious model
fm0 <- lm(stat ~ breakfactor(sotu_breaks, breaks = 3))
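
# the choice can be eyeballed with strucchange's plot method for breakpoints
# objects, which draws RSS and BIC against the number of breaks (a minimal sketch):
plot(sotu_breaks)  # the BIC curve should flatten out near m = 3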



sotu_breaks <- breakpoints(sotu_ts ~ 1, breaks = 3)
year_breaks <- year[breakdates(sotu_breaks)]



## plot the trend
require(ggplot2)
## Loading required package: ggplot2
p <- ggplot(data = docvars(data_corpus_sotu),
            aes(x = year, y = stat)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  xlab("") +
  ylab("Flesch Reading Ease Score") +
  geom_errorbar(aes(ymin=stat-1.96*bs_sd, ymax=stat+1.96*bs_sd), colour="grey70", width=.1) +
  geom_point(aes(shape = party), size = 2, fill = "black") +
  scale_shape_manual(values = c(17, 8, 3, 19, 15, 20)) +
  geom_vline(aes(xintercept= year_breaks[1] ) ) +
  geom_vline(aes(xintercept= year_breaks[2] ) ) +
  geom_vline(aes(xintercept= year_breaks[3] ) ) +
  theme(plot.title = element_text(lineheight=.8, face="bold"),
        axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15))
print(p)

Figure 2

# compute the levels
period <- cut(year, breaks = c(min(year), year_breaks, max(year)), include.lowest = TRUE, dig.lab = 4)
# reformat the levels
levs <- stringi::stri_replace_all_fixed(levels(period), c("[", "]", "("), "", vectorize_all = FALSE)
levs <- strsplit(levs, ",")
levs[2:length(levs)] <- lapply(levs[2:length(levs)], function(x) {
    x[1] <- as.integer(x[1]) + 1
    x
})
levels(period) <- sapply(levs, paste, collapse = "-")
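
# because the cut points reuse the estimated break years, a quick check that
# every address landed in a period (an inspection sketch):
table(period, useNA = "ifany")  # no NA counts expected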

ggplot(aes(y = stat, x = period, fill = docvars(data_corpus_sotu, "delivery")),
       data = NULL) +
    geom_boxplot() +
    scale_fill_grey("", start = .6, end = .9) +
    theme(plot.title = element_text(lineheight=.8, face="bold"),
           axis.text.x = element_text(size = 15),
          axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15)) +
    labs(y = "Flesch Reading Ease Score", x = "") +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          legend.key.size =  unit(2.5, "cm"),
          axis.line = element_line(colour = "black"))

Figures 3, 5, and 7: US data


library(sophistication)
library(quanteda)
library(stringi)
## Warning: package 'stringi' was built under R version 3.4.4
library(magrittr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

data("data_corpus_sotu")


SOTU_stat <- textstat_readability(data_corpus_sotu, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

SOTU_year <- lubridate::year(docvars(data_corpus_sotu, "Date"))
SOTU_fre_df <- data.frame("year" = SOTU_year, "SOTU_stat" = SOTU_stat$Flesch)
SOTU_fre_sent <- data.frame("year" = SOTU_year, "SOTU_stat" = SOTU_stat$meanSentenceLength)
SOTU_fre_word <- data.frame("year" = SOTU_year, "SOTU_stat" = SOTU_stat$meanWordSyllables)


###Executive Orders

data("data_corpus_eo")


#extract the document-level data frame
clean1 <- data_corpus_eo$documents



##fix abbreviations that would otherwise be mis-split as sentence boundaries
clean1$texts <- stri_replace_all_fixed(clean1$texts, "A. D.", "AD")




#drop anything that's very long (95th percentile and above)
quant95 <- which( nchar(clean1$texts) >
                    quantile( nchar(clean1$texts), prob=.95) )
clean2 <- clean1[-quant95, c(1:2)]





eo_corp <- corpus(clean2, text_field = "texts")

##drop very short sentences
eo_corp <- corpus_trimsentences(eo_corp, min_length = 4)

eo_corp <- corpus_subset(eo_corp, ntoken(eo_corp) > 10)
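
# trimming short sentences and then dropping very short documents thins the
# corpus; a quick look at what remains (an illustrative check):
summary(ntoken(eo_corp))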

eo_year <- docvars(eo_corp)


eo_stat <- textstat_readability(eo_corp$documents$texts, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

eo_fre_df <- data.frame("year" = eo_year$Year, "eo_stat" = eo_stat$Flesch)
eo_fre_sent <- data.frame("year" = eo_year$Year, "eo_stat" = eo_stat$meanSentenceLength)
eo_fre_word <- data.frame("year" = eo_year$Year, "eo_stat" = eo_stat$meanWordSyllables)


########SCOTUS
#load("C:/Users/kevin/Documents/GitHub/BMS_chapter_replication/data/data_corpus_SCOTUS.rda")
load("C:/Users/kevin/Desktop/data_corpus_SCOTUS.rda")

#drop the ones that are empty
clean1 <- corpus_subset(data_corpus_SCOTUS, nchar(texts(data_corpus_SCOTUS)) > 0)


## implement fixes
texts(clean1) <- stri_replace_all_fixed(texts(clean1), "U. S.", "US")



#drop anything that's very long (95th percentile and above)
temp_lengths <- stri_length(texts(clean1))
clean2 <- corpus_subset(clean1, temp_lengths < quantile(temp_lengths, prob = .95))
scotus_lengths <- ntoken(clean2, removePunct = TRUE)

clean3 <- corpus_trimsentences(clean2, min_length = 4)

scotus_year <- clean3$documents$Year

scotus_stat <- textstat_readability(clean3$documents$texts, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

scotus_fre_df <- data.frame("year" = scotus_year, "scotus_stat" = scotus_stat$Flesch)
scotus_fre_sent <- data.frame("year" = scotus_year, "scotus_stat" = scotus_stat$meanSentenceLength)
scotus_fre_word <- data.frame("year" = scotus_year, "scotus_stat" = scotus_stat$meanWordSyllables)






##too many opinions to plot well, and heavily skewed toward recent years--draw a more balanced sample

years <- unique(scotus_fre_df$year)

# draw at most 30 opinions per year
indices <- vector()
for (i in 1:length(years)) {
  set <- which(scotus_fre_df[, "year"] == years[i])
  num <- min(length(set), 30)
  samp <- sample(set, num, replace = FALSE)
  indices <- c(indices, samp)
}
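
# the same stratified draw can be written without the loop (an equivalent
# base-R sketch; indices_alt is an illustrative name, and sample.int() sidesteps
# sample()'s surprising behavior when a year contains a single opinion):
indices_alt <- unlist(lapply(
  split(seq_len(nrow(scotus_fre_df)), scotus_fre_df$year),
  function(idx) idx[sample.int(length(idx), min(length(idx), 30))]
))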


balanced_scotus_fre_df <- scotus_fre_df[indices, ]
balanced_scotus_fre_sent <- scotus_fre_sent[indices, ]
balanced_scotus_fre_word <- scotus_fre_word[indices, ]




require(reshape2)
## Loading required package: reshape2


###Congress
load("C:/Users/kevin/Desktop/data_corpus_CR.rdata")


#drop the speeches made by the Speaker
speaker <- which( (data_corpus_CR$documents$name)=="Speaker" )
clean1 <- data_corpus_CR$documents[-speaker, ]



#drop anything that's very long (95th percentile and above)
quant95 <- which( nchar(clean1$texts) >
                    quantile( nchar(clean1$texts), prob=.95) )
clean2 <- clean1[-quant95, ]


##sync up year format: two-digit years 95-99 are 1995-1999, single digits 0-8 are 2000-2008

two_digit <- clean2$year %in% 95:99
clean2$year[two_digit] <- clean2$year[two_digit] + 1900
one_digit <- clean2$year %in% 0:8
clean2$year[one_digit] <- clean2$year[one_digit] + 2000


##readability is too slow to run on the whole corpus--take a random sample of 10,000 speeches
sample_rows <- sample(seq_len(length(clean2$texts)), 10000, replace = FALSE)


sample_data <- clean2[sample_rows, ]


congress_year <- sample_data$year


congress_stat <- textstat_readability(sample_data$texts, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

congress_fre_df <- data.frame("year" = congress_year, "congress_stat" = congress_stat$Flesch)
congress_fre_sent <- data.frame("year" = congress_year, "congress_stat" = congress_stat$meanSentenceLength)
congress_fre_word <- data.frame("year" = congress_year, "congress_stat" = congress_stat$meanWordSyllables)




##melt together for plotting--Figure 3
require(reshape2)


df_US<-melt(list(SOTU=SOTU_fre_df, SCOTUS=balanced_scotus_fre_df, Congress = congress_fre_df,
                 ExecOrders = eo_fre_df), id.vars="year")
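
# melt() on a named list stacks the four data frames and records each list
# name in a column called L1, which is what the linetype aesthetic below maps
# to; a peek at the long format (an illustrative check):
head(df_US)  # the L1 column holds the series name (SOTU, SCOTUS, ...)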



require(ggplot2)

linez<-c( "F1", "dashed",  "dotdash","solid")

p <- ggplot(data = df_US,
            aes(x = year, y = value, linetype = L1)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  geom_smooth(alpha=0.2,  method = "loess", span = .34, color = "black", se = F) +
  coord_cartesian(ylim = c(-20, 65)) +
  theme(legend.position="none", axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15))+
  scale_linetype_manual(values = linez)+
  xlab("") +
  ylab("Flesch Reading Ease Score") +
  theme(plot.title = element_text(lineheight=.8, face="bold")) +
  annotate("text", label = "Congress Speech", x = 1995, y = 45, size = 6, colour = "black")+
  
  annotate("text", label = "SOTU", x = 1800, y = 20, size = 6, colour = "black")+
  annotate("text", label = "SCOTUS", x = 1810, y = 50, size = 6, colour = "black") +
  annotate("text", label = "Executive Orders", x = 1970, y = 0, size = 6, colour = "black")


print(p)

##melt together for plotting--Figure 5


df_US_sent<-melt(list(SOTU=SOTU_fre_sent, SCOTUS=balanced_scotus_fre_sent, Congress = congress_fre_sent,
                 ExecOrders = eo_fre_sent), id.vars="year")




linez<-c( "F1", "dashed",  "dotdash","solid")


p <- ggplot(data = df_US_sent,
            aes(x = year, y = value, linetype = L1)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  geom_smooth(alpha=0.2,  method = "loess", span = .34, color = "black", se = F) +

  coord_cartesian(ylim = c(15, 65)) +
  theme(legend.position="none", axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15))+
  scale_linetype_manual(values = linez)+
  xlab("") +
  ylab("Mean Sentence Length") +

  theme(plot.title = element_text(lineheight=.8, face="bold")) +
  annotate("text", label = "Congress Speech", x = 1960, y = 17, size = 6, colour = "black")+
  annotate("text", label = "SOTU", x = 1910, y = 25, size = 6, colour = "black")+
  annotate("text", label = "SCOTUS", x = 1810, y = 30, size = 6, colour = "black") +
  annotate("text", label = "Executive Orders", x = 1960, y = 50, size = 6, colour = "black")


print(p)

##melt together for plotting--Figure 7


df_US_word<-melt(list(SOTU=SOTU_fre_word, SCOTUS=balanced_scotus_fre_word, Congress = congress_fre_word,
                      ExecOrders = eo_fre_word), id.vars="year")


p <- ggplot(data = df_US_word,
            aes(x = year, y = value, linetype = L1)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  geom_smooth(alpha=0.2,  method = "loess", span = .34, color = "black", se = F) +
coord_cartesian(ylim = c(1.4, 1.85)) +

theme(legend.position="none", axis.text.x = element_text(size = 15),
      axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15))+
  scale_linetype_manual(values = linez)+
  xlab("") +
  ylab("Mean Number of Syllables per Word") + theme(plot.title = element_text(lineheight=.8, face="bold")) +
  annotate("text", label = "SOTU", x = 1800, y = 1.73, size = 6, colour = "black")+
  annotate("text", label = "SCOTUS", x = 1810, y = 1.47, size = 6, colour = "black") +
  annotate("text", label = "Con. Speech", x = 1998, y = 1.61, size = 6, colour = "black")+
  
  annotate("text", label = "Executive Orders", x = 1950, y = 1.82, size = 6, colour = "black")

print(p)

Figures 4, 6, and 8: Comparative data



data("data_corpus_sotu")


SOTU_stat <- textstat_readability(data_corpus_sotu, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

SOTU_year <- lubridate::year(docvars(data_corpus_sotu, "Date"))
SOTU_fre_df <- data.frame("year" = SOTU_year, "SOTU_stat" = SOTU_stat$Flesch)
SOTU_fre_sent <- data.frame("year" = SOTU_year, "SOTU_stat" = SOTU_stat$meanSentenceLength)
SOTU_fre_word <- data.frame("year" = SOTU_year, "SOTU_stat" = SOTU_stat$meanWordSyllables)

########nobel
#load("data_text/NobelLitePresentations/data_corpus_nobel.rdata")
load("C:/Users/kevin/Desktop/data_corpus_nobel.rdata")


temp_lengths <- stri_length(texts(data_corpus_nobel))
data_corpus_nobel <- corpus_subset(data_corpus_nobel, temp_lengths < quantile(temp_lengths, prob = .95))


nobel_corp <- corpus_trimsentences(data_corpus_nobel, min_length = 4)


nobel_lengths<-ntoken(nobel_corp, removePunct = T)






nobel_stat <- textstat_readability(nobel_corp, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

nobel_year <- (docvars(data_corpus_nobel, "year"))
nobel_fre_df <- data.frame("year" = nobel_year, "nobel_stat" = nobel_stat$Flesch)
nobel_fre_sent <- data.frame("year" = nobel_year, "nobel_stat" = nobel_stat$meanSentenceLength)
nobel_fre_word <- data.frame("year" = nobel_year, "nobel_stat" = nobel_stat$meanWordSyllables)



########Party Broadcasts

data("data_corpus_partybroadcasts")
temp_lengths <- stri_length(texts(data_corpus_partybroadcasts))
data_corpus_pb <- corpus_subset(data_corpus_partybroadcasts, temp_lengths < quantile(temp_lengths, prob = .95))

pb_corp <- corpus_trimsentences(data_corpus_pb, min_length = 4)

pb_lengths<-ntoken(pb_corp, removePunct = T)

###the broadcast dates must be recovered from the texts themselves: pull the numeric tokens from the first 30 characters and keep the last one as the year
xx <- substr(texts(pb_corp), 1, 30)
xxx <- str_extract_all(xx, "\\(?[0-9,.]+\\)?")


pb_year <- numeric()
for (i in 1:length(xxx)) {
  pb_year[i] <- as.numeric(xxx[[i]][length(xxx[[i]])])
}
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion

## Warning: NAs introduced by coercion
## fill in the four dates the regex missed (the NA warnings above)
pb_year[4] <- 1964
pb_year[10] <- 1970
pb_year[14] <- 1974
pb_year[22] <- 1979
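
# the extraction loop could equally be a one-liner (an equivalent sapply()
# sketch; pb_year_alt is an illustrative name, and the same coercion warnings
# and manual fixes still apply):
pb_year_alt <- as.numeric(sapply(xxx, function(x) x[length(x)]))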



pb_stat <- textstat_readability(pb_corp, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))


pb_fre_df <- data.frame("year" = pb_year, "pb_stat" = pb_stat$Flesch)
pb_fre_sent <- data.frame("year" = pb_year, "pb_stat" = pb_stat$meanSentenceLength)
pb_fre_word <- data.frame("year" = pb_year, "pb_stat" = pb_stat$meanWordSyllables)



########UK Manifestos

load("C:/Users/kevin/Desktop/data_corpus_man.rdata")

temp_lengths <- stri_length(texts(data_corpus_man))
data_corpus_man <- corpus_subset(data_corpus_man, temp_lengths < quantile(temp_lengths, prob = .95))


man_corp <- corpus_trimsentences(data_corpus_man, min_length = 4)


docvars(data_corpus_man)
##              Year
## Con1918.txt  1918
## Con1922.txt  1922
## Con1923.txt  1923
## Con1924.txt  1924
## Con1929.txt  1929
## Con1931.txt  1931
## Con1935.txt  1935
## Con1945.txt  1945
## Con1950.txt  1950
## Con1951.txt  1951
## Con1955.txt  1955
## Con1959.txt  1959
## Con1964.txt  1964
## Con1966.txt  1966
## Con1970.txt  1970
## Con1974a.txt 1974
## Con1974b.txt 1974
## Con1979.txt  1979
## Con1983.txt  1983
## Con1987.txt  1987
## Con1997.txt  1997
## Con2001.txt  2001
## Con2005.txt  2005
## Lab1918.txt  1918
## Lab1922.txt  1922
## Lab1923.txt  1923
## Lab1924.txt  1924
## Lab1929.txt  1929
## Lab1931.txt  1931
## Lab1935.txt  1935
## Lab1945.txt  1945
## Lab1950.txt  1950
## Lab1951.txt  1951
## Lab1955.txt  1955
## Lab1959.txt  1959
## Lab1964.txt  1964
## Lab1966.txt  1966
## Lab1970.txt  1970
## Lab1974a.txt 1974
## Lab1974b.txt 1974
## Lab1979.txt  1979
## Lab1987.txt  1987
## Lab1992.txt  1992
## Lab1997.txt  1997
## Lab2001.txt  2001
## Lab2005.txt  2005
## Lib1918.txt  1918
## Lib1922.txt  1922
## Lib1923.txt  1923
## Lib1924.txt  1924
## Lib1929.txt  1929
## Lib1931.txt  1931
## Lib1935.txt  1935
## Lib1945.txt  1945
## Lib1950.txt  1950
## Lib1951.txt  1951
## Lib1955.txt  1955
## Lib1959.txt  1959
## Lib1964.txt  1964
## Lib1966.txt  1966
## Lib1970.txt  1970
## Lib1974a.txt 1974
## Lib1974b.txt 1974
## Lib1979.txt  1979
## Lib1983.txt  1983
## Lib1987.txt  1987
## Lib1992.txt  1992
## Lib1997.txt  1997
## Lib2001.txt  2001
## Lib2005.txt  2005
## Lib2010.txt  2010


man_stat <- textstat_readability(man_corp, measure = c("Flesch", "meanSentenceLength", "meanWordSyllables"))

man_year <- (docvars(data_corpus_man, "Year"))
man_fre_df <- data.frame("year" = man_year, "man_stat" = man_stat$Flesch)
man_fre_sent <- data.frame("year" = man_year, "man_stat" = man_stat$meanSentenceLength)
man_fre_word <- data.frame("year" = man_year, "man_stat" = man_stat$meanWordSyllables)



##melt together for plotting--Figure 4
require(reshape2)


df_comp<-melt(list(SOTU=SOTU_fre_df, Manifestos=man_fre_df, Broadcasts = pb_fre_df,
                 Nobel = nobel_fre_df), id.vars="year")



require(ggplot2)

linez<-c( "F1", "dashed",  "dotdash","solid")


p <- ggplot(data = df_comp,
            aes(x = year, y = value, linetype = L1)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  geom_smooth(alpha=0.2,  method = "loess", span = .34, color = "black", se = F) +
  geom_point(alpha=0.01) + 
  coord_cartesian(ylim = c(15, 70), xlim=c(1900, 2010)) +
  theme(legend.position="none")+
  scale_linetype_manual(values = linez)+
  xlab("") +
  ylab("Flesch Reading Ease Score") +
  #geom_line(aes(), alpha=0.3, size = 1) +
  # ggtitle("Text Complexity in State of the Union Addresses") +
  theme(plot.title = element_text(lineheight=.8, face="bold")) +
  annotate("text", label = "SOTU", x = 1995, y = 55, size = 6, colour = "black")+
  annotate("text", label = "UK Manifestos", x = 1925, y = 30, size = 6, colour = "black") +
  annotate("text", label = "Party Broadcasts", x = 1955, y = 65, size = 6, colour = "black") + 
  annotate("text", label = "Nobel Prize", x = 1910, y = 48, size = 6, colour = "black")


print(p)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 1979
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.9715e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 16

##melt together for plotting--Figure 6



df_comp_sent<-melt(list(SOTU=SOTU_fre_sent, Manifestos=man_fre_sent, Broadcasts = pb_fre_sent,
                   Nobel = nobel_fre_sent), id.vars="year")



linez<-c( "F1", "dashed",  "dotdash","solid")



p <- ggplot(data = df_comp_sent,
            aes(x = year, y = value, linetype = L1)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  geom_smooth(alpha=0.2,  method = "loess", span = .34, color = "black", se = F) +
  coord_cartesian(ylim = c(15, 35), xlim=c(1900, 2010)) +
  theme(legend.position="none")+
  scale_linetype_manual(values = linez)+
  xlab("") +
  ylab("Mean Sentence Length") +
  theme(axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15)) +
  annotate("text", label = "SOTU", x = 1912, y = 34, size = 6, colour = "black")+
  annotate("text", label = "UK Manifestos", x = 2005, y = 27, size = 6, colour = "black") +
  annotate("text", label = "Party Broadcasts", x = 1965, y = 15, size = 6, colour = "black") + 
  annotate("text", label = "Nobel Prize", x = 1910, y = 26, size = 6, colour = "black")



print(p)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 1979
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.9715e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 16

##melt together for plotting--Figure 8



df_comp_word<-melt(list(SOTU=SOTU_fre_word, Manifestos=man_fre_word, Broadcasts = pb_fre_word,
                        Nobel = nobel_fre_word), id.vars="year")



p <- ggplot(data = df_comp_word,
            aes(x = year, y = value, linetype = L1)) + #, group = delivery)) +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black")) +
  geom_smooth(alpha=0.2,  method = "loess", span = .34, color = "black", se = F) +
  coord_cartesian(ylim = c(1.4, 1.75), xlim=c(1900, 2010)) +
  theme(legend.position="none")+
  scale_linetype_manual(values = linez)+
  xlab("") +
  ylab("Mean Number of Syllables per Word") +
  theme(axis.text.x = element_text(size = 15),
        axis.text.y = element_text(size = 15), axis.title.y = element_text(size = 15)) +
  annotate("text", label = "SOTU", x = 1903, y = 1.66, size = 6, colour = "black")+
  annotate("text", label = "UK Manifestos", x = 1985, y = 1.72, size = 6, colour = "black") +
  annotate("text", label = "Party Broadcasts", x = 1955, y = 1.45, size = 6, colour = "black") + 
  annotate("text", label = "Nobel Prize", x = 1910, y = 1.58, size = 6, colour = "black")



print(p)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 1979
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 7.9715e-017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 16
