Introduction

The analysis here aims to be as simple as possible. I’ll pull down Trump’s most recent tweets using the twitteR package.
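
The twitteR package needs OAuth credentials set up before userTimeline will return anything. A minimal sketch of that step, assuming you have registered a Twitter app; the key and secret values below are placeholders for your own credentials.

library(twitteR)
# Placeholder credentials -- substitute your own app's keys
consumer_key    <- "YOUR_CONSUMER_KEY"
consumer_secret <- "YOUR_CONSUMER_SECRET"
access_token    <- "YOUR_ACCESS_TOKEN"
access_secret   <- "YOUR_ACCESS_SECRET"
setup_twitter_oauth(consumer_key, consumer_secret, access_token, access_secret)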

library(purrr)      # map_df()
library(dplyr)      # tbl_df() and the pipe
library(lubridate)  # hour(), with_tz()

trump_tweets <- userTimeline("realDonaldTrump", n = 3200, excludeReplies = TRUE)
d <- tbl_df(map_df(trump_tweets, as.data.frame))
# Page back for older tweets, using the smallest id retrieved so far
trump_tweets2 <- userTimeline("realDonaldTrump", n = 3200, excludeReplies = TRUE, maxID = min(d$id))
d2 <- tbl_df(map_df(trump_tweets2, as.data.frame))
d <- rbind(d, d2)
d$hour <- hour(with_tz(d$created, "EST"))  # hour of day in US Eastern time

Clean the text

Tweet text always needs some cleaning up: strip out t.co links, HTML entities such as &amp;, and the @ and # symbols.

library(stringr)
d <- d %>% mutate(text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", ""))

d$text <- gsub("@", "", d$text)  # drop @ from mentions
d$text <- gsub("#", "", d$text)  # drop # from hashtags

Tweet counts by time of day

Does Trump still tweet in the morning?

library(ggplot2)
library(scales)  # percent_format()

d %>%
  count(hour) %>%
  mutate(percent = n / sum(n)) %>%
  ggplot(aes(hour, percent)) +
  geom_line() +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Hour of day (EST)",
       y = "% of tweets")

Before scoring sentiment, tokenize the tweets into individual words and drop common stop words:

library(tidytext)
data(stop_words)
words <- d %>% select(hour, id, favoriteCount, text) %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word")

Sentiment scores

This time I will use the AFINN lexicon, which scores words from -5 (most negative) to +5 (most positive).

afin <- get_sentiments("afinn")
# Note: newer tidytext versions name the AFINN score column "value" rather than "score"

# Split the day into the early morning hours (midnight to 8am EST) and the rest
words$daytime <- cut(words$hour, c(-1, 8, 25))
levels(words$daytime) <- c("Early", "Later")

word_score <- words %>% inner_join(afin, by = "word")
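
To get a feel for the lexicon, it helps to peek at the scores it assigns to a handful of words. The words below are just illustrative picks, not drawn from the tweets themselves.

afin %>% filter(word %in% c("great", "bad", "win", "disaster"))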

Do tweets with a more positive average score get more likes?

word_score %>% group_by(id) %>%
  summarise(n = n(), score = mean(score), likes = mean(favoriteCount)) %>%
  ggplot(aes(x = score, y = likes)) + geom_point() + geom_smooth()

Do early-morning tweets score differently from later ones?

word_score %>%
  ggplot(aes(x = daytime, y = score)) +
  stat_summary(fun.y = mean, geom = "point") +
  stat_summary(fun.data = mean_cl_normal, geom = "errorbar")  # mean_cl_normal needs the Hmisc package

Finally, average the scores by hour of the day and fit a GAM smooth through them:

library(mgcv)
word_score %>% group_by(hour) %>% summarise(score = mean(score)) %>%
  ggplot(aes(x = hour, y = score)) + geom_point() + geom_line() +
  geom_smooth(method = "gam", formula = y ~ s(x))