Twitter Reacts to Tunisia: A Quick Statistical Analysis

Corpus: 54825 tweets.

# Run one search per entry in tunisiadate$day[1:6] and append the results to
# the existing `tweets` data frame.
# NOTE(review): get_all_tweets() is not defined in this view; the two
# timestamps are passed positionally and are presumably the start and end of
# the search window -- confirm against the function's signature.
for (i in seq_len(6)) {
  fetched <- get_all_tweets(
    query = tunisiadate$day[i],
    "2021-07-26T00:00:00Z",
    "2021-08-01T12:00:00Z",
    bearer_token,
    is_retweet = FALSE,
    n = 200000
  )
  # Copy author_id into `author`, copy `metrics` into `reactions`, then keep
  # only the seven selected columns so every batch has the same layout.
  fetched <- fetched %>%
    mutate(author = author_id, reactions = metrics) %>%
    select(
      author_id, author, reactions, created_at, text, id,
      in_reply_to_user_id
    )
  tweets <- rbind(tweets, fetched)
  rm(fetched)
}
# Add a `date` column holding the first 10 characters of `created_at`
# (presumably the YYYY-MM-DD part of an ISO-8601 timestamp -- confirm).
tweets <- tweets %>% mutate(date = str_sub(created_at, 1, 10))
tweets3 <- tweets3 %>% mutate(date = str_sub(created_at, 1, 10))
tweets4 <- tweets4 %>% mutate(date = str_sub(created_at, 1, 10))

# Stack the four batches and keep only rows whose `lang` is "en".
# NOTE(review): tweets2 receives no `date` column here -- presumably it already
# has one; verify. Also, any batch without a `lang` column gets NA for it after
# bind_rows(), and those rows are dropped by the filter -- confirm intended.
tuntweets <- bind_rows(tweets, tweets2, tweets3, tweets4) %>%
  filter(lang == "en")

# Hashtag frequencies ----
# Tokenise the corpus without punctuation, keep only tokens matching the glob
# "#*", lower-case them, and count them in a document-feature matrix.
corptun <- corpus(tuntweets)
toks_tweets <- corptun %>%
  tokens(remove_punct = TRUE) %>%
  tokens_keep(pattern = "#*") %>%
  tokens_tolower()
dfmat_tweets <- dfm(toks_tweets)
tstat_freq <- textstat_frequency(dfmat_tweets)

# Dot plot of the 30 most frequent features, most frequent at the top.
ggplot(
  textstat_frequency(dfmat_tweets, n = 30),
  aes(x = reorder(feature, frequency), y = frequency)
) +
  geom_point() +
  coord_flip() +
  labs(
    x = NULL,
    y = "Frequency",
    title = "Frequency of Hashtags",
    caption = "@JihedNcib"
  ) +
  theme_minimal()
# Most used words ----
# Custom stop list applied on top of the English stop words: the search terms
# themselves plus frequent function words and other tokens to exclude.
cust <- c(
  "e", "de", "tunísia", "tunisia", "tunisian", "tunisia's", "o", "la",
  "kais", "saied", "said", "amp", "country", "da", "que", "us", "di", "il",
  "في", "|", "via", "happening", "just", "without", "من", "و", "can", "like",
  "s", "new", "le", "one", "know", "el", "al", "presidente", "un", "na", "1",
  "argentina", "2", "want", "não", "years", "se", "تونس"
)

# Tokenise again, this time dropping URLs as well as punctuation and removing
# (rather than keeping) the tokens that match the glob "#*".
corptun <- corpus(tuntweets)
toks_tweets <- corptun %>%
  tokens(remove_punct = TRUE, remove_url = TRUE) %>%
  tokens_remove(pattern = "#*") %>%
  tokens_tolower()

test <- c(stopwords("en"), cust)

# The stop list is removed with dfm_remove() instead of dfm(..., remove = ),
# because the `remove` argument of dfm() is deprecated in quanteda >= 3.0.
# toks_tweets itself is left untouched, as before.
dfmat_tweets <- dfm(toks_tweets) %>%
  dfm_remove(pattern = test)
tstat_freq <- textstat_frequency(dfmat_tweets)

# Dot plot of the 30 most frequent remaining words, most frequent at the top.
ggplot(
  textstat_frequency(dfmat_tweets, n = 30),
  aes(x = reorder(feature, frequency), y = frequency)
) +
  geom_point() +
  coord_flip() +
  labs(
    x = NULL,
    y = "Frequency",
    title = "Most Used Words",
    caption = "@JihedNcib"
  ) +
  theme_minimal()
# Users receiving the most replies ----
# Count tweets per in_reply_to_user_id, attach a handle to the hard-coded
# account IDs, drop every other ID, and sort by descending count.
# NOTE(review): ct() and frcode() are not defined in this view; the code relies
# on ct() returning a count column `n` and on frcode() accepting
# case_when-style `condition ~ label` formulas -- confirm their source package.
replies <- tuntweets %>%
  ct(in_reply_to_user_id) %>%
  mutate(
    user = frcode(
      in_reply_to_user_id == "783792992" ~ "@IlhanMN",
      in_reply_to_user_id == "2443666806" ~ "@AlarabyTV",
      in_reply_to_user_id == "122515724" ~ "@radwan_masmoudi",
      in_reply_to_user_id == "1288792853230096385" ~ "@Rd_tunisia",
      in_reply_to_user_id == "1275079410" ~ "@IKherigi",
      in_reply_to_user_id == "1378776417928937474" ~ "@enaasjo",
      in_reply_to_user_id == "1175162837490110465" ~ "@LouayCherni4",
      in_reply_to_user_id == "2373735295" ~ "@MiddleEastEye",
      in_reply_to_user_id == "1287302961379631105" ~ "@Conquest1453",
      in_reply_to_user_id == "21088417" ~ "@marcowenjones",
      TRUE ~ "REMOVE"
    )
  ) %>%
  filter(user != "REMOVE") %>%
  arrange(desc(n))

# Horizontal bar chart, largest count at the top.
ggplot(replies, aes(x = reorder(user, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Users",
    y = "Number of Replies",
    title = "Users Receiving More Engagement (Replies)",
    caption = "@JihedNcib"
  ) +
  theme_minimal()
# Most active users ----
# Count tweets per author_id and keep the (up to) ten most prolific authors.
# NOTE(review): ct() and frcode() are not defined in this view; the code relies
# on ct() returning a count column `n` and on frcode() accepting
# case_when-style `condition ~ label` formulas -- confirm their source package.
posts <- tuntweets %>% ct(author_id)
posts <- posts[order(-posts$n), ]
# head() instead of posts[1:10, ]: indexing past the last row would pad the
# table with all-NA rows when fewer than ten authors are present.
posts <- head(posts, 10)

# Attach a handle to the hard-coded account IDs; any other ID is labelled
# "REMOVE".
posts <- mutate(
  posts,
  user = frcode(
    author_id == "2965653249" ~ "@Tunisia_Watch",
    author_id == "2305181856" ~ "@newsafricanow",
    author_id == "1304065965026467840" ~ "@sachinbhat94",
    author_id == "2866637663" ~ "@n_e_i_n",
    author_id == "2369593518" ~ "@Bichettt",
    author_id == "1287302961379631105" ~ "@Conquest1453",
    author_id == "2380259108" ~ "@The_NewArab",
    author_id == "2877383369" ~ "@25Juillet_",
    author_id == "81136269" ~ "@MiddleEastMnt",
    author_id == "1171516450147667970" ~ "@LHamza200",
    TRUE ~ "REMOVE"
  )
)
# Drop unmapped authors so a literal "REMOVE" bar never reaches the chart,
# matching how the replies table is handled.
posts <- posts %>% filter(user != "REMOVE")

# Horizontal bar chart, largest count at the top.
ggplot(posts, aes(x = reorder(user, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "Users",
    y = "Number of Tweets",
    title = "Most Active Users (By Number of Tweets)",
    caption = "@JihedNcib"
  ) +
  theme_minimal()