Text Analysis: Death Row Last Statements in Texas

200 statements (2006 – 2020)
Source: Texas Department of Criminal Justice

Term Frequency (Top 20)
# Split every last statement into one word per row and discard common
# English stop words. The join key is spelled out so dplyr does not have to
# guess it (and does not print a "Joining, by = ..." message).
tkns <- stats %>%
  unnest_tokens(word, statement) %>%
  anti_join(stop_words, by = "word")

# "y'all" appears in several spellings (straight and typographic apostrophe).
# All of them are removed here, once, so the frequency chart and every later
# analysis built on `tkns` work from the same vocabulary. Missing tokens are
# dropped explicitly as well, which the former `!=` comparisons did implicitly.
yall_variants <- c("ya'll", "y'all", "y’all")
tkns <- tkns %>%
  filter(!is.na(word), !word %in% yall_variants)

# Term counts, most frequent first.
cnt <- tkns %>%
  count(word) %>%
  arrange(desc(n))

# Horizontal bar chart of the 20 most frequent terms (top_n keeps ties).
# The bars use one constant fill colour, so no fill aesthetic is mapped.
cnt %>%
  top_n(20, n) %>%
  ggplot(aes(x = reorder(word, n), y = n)) +
  geom_col(color = "black", fill = "#56B4E9") +
  theme_minimal() +
  coord_flip() +
  labs(
    x = "Term",
    y = "Frequency",
    subtitle = "Term Frequency",
    title = "Texas Death Row - Last Statements",
    caption = "@JihedNcib | Data: Texas Department of Criminal Justice"
  ) +
  # NOTE(review): the "saira" font family must be registered elsewhere
  # (e.g. via showtext/sysfonts) -- not visible in this chunk.
  theme(
    text = element_text(size = 18, family = "saira"),
    legend.position = "none"
  )
Sentiment Analysis
library("fmsb")

# Attach NRC lexicon categories to every token that appears in the lexicon,
# then keep only the emotion categories: the generic "positive"/"negative"
# polarity labels are filtered out.
sent <- tkns %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  select(word, sentiment) %>%
  filter(!grepl("positive|negative", sentiment))

# One-row table with one column per emotion holding its token count.
# pivot_wider() replaces the superseded spread().
snts <- sent %>%
  count(sentiment) %>%
  pivot_wider(names_from = sentiment, values_from = n)

# fmsb::radarchart() expects row 1 = axis maximum and row 2 = axis minimum,
# followed by the data rows. The limit rows are sized from the data instead
# of assuming exactly eight emotions, so rbind() still lines up if a category
# is absent from the corpus.
# NOTE(review): 912 and 150 are hard-coded axis limits -- presumably chosen
# from the observed counts; confirm they still bracket the data.
n_axes <- ncol(snts)
data <- rbind(rep(912, n_axes), rep(150, n_axes), snts)

radarchart(
  data,
  # polygon outline colour, fill colour and line width
  pcol = rgb(0.2, 0.5, 0.5, 0.9),
  pfcol = rgb(0.2, 0.5, 0.5, 0.5),
  plwd = 4,
  # grid styling
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  cglwd = 0.8,
  title = "Texas Death Row - Last Statements"
)
Topic Modeling
# Document-feature matrix: one row per statement id, one column per word,
# cells hold the word count.
tkns_dfm <- tkns %>%
  count(id, word, sort = TRUE) %>%
  cast_dfm(id, word, n)

# Structural topic model with six topics and spectral initialisation.
topic_mode <- stm(tkns_dfm, K = 6, init.type = "Spectral")

# Long table of per-topic word probabilities (columns: topic, term, beta).
td_beta <- tidy(topic_mode)

# Ten highest-beta terms per topic, one facet per topic.
td_beta %>%
  group_by(topic) %>%
  # Rank explicitly by beta rather than relying on top_n() picking the
  # last column.
  top_n(10, beta) %>%
  ungroup() %>%
  # Actually assign the reordered factor back to `term` (the previous
  # `mutate(term, reorder(...))` only added an unused column), and order
  # within each topic so every facet is sorted on its own.
  mutate(term = reorder_within(term, beta, topic)) %>%
  # Topic ids are categories, so map them to a discrete fill.
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~topic, scales = "free") +
  coord_flip() +
  # Strip the per-facet suffix that reorder_within() appends to the labels.
  scale_x_reordered()