Text Analysis: Death Row Last Statements in Texas
200 statements (2006 – 2020)
Source: Texas Department of Criminal Justice

# Term frequency ----
# Split every last statement into one word per row, discard stop words and
# the "y'all" contraction, then chart the 20 most frequent remaining terms.

# Spellings of "y'all" to discard: straight and typographic apostrophes.
yall_variants <- c("ya'll", "y'all", "y’all")

# All variants are removed here, once, so the plot and any later analysis
# built on `tkns` see the same vocabulary. (Previously the typographic
# spelling was only dropped from the plot input.)
tkns <- stats %>%
  unnest_tokens(word, statement) %>%
  anti_join(stop_words, by = "word") %>%
  # `!is.na(word)` keeps the behaviour of the former `word != ...` filters,
  # which silently dropped NA tokens.
  filter(!is.na(word), !word %in% yall_variants)

# Word counts, most frequent first.
cnt <- tkns %>%
  count(word) %>%
  arrange(desc(n))

cnt %>%
  top_n(20, n) %>%
  # Bars use one constant fill, so no fill aesthetic (and no legend) is needed.
  ggplot(aes(x = reorder(word, n), y = n)) +
  geom_col(color = "black", fill = "#56B4E9") +
  theme_minimal() +
  coord_flip() +
  labs(
    x = "Term",
    y = "Frequency",
    subtitle = "Term Frequency",
    title = "Texas Death Row - Last Statements",
    caption = "@JihedNcib | Data: Texas Department of Criminal Justice"
  ) +
  theme(text = element_text(size = 18, family = "saira"))

# Sentiment radar chart ----
# Tag tokens with NRC emotions, count tokens per emotion and draw the counts
# as a radar chart.
library("fmsb")

# Keep only the emotion categories; the NRC lexicon's "positive"/"negative"
# polarity labels are excluded.
sent <- tkns %>%
  inner_join(get_sentiments("nrc"), by = "word") %>%
  select(word, sentiment) %>%
  filter(!grepl("positive|negative", sentiment))

# One row, one column per emotion, holding the token count.
snts <- sent %>%
  count(sentiment) %>%
  spread(sentiment, n)

# radarchart() reads row 1 as the axis maximum and row 2 as the axis minimum;
# the observed values start at row 3.
# NOTE(review): 912 / 150 look hand-picked for this data set - confirm they
# still bracket the counts if the input changes.
axis_max <- 912
axis_min <- 150

# Size the limit rows from the data instead of assuming eight emotions, so
# rbind() still lines up when an emotion does not occur at all.
n_axes <- ncol(snts)
data <- rbind(rep(axis_max, n_axes), rep(axis_min, n_axes), snts)

radarchart(
  data,
  pcol = rgb(0.2, 0.5, 0.5, 0.9),
  pfcol = rgb(0.2, 0.5, 0.5, 0.5),
  plwd = 4,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  cglwd = 0.8,
  title = "Texas Death Row - Last Statements"
)

# Topic model ----
# Fit a 6-topic structural topic model on the per-statement word counts and
# plot the ten highest-probability terms of each topic.

# Document-feature matrix: one document per `id`, one feature per word.
tkns_dfm <- tkns %>%
  count(id, word, sort = TRUE) %>%
  cast_dfm(id, word, n)

topic_mode <- stm(tkns_dfm, K = 6, init.type = "Spectral")

# Long format: one row per (topic, term) with its probability `beta`.
td_beta <- tidy(topic_mode)

td_beta %>%
  group_by(topic) %>%
  # Rank explicitly by beta rather than relying on top_n()'s
  # "last column" default.
  top_n(10, beta) %>%
  ungroup() %>%
  # Assign the reordered factor back to `term`; the earlier
  # `mutate(term, reorder(term, beta))` left `term` unsorted.
  # A term shared by several topics is placed by its mean beta.
  mutate(term = reorder(term, beta)) %>%
  ggplot(aes(term, beta, fill = topic)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~topic, scales = "free") +
  coord_flip()