Code
### Load Packages
library(tidyverse)
library(tidytext)
library(pdftools)
library(ggwordcloud)
library(textdata)
February 10, 2024
In this report, I analyze John Steinbeck’s classic novel, “East of Eden”, to determine the most frequently used words and the overall sentiment throughout the book. Figure 1 displays the most frequently used words in each chapter for Chapters 1-16, and Figure 2 is a word cloud of the most frequently used words in Chapter 1 alone. Figures 3 and 4 portray the sentiment analysis for Chapters 1-16.
# Five most frequent words in each of Chapters 1-16.
# `%in%` tests set membership row by row. `chapter == 1:16` would instead
# recycle the 16-element vector down the column, keeping only rows whose
# chapter happens to equal their position in that repeating cycle.
top_5_words <- wordcount_clean %>%
  filter(chapter %in% 1:16) %>%
  group_by(chapter) %>%
  # Sort by descending count so slice() takes the largest n within each group.
  arrange(-n) %>%
  slice(1:5) %>%
  ungroup()
# Bar chart of the top words, one facet per chapter. Free scales let every
# panel show its own words and count range; axis titles are left blank.
ggplot(top_5_words, aes(y = word, x = n)) +
  geom_col(fill = "darkgreen") +
  facet_wrap(vars(chapter), scales = "free") +
  theme_minimal() +
  labs(
    x = " ",
    y = " "
  )
Create a visualization for the most frequently used words in Chapter 1:
Determine the overall sentiment (positive or negative) for each chapter:
# Load the "bing" and "nrc" sentiment lexicons for joining against the text.
bing_lex <- get_sentiments("bing")
nrc_lex <- get_sentiments("nrc")
# Attach a bing sentiment label to each word in Chapters 1-16. The inner join
# drops words that do not appear in the lexicon.
# `%in%` is required here: `chapter == 1:16` recycles the vector along the
# rows and silently discards most of the text instead of selecting chapters.
eoe_bing <- eoe_words %>%
  filter(chapter %in% 1:16) %>%
  inner_join(bing_lex, by = "word")
# Number of sentiment-bearing words per chapter and sentiment label.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by chapter and dplyr prints a regrouping message.
bing_counts <- eoe_bing %>%
  group_by(chapter, sentiment) %>%
  summarize(n = n(), .groups = "drop")
# Sentiment word counts, one facet per chapter, coloured by sentiment label.
ggplot(bing_counts, aes(x = sentiment, y = n, fill = sentiment)) +
  geom_col() +
  facet_wrap(vars(chapter)) +
  scale_fill_manual(
    values = c(negative = "darkred", positive = "slateblue")
  ) +
  labs(x = " ", y = "Count", fill = "Sentiment") +
  theme_minimal()
# Baseline over all rows of eoe_bing: natural log of the ratio of positive to
# negative word counts.
bing_log_ratio_book <- eoe_bing %>%
  summarize(
    n_pos = sum(sentiment == "positive"),
    n_neg = sum(sentiment == "negative")
  ) %>%
  mutate(log_ratio = log(n_pos / n_neg))
# Per-chapter log(positive / negative), re-centred on the baseline ratio so
# that values above zero mark chapters more positive than the baseline.
bing_log_ratio_ch <- eoe_bing %>%
  group_by(chapter) %>%
  summarize(
    n_pos = sum(sentiment == "positive"),
    n_neg = sum(sentiment == "negative"),
    log_ratio = log(n_pos / n_neg)
  ) %>%
  mutate(
    log_ratio_adjust = log_ratio - bing_log_ratio_book$log_ratio,
    # Sign flag, used only to pick the bar colour in the plot.
    pos_neg = ifelse(log_ratio_adjust > 0, "pos", "neg")
  )
# Horizontal bars of the adjusted log ratio per chapter. The chapter factor
# is reversed so the first chapter is drawn at the top of the axis.
ggplot(
  bing_log_ratio_ch,
  aes(
    x = log_ratio_adjust,
    y = fct_rev(factor(chapter)),
    fill = pos_neg
  )
) +
  geom_col() +
  scale_fill_manual(values = c(neg = "darkred", pos = "slateblue")) +
  labs(
    x = "Adjusted log(positive/negative)",
    y = "Chapter number"
  ) +
  theme_minimal() +
  # Bar colour already encodes the sign, so the legend is hidden.
  theme(legend.position = "none")
Steinbeck, John. East of Eden. Penguin Classics, 2000. Retrieved from: https://hitalki.org/blog/wp-content/uploads/2023/05/East-of-Eden.pdf
@online{calbert2024,
author = {Calbert, Madison},
title = {Text {Sentiment} {Analysis}},
date = {2024-02-10},
url = {https://madicalbert.github.io/posts/2024-02-10-sentiment-analysis/},
langid = {en}
}