library(tidytuesdayR)
Look at the available datasets
Loading the Data
# Incoming data comes in as a list, one element per downloaded file.
datasets <- tidytuesdayR::tt_load("2020-08-11")
--- Compiling #TidyTuesday Information for 2020-08-11 ----
--- There are 2 files available ---
--- Starting Download ---
Downloading file 1 of 2: `avatar.csv`
Downloading file 2 of 2: `scene_description.csv`
--- Download complete ---
# Show the names of the individual datasets held in the list.
names(datasets)
[1] "avatar" "scene_description"
# Pull the `avatar` table out of the list and preview its first five rows.
avatar <- datasets$avatar
avatar[1:5, ]
# A tibble: 5 × 11
id book book_num chapter chapter_num character full_text character_words
<dbl> <chr> <dbl> <chr> <dbl> <chr> <chr> <chr>
1 1 Water 1 The Boy … 1 Katara "Water. … Water. Earth. …
2 2 Water 1 The Boy … 1 Scene De… "As the … <NA>
3 3 Water 1 The Boy … 1 Sokka "It's no… It's not getti…
4 4 Water 1 The Boy … 1 Scene De… "The sho… <NA>
5 5 Water 1 The Boy … 1 Katara "[Happil… Sokka, look!
# ℹ 3 more variables: writer <chr>, director <chr>, imdb_rating <dbl>
# Pull the `scene_description` table out of the list and preview five rows.
scenes <- datasets$scene_description
scenes[1:5, ]
# A tibble: 5 × 2
id scene_description
<dbl> <chr>
1 3 [Close-up of the boy as he grins confidently over his shoulder in the d…
2 5 [Happily surprised.]
3 6 [Close-up of Sokka; whispering.]
4 6 [A look of bliss adorns his face. He licks his lips and wiggles his fin…
5 8 [Struggling with the water that passes right in front of her.]
My Research Question
Does the sentiment of each character change over the multiple seasons? That is, does a character become more positive or more negative as their character develops?
I will attempt to summarize the sentiment of each character across each episode.
Using `tidytext` to `unnest_tokens()` - that is, to split each line of dialogue into one word per row.
library(tidytext)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Keep the identifying columns plus the spoken text, drop the
# "Scene Description" rows, then split the text into one word per row.
avatar_words <- avatar %>%
  select(
    id, book, book_num, chapter, chapter_num, character, character_words
  ) %>%
  filter(character != "Scene Description") %>%
  unnest_tokens(word, character_words)

# Preview the first ten tokenized rows.
avatar_words[1:10, ]
# A tibble: 10 × 7
id book book_num chapter chapter_num character word
<dbl> <chr> <dbl> <chr> <dbl> <chr> <chr>
1 1 Water 1 The Boy in the Iceberg 1 Katara water
2 1 Water 1 The Boy in the Iceberg 1 Katara earth
3 1 Water 1 The Boy in the Iceberg 1 Katara fire
4 1 Water 1 The Boy in the Iceberg 1 Katara air
5 1 Water 1 The Boy in the Iceberg 1 Katara my
6 1 Water 1 The Boy in the Iceberg 1 Katara grandmother
7 1 Water 1 The Boy in the Iceberg 1 Katara used
8 1 Water 1 The Boy in the Iceberg 1 Katara to
9 1 Water 1 The Boy in the Iceberg 1 Katara tell
10 1 Water 1 The Boy in the Iceberg 1 Katara me
Characters by Episode/Chapter
# Number of distinct chapters (episodes) each character speaks in,
# most frequent first. "Scene Description" rows are not a character.
episode_count <- avatar %>%
  count(character, chapter) %>%
  select(character, chapter) %>%
  filter(character != "Scene Description") %>%
  distinct() %>%
  count(character) %>%
  arrange(desc(n))

# Show the result as an interactive table.
episode_count %>%
  DT::datatable()
Bit Players
# Characters who appear in exactly one chapter, in alphabetical order.
episode_count %>%
  filter(n == 1) %>%
  arrange(character)
# A tibble: 272 × 2
character n
<chr> <int>
1 Aang and Zuko 1
2 Aang: 1
3 Actor Bumi 1
4 Actor Iroh 1
5 Actor Jet 1
6 Actor Ozai 1
7 Actor Sokka 1
8 Actor Toph 1
9 Actor Zuko 1
10 Actress Azula 1
# ℹ 262 more rows
Cabbage merchant
The cabbage merchant appears in 4 episodes, and you can see his path to resignation as Aang and company keep busting up his cabbage kiosk.
# Every line spoken by the cabbage merchant, with the chapter it occurs in.
avatar %>%
  filter(character == "Cabbage merchant") %>%
  select(chapter, character_words) %>%
  gt::gt()
chapter | character_words |
---|---|
The King of Omashu | No! My cabbages! |
The King of Omashu | My cabbages! You're gonna pay for this! |
The King of Omashu | Off with their heads! One for each head of cabbage! |
The King of Omashu | My cabbages! |
The Waterbending Scroll | My cabbages! This place is worse than Omashu! |
The Serpent's Pass | Ahhh! My cabbages! |
The Tales of Ba Sing Se | My cabba- Oh, forget it. |
Who Spoke the Most?
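Surprisingly, Sokka speaks the most. Note that the counts below are words rather than lines, because `avatar_words` has one row per spoken word.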
# `avatar_words` has one row per spoken word, so `n` here is a word count
# per character, sorted from most to least talkative.
line_count <- avatar_words %>%
  count(character) %>%
  arrange(desc(n))

# Top 20 characters as a formatted table.
line_count[1:20, ] %>%
  gt::gt()
character | n |
---|---|
Sokka | 18293 |
Aang | 17821 |
Katara | 14961 |
Zuko | 8972 |
Toph | 5434 |
Iroh | 5252 |
Azula | 3299 |
Zhao | 1607 |
Jet | 1604 |
Suki | 1221 |
Hakoda | 1065 |
Pathik | 1030 |
Roku | 1015 |
Ozai | 1002 |
Hama | 955 |
Mai | 844 |
Bumi | 818 |
Long Feng | 757 |
Warden | 722 |
Ty Lee | 705 |
Understanding Each Character’s Journey
Using `tidytext`, I do a sentiment analysis of each episode (here called a `chapter`) to determine the overall sentiment for a character.
# Bing lexicon: labels individual words as "positive" or "negative".
bing <- get_sentiments("bing")

# Characters whose sentiment journeys are compared below.
characters <- c(
  "Aang", "Katara", "Zuko", "Toph", "Iroh", "Sokka", "Azula", "Mai", "Ty Lee"
)

# Per chapter and character: count positive and negative words, spread them
# into two columns, and score the chapter as positive minus negative.
sentiment_summary <- avatar_words %>%
  inner_join(bing) %>%
  count(book_num, chapter_num, chapter, character, sentiment) %>%
  filter(character %in% characters) %>%
  arrange(book_num, chapter_num) %>%
  pivot_wider(names_from = sentiment, values_from = n) %>%
  # A chapter with no words of one polarity has no count for it; treat as 0.
  mutate(
    positive = tidyr::replace_na(positive, 0),
    negative = tidyr::replace_na(negative, 0)
  ) %>%
  mutate(sentiment = positive - negative)
Joining with `by = join_by(word)`
# Give every (book, chapter) pair a running index in order of first
# appearance, so episodes can be laid out on a single x axis.
index_chapters <- avatar_words %>%
  select(book_num, chapter_num) %>%
  distinct() %>%
  mutate(index = row_number())

# Attach that running index to the per-chapter sentiment scores.
sentiment_index <- sentiment_summary %>%
  inner_join(y = index_chapters, by = c("book_num", "chapter_num"))
# One bar chart of per-episode sentiment for each character.
# The extra aesthetics (episode, book, episode_number) are not drawn by
# ggplot2; they are mapped so they are available to the plotly tooltip.
out_plot <- ggplot(sentiment_index) +
  aes(
    x = index, y = sentiment, fill = character,
    episode = chapter, book = book_num, episode_number = chapter_num
  ) +
  # The parameter is `show.legend`; `show_legend` is ignored with a warning.
  # Each facet is already labelled, so the fill legend is redundant.
  geom_col(show.legend = FALSE) +
  facet_wrap(~character, ncol = 2) +
  labs(
    title = "Each Character's Sentiment Journey", x = "Episode Number",
    subtitle = "mouse over each graph for more information"
  ) +
  # NOTE(review): presumably the first episodes of Books 2 and 3 - confirm.
  geom_vline(xintercept = 21, lty = 2) +
  geom_vline(xintercept = 41, lty = 2)

plotly::ggplotly(out_plot)
Sentiment Heatmap
# Fill in every episode x character combination so that episodes where a
# character has no scored words become explicit rows with NA sentiment.
# Completing on `index` (the x aesthetic) keeps x non-missing on the
# filled-in rows, so geom_tile() draws them with `na.value` instead of
# dropping them.
s_index <- sentiment_index %>%
  tidyr::complete(index, character)

# Heatmap of sentiment; black tiles mark episodes with no score.
ggplot(s_index) +
  aes(x = index, y = character, fill = sentiment) +
  geom_tile() +
  scale_fill_viridis_b(na.value = "black")
Zuko has the most interesting journey
Zuko has many ups and downs, which may reflect his overall lack of confidence and his tendency toward self-loathing.
# Zuko's per-episode sentiment only.
zuko <- sentiment_index %>%
  filter(character == "Zuko")

# The extra aesthetics (episode, book, episode_number) are mapped so they
# are available to the plotly tooltip.
out_plot <- ggplot(zuko) +
  aes(
    x = index, y = sentiment, fill = character,
    episode = chapter, book = book_num, group = character,
    episode_number = chapter_num
  ) +
  # The parameter is `show.legend`; `show_legend` is ignored with a warning.
  geom_col(show.legend = FALSE) +
  facet_wrap(~character, ncol = 2) +
  annotate(geom = "text", x = 27, y = -8, label = "Zuko Alone\nA Turning Point") +
  annotate(geom = "text", x = 53, y = 11, label = "Where Zuko\ntrains Aang") +
  labs(
    title = "Zuko has lots of ups and downs", x = "Episode Number",
    subtitle = "mouse over for more episode information"
  ) +
  ylim(c(-13, 13)) +
  # NOTE(review): presumably the first episodes of Books 2 and 3 - confirm.
  geom_vline(xintercept = 21, lty = 2) +
  geom_vline(xintercept = 41, lty = 2)

plotly::ggplotly(out_plot)
Aang and Zuko’s Journeys
Plotting the sentiment journey of Zuko and Aang together shows that they often mirror each other, except in the last parts of Book 3.
# Sentiment for Zuko and Aang only, drawn on one shared panel.
zuko_aang <- sentiment_index %>%
  filter(character %in% c("Zuko", "Aang"))

# The extra aesthetics (episode, book, episode_number) are mapped so they
# are available to the plotly tooltip.
out_plot <- ggplot(zuko_aang) +
  aes(
    x = index, y = sentiment, fill = character,
    episode = chapter, book = book_num, episode_number = chapter_num
  ) +
  # The original passed `show_legend = FALSE`, which ggplot2 ignores with a
  # warning. The argument is dropped rather than corrected: the fill legend
  # is the only on-plot key telling the two characters apart, and this keeps
  # the rendered figure unchanged.
  geom_col(alpha = 0.7) +
  labs(
    title = "Aang and Zuko's Journeys Often Mirror Each Other",
    x = "Episode Number",
    subtitle = "mouse over for more episode information"
  ) +
  ylim(c(-13, 13)) +
  # NOTE(review): presumably the first episodes of Books 2 and 3 - confirm.
  geom_vline(xintercept = 21, lty = 2) +
  geom_vline(xintercept = 41, lty = 2)

plotly::ggplotly(out_plot)
Iroh is so chill and positive
# Iroh's per-episode sentiment only.
iroh <- sentiment_index %>%
  filter(character == "Iroh")

# The extra aesthetics (episode, book, episode_number) are mapped so they
# are available to the plotly tooltip.
out_plot <- ggplot(iroh) +
  aes(
    x = index, y = sentiment, fill = character,
    episode = chapter, book = book_num, episode_number = chapter_num
  ) +
  # The parameter is `show.legend`; `show_legend` is ignored with a warning.
  geom_col(show.legend = FALSE) +
  labs(
    title = "Iroh is just so chill and positive", x = "Episode Number",
    subtitle = "mouse over for more episode information"
  ) +
  ylim(c(-13, 13)) +
  # NOTE(review): presumably the first episodes of Books 2 and 3 - confirm.
  geom_vline(xintercept = 21, lty = 2) +
  geom_vline(xintercept = 41, lty = 2)

plotly::ggplotly(out_plot)
Citation
@online{laderas2020,
author = {Laderas, Ted},
title = {Sentiment Analysis of {Avatar}},
date = {2020-08-11},
url = {https://laderast.github.io/articles/2020-08-11-avatar_last_airbender/},
langid = {en}
}