What was your dataset?
Load your dataset in with the function below. The input is the date the dataset was issued. You should be able to get this from the tt_available()
function.
# Read the coffee ratings CSV for the 2020-07-07 TidyTuesday release straight
# from the rfordatascience/tidytuesday GitHub repository.
coffee <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv"
)
Rows: 1339 Columns: 43
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (24): species, owner, country_of_origin, farm_name, lot_number, mill, ic...
dbl (19): total_cup_points, number_of_bags, aroma, flavor, aftertaste, acidi...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
What was your question?
Given your initial exploration of the data, what was the question you wanted to answer?
Does processing method affect overall coffee rating?
Initial Skim of Data
Data summary
Name
coffee
Number of rows
1339
Number of columns
43
_______________________
Column type frequency:
character
24
numeric
19
________________________
Group variables
None
Variable type: character
species
0
1.00
7
7
0
2
0
owner
7
0.99
3
50
0
315
0
country_of_origin
1
1.00
4
28
0
36
0
farm_name
359
0.73
1
73
0
571
0
lot_number
1063
0.21
1
71
0
227
0
mill
315
0.76
1
77
0
460
0
ico_number
151
0.89
1
40
0
847
0
company
209
0.84
3
73
0
281
0
altitude
226
0.83
1
41
0
396
0
region
59
0.96
2
76
0
356
0
producer
231
0.83
1
100
0
691
0
bag_weight
0
1.00
1
8
0
56
0
in_country_partner
0
1.00
7
85
0
27
0
harvest_year
47
0.96
3
24
0
46
0
grading_date
0
1.00
13
20
0
567
0
owner_1
7
0.99
3
50
0
319
0
variety
226
0.83
4
21
0
29
0
processing_method
170
0.87
5
25
0
5
0
color
218
0.84
4
12
0
4
0
expiration
0
1.00
13
20
0
566
0
certification_body
0
1.00
7
85
0
26
0
certification_address
0
1.00
40
40
0
32
0
certification_contact
0
1.00
40
40
0
29
0
unit_of_measurement
0
1.00
1
2
0
2
0
Variable type: numeric
total_cup_points
0
1.00
82.09
3.50
0
81.08
82.50
83.67
90.58
▁▁▁▁▇
number_of_bags
0
1.00
154.18
129.99
0
14.00
175.00
275.00
1062.00
▇▇▁▁▁
aroma
0
1.00
7.57
0.38
0
7.42
7.58
7.75
8.75
▁▁▁▁▇
flavor
0
1.00
7.52
0.40
0
7.33
7.58
7.75
8.83
▁▁▁▁▇
aftertaste
0
1.00
7.40
0.40
0
7.25
7.42
7.58
8.67
▁▁▁▁▇
acidity
0
1.00
7.54
0.38
0
7.33
7.58
7.75
8.75
▁▁▁▁▇
body
0
1.00
7.52
0.37
0
7.33
7.50
7.67
8.58
▁▁▁▁▇
balance
0
1.00
7.52
0.41
0
7.33
7.50
7.75
8.75
▁▁▁▁▇
uniformity
0
1.00
9.83
0.55
0
10.00
10.00
10.00
10.00
▁▁▁▁▇
clean_cup
0
1.00
9.84
0.76
0
10.00
10.00
10.00
10.00
▁▁▁▁▇
sweetness
0
1.00
9.86
0.62
0
10.00
10.00
10.00
10.00
▁▁▁▁▇
cupper_points
0
1.00
7.50
0.47
0
7.25
7.50
7.75
10.00
▁▁▁▇▁
moisture
0
1.00
0.09
0.05
0
0.09
0.11
0.12
0.28
▃▇▅▁▁
category_one_defects
0
1.00
0.48
2.55
0
0.00
0.00
0.00
63.00
▇▁▁▁▁
quakers
1
1.00
0.17
0.83
0
0.00
0.00
0.00
11.00
▇▁▁▁▁
category_two_defects
0
1.00
3.56
5.31
0
0.00
2.00
4.00
55.00
▇▁▁▁▁
altitude_low_meters
230
0.83
1750.71
8669.44
1
1100.00
1310.64
1600.00
190164.00
▇▁▁▁▁
altitude_high_meters
230
0.83
1799.35
8668.81
1
1100.00
1350.00
1650.00
190164.00
▇▁▁▁▁
altitude_mean_meters
230
0.83
1775.03
8668.63
1
1100.00
1310.64
1600.00
190164.00
▇▁▁▁▁
Total Number of Samples Per Country
# Tabulate the number of samples per country of origin, largest count first,
# and render the result as a gt table.
coffee %>%
  janitor::tabyl(country_of_origin) %>%
  arrange(-n) %>%
  gt::gt()
Mexico
236
0.176250934
0.1763826607
Colombia
183
0.136669156
0.1367713004
Guatemala
181
0.135175504
0.1352765321
Brazil
132
0.098581031
0.0986547085
Taiwan
75
0.056011949
0.0560538117
United States (Hawaii)
73
0.054518297
0.0545590433
Honduras
53
0.039581777
0.0396113602
Costa Rica
51
0.038088125
0.0381165919
Ethiopia
44
0.032860344
0.0328849028
Tanzania, United Republic Of
40
0.029873040
0.0298953662
Uganda
36
0.026885736
0.0269058296
Thailand
32
0.023898432
0.0239162930
Nicaragua
26
0.019417476
0.0194319880
Kenya
25
0.018670650
0.0186846039
El Salvador
21
0.015683346
0.0156950673
Indonesia
20
0.014936520
0.0149476831
China
16
0.011949216
0.0119581465
India
14
0.010455564
0.0104633782
Malawi
11
0.008215086
0.0082212257
Peru
10
0.007468260
0.0074738416
United States
10
0.007468260
0.0074738416
Myanmar
8
0.005974608
0.0059790732
Vietnam
8
0.005974608
0.0059790732
Haiti
6
0.004480956
0.0044843049
Philippines
5
0.003734130
0.0037369208
Panama
4
0.002987304
0.0029895366
United States (Puerto Rico)
4
0.002987304
0.0029895366
Ecuador
3
0.002240478
0.0022421525
Laos
3
0.002240478
0.0022421525
Burundi
2
0.001493652
0.0014947683
Cote d?Ivoire
1
0.000746826
0.0007473842
Japan
1
0.000746826
0.0007473842
Mauritius
1
0.000746826
0.0007473842
Papua New Guinea
1
0.000746826
0.0007473842
Rwanda
1
0.000746826
0.0007473842
Zambia
1
0.000746826
0.0007473842
NA
1
0.000746826
NA
Distribution of total_cup_points
versus processing_method
# Box plot of total_cup_points for each processing method. The axes are
# flipped so the processing methods run along the vertical axis.
ggplot(
  coffee,
  aes(
    x = processing_method,
    y = total_cup_points,
    fill = processing_method
  )
) +
  geom_boxplot() +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Counts of Country of Origin versus Processing Method
# Count plot: one point per (processing method, country) pair, sized by the
# number of samples. Country levels are reversed before plotting.
coffee %>%
  mutate(country_of_origin = fct_rev(country_of_origin)) %>%
  ggplot(
    aes(
      x = processing_method,
      y = country_of_origin,
      color = processing_method
    )
  ) +
  geom_count() +
  theme(axis.text.x = element_text(angle = 90))
Here’s a sortable table of the counts shown in the plot above
# Cross-tabulate country of origin against processing method and show the
# counts as an interactive reactable table.
library(reactable)

coffee %>%
  janitor::tabyl(country_of_origin, processing_method) %>%
  reactable()
Sorted Heatmap of scores by total_cup_points
# Tile heatmap of the individual score components: one row per sample, one
# column per score type, with samples ordered by total_cup_points.
# NOTE(review): `aftertaste` is not among the plotted score columns; confirm
# whether that omission is intentional.
coffee %>%
  # Build the id from the piped data instead of reaching back to the global
  # `coffee` object; the ids are the same "1", "2", ... strings as before.
  mutate(sample_id = as.character(row_number())) %>%
  # Keep only what the plot needs: the id, the ordering variable and the
  # score components (the previously selected country_of_origin was unused).
  select(
    sample_id, total_cup_points,
    aroma, flavor, acidity, body, balance,
    uniformity, clean_cup, sweetness, cupper_points
  ) %>%
  # Long format: one row per (sample, score type).
  pivot_longer(
    cols = -c(sample_id, total_cup_points),
    names_to = "type",
    values_to = "score"
  ) %>%
  mutate(sample_id = fct_reorder(sample_id, total_cup_points)) %>%
  ggplot() +
  aes(y = sample_id, x = type, fill = score) +
  geom_tile()
Bi-clustered Heatmap of Scores
Loading required package: plotly
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
Loading required package: viridis
Loading required package: viridisLite
======================
Welcome to heatmaply version 1.5.0
Type citation('heatmaply') for how to cite the package.
Type ?heatmaply for the main documentation.
The github page is: https://github.com/talgalili/heatmaply/
Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
You may ask questions at stackoverflow, use the r and heatmaply tags:
https://stackoverflow.com/questions/tagged/heatmaply
======================
# Pass the numeric score columns to heatmaply() for the clustered heatmap.
# The former `mutate(sample_id = ...)` step was removed: the column it created
# was dropped by this select() before reaching heatmaply(), so it had no
# effect on the result.
# NOTE(review): `aftertaste` is not among the selected score columns; confirm
# whether that omission is intentional.
coffee %>%
  select(
    aroma, flavor, acidity, body, balance,
    uniformity, clean_cup, sweetness, cupper_points
  ) %>%
  heatmaply()
Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 0x0006): Library not loaded: /opt/X11/lib/libSM.6.dylib
Referenced from: <31EADEB5-0A17-3546-9944-9B3747071FE8> /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/modules/R_X11.so
Reason: tried: '/opt/X11/lib/libSM.6.dylib' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/opt/X11/lib/libSM.6.dylib' (no such file), '/opt/X11/lib/libSM.6.dylib' (no such file), '/Library/Frameworks/R.framework/Resources/lib/libSM.6.dylib' (no such file), '/Library/Java/JavaVirtualMachines/jdk-11.0.18+10/Contents/Home/lib/server/libSM.6.dylib' (no such file)
Processing Method: Natural / Dry
# Natural / Dry samples only: box plot of total_cup_points per country, with
# countries ordered by their median score. The legend is hidden.
coffee %>%
  filter(processing_method == "Natural / Dry") %>%
  mutate(
    country_of_origin = fct_reorder(
      country_of_origin,
      total_cup_points,
      .fun = median
    )
  ) %>%
  ggplot(
    aes(
      x = country_of_origin,
      y = total_cup_points,
      fill = country_of_origin
    )
  ) +
  geom_boxplot() +
  coord_flip() +
  labs(title = "Tanzania leads with ratings in Natural/Dry") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )
Processing Method: Washed / Wet
# Washed / Wet samples only: box plot of total_cup_points per country, with
# countries ordered by their median score. The legend is hidden.
coffee %>%
  filter(processing_method == "Washed / Wet") %>%
  mutate(
    country_of_origin = fct_reorder(
      country_of_origin,
      total_cup_points,
      .fun = median
    )
  ) %>%
  ggplot(
    aes(
      x = country_of_origin,
      y = total_cup_points,
      fill = country_of_origin
    )
  ) +
  geom_boxplot() +
  coord_flip() +
  labs(title = "US leads in Ratings in Washed/Wet") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  )
Mexico: Processing Methods
# Mexican samples only: box plot of total_cup_points per processing method,
# with methods ordered by their median score.
coffee %>%
  filter(country_of_origin == "Mexico") %>%
  mutate(
    processing_method = fct_reorder(
      processing_method,
      total_cup_points,
      .fun = median
    )
  ) %>%
  ggplot(
    aes(
      x = processing_method,
      y = total_cup_points,
      fill = processing_method
    )
  ) +
  geom_boxplot(color = "black") +
  coord_flip()
Linear model of total_cup_points
# Fit a linear model of total_cup_points on country of origin and the number
# of category-one defects, then keep only the terms with p < 0.05, sorted by
# ascending p-value.
lm(
  total_cup_points ~ country_of_origin + category_one_defects,
  data = coffee
) %>%
  broom::tidy() %>%
  filter(p.value < 0.05) %>%
  arrange(p.value)
# A tibble: 9 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 82.4 0.286 288. 0
2 country_of_originHonduras -3.06 0.535 -5.72 0.0000000133
3 country_of_originEthiopia 3.10 0.573 5.41 0.0000000757
4 category_one_defects -0.163 0.0371 -4.40 0.0000116
5 country_of_originMexico -1.40 0.359 -3.90 0.000100
6 country_of_originHaiti -5.00 1.37 -3.64 0.000284
7 country_of_originNicaragua -1.92 0.706 -2.73 0.00649
8 country_of_originKenya 1.89 0.718 2.63 0.00859
9 country_of_originColombia 0.741 0.376 1.97 0.0488
Citation BibTeX citation:
@online{laderas2020,
author = {Laderas, Ted},
title = {Coffee {Data} {Exploration}},
date = {2020-07-08},
url = {https://laderast.github.io/articles/2020-07-08_coffee/},
langid = {en}
}
For attribution, please cite this work as:
Laderas, Ted. 2020.
“Coffee Data
Exploration.” July 8, 2020.
https://laderast.github.io/articles/2020-07-08_coffee/ .