This article is about creating a visualisation showing the average rating and the proportion of cocoa percent (% chocolate) greater than or equal to 70% for the top 15 company locations.
# Packages this script attaches before running the analysis.
packages <- c("tidyverse", "readxl", "knitr")

# Install any package that is not yet available, then attach it.
for (p in packages) {
  # requireNamespace() only checks that the package can be loaded; unlike
  # require() it does not attach it or warn, so the library() call below is
  # the single place where attaching happens and where a failed installation
  # surfaces as an error.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Read the chocolate data from the project-relative CSV file.
cho <- read_csv("data/chocolate.csv")

# Strip the characters matched by the class below (percent signs and commas)
# from cocoa_percent, then convert the remaining text to numeric.
cho$cocoa_percent <- as.numeric(
  gsub("[\\%,]", "", cho$cocoa_percent)
)

# Keep only the rows whose cocoa_percent is at least 70. Rows where the
# comparison is NA are dropped, exactly as subset() would drop them.
cho_mod <- cho[!is.na(cho$cocoa_percent >= 70) & cho$cocoa_percent >= 70, ]
# Mean rating per company location, ignoring missing ratings.
cho_mod_avg <- cho_mod %>%
  group_by(company_location) %>%
  summarise(avg_rating = mean(rating, na.rm = TRUE)) %>%
  ungroup()

# Mean cocoa percent per company location, ignoring missing values.
cho_mod_percent <- cho_mod %>%
  group_by(company_location) %>%
  summarise(avg_cocoa_percent = mean(cocoa_percent, na.rm = TRUE)) %>%
  ungroup()
# Combine the two per-location summaries into one row per company location.
cho_new <- merge(cho_mod_avg, cho_mod_percent, by = "company_location")

# Rank locations by average rating, breaking ties by average cocoa percent
# (both in descending order).
rank_order <- order(-cho_new$avg_rating, -cho_new$avg_cocoa_percent)
cho_new_desc <- cho_new[rank_order, ]

# Keep the 15 best-ranked locations. head() simply returns fewer rows when
# fewer than 15 locations exist, whereas indexing with 1:15 would pad the
# result with all-NA rows.
cho_top15 <- head(cho_new_desc, 15)
# Locations that made the top-15 ranking. They are taken from cho_top15
# itself rather than typed out by hand, so the selection stays in sync with
# the ranking when the data changes. NA entries are removed so that only
# real location names are matched.
top15_locations <- unique(cho_top15$company_location)
top15_locations <- top15_locations[!is.na(top15_locations)]

# Individual chocolate records (cocoa_percent >= 70) from those locations.
cho_top15_coco_percent <- cho_mod[
  cho_mod$company_location %in% top15_locations,
]

# Attach each location's summary columns from cho_top15 to its individual
# records; merge() keeps only locations present in both inputs.
cho_final <- merge(
  cho_top15,
  cho_top15_coco_percent,
  by = "company_location"
)