This article shows how to create a Pareto chart and an age-sex population pyramid.
# Packages this script attaches. Any that are not installed yet are
# installed first, so the script can run on a fresh R library.
packages <- c("tidyverse", "readxl", "knitr")

for (p in packages) {
  # requireNamespace() only checks that the package can be loaded; unlike
  # require() it does not attach it, so attaching is left to library() below,
  # which fails loudly if the installation did not succeed.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
# Read the "Orders" and "Returns" sheets of the Superstore workbook.
orders <- read_xls(path = "data/Superstore-2021.xls", sheet = "Orders")
returns <- read_xls(path = "data/Superstore-2021.xls", sheet = "Returns")

# Start from the returns and attach the matching order rows by "Order ID";
# a left join keeps every row of `returns`.
joined_tab <- returns %>%
  left_join(orders, by = "Order ID")
# Number of joined return rows per sub-category, stored in column `Returns`.
freq_returned <- joined_tab %>%
  count(`Sub-Category`, name = "Returns")

# Sub-categories ordered from most to fewest returns.
freq_sorted <- freq_returned %>%
  arrange(desc(Returns))

# Running total of returns down the sorted table; this feeds the cumulative
# line of the Pareto chart.
freq_cum <- freq_sorted %>%
  mutate(cumfreq = cumsum(Returns))
# Pareto chart of returns by sub-category: bars show the number of returns
# per sub-category (largest first), and the points/line show the cumulative
# number of returns, read as a share of the total on the secondary axis.

# Total number of returns. Both the primary-axis breaks and the percentage
# axis are derived from it so that the tick marks of the two axes line up.
# NOTE(review): the original hard-coded a break step of 322.6, presumably
# one tenth of the total for this data set -- confirm.
total_returns <- sum(freq_cum$Returns)

ggplot(
  freq_cum,
  # Columns are referenced by name (not freq_cum$...) so every layer reads
  # them from the layer data; bars are ordered by descending return count.
  aes(x = reorder(`Sub-Category`, -Returns))
) +
  geom_col(aes(y = Returns), fill = "light blue") +
  geom_point(aes(y = cumfreq)) +
  # group = 1 joins all sub-categories into a single line on a discrete axis.
  geom_line(aes(y = cumfreq, group = 1)) +
  scale_y_continuous(
    name = "absolute frequency",
    # One break per 10% of the total number of returns.
    breaks = seq(0, 1, by = 0.1) * total_returns,
    # Secondary axis: cumulative count expressed as a fraction of the total.
    sec.axis = sec_axis(
      ~ . / total_returns,
      breaks = seq(0, 2, by = 0.1),
      labels = scales::percent
    )
  ) +
  labs(title = "Pareto Chart of return by sub-category", x = "Sub-Category") +
  theme_bw(base_size = 7)
# Read the resident population table.
residents_stats <- read_csv("data/respopagesextod2021.csv")

# Columns 3, 4 and 6 selected by position -- presumably AG, Sex and Pop;
# TODO confirm against the file header.
# NOTE(review): residents_data is not used by any of the steps below.
residents_data <- residents_stats[c(3, 4, 6)]

# Full table sorted by the AG column.
residents_data_ordered <- residents_stats %>%
  arrange(AG)

# Total population for each combination of age group (AG) and Sex.
# .groups = "drop" returns an ungrouped tibble, so no leftover grouping by AG
# leaks into later steps and summarise() does not emit its regrouping message.
residents_data_sorted <- residents_data_ordered %>%
  group_by(AG, Sex) %>%
  summarise(Pop = sum(Pop), .groups = "drop")
# Age-sex pyramid: one horizontal bar per age group and sex. Rows whose Sex
# is "Males" are plotted with a negative population so they extend to the
# left of zero; all other rows extend to the right.
ggplot(
  residents_data_sorted,
  aes(
    # Order the age groups by the first number found in each label instead of
    # alphabetically, which would misplace a label such as "5_to_9".
    # NOTE(review): assumes AG labels start with the lower age bound
    # (e.g. "5_to_9", "90_and_over") -- confirm against the data.
    x = reorder(AG, readr::parse_number(as.character(AG))),
    y = ifelse(Sex == "Males", -Pop, Pop),
    fill = Sex
  )
) +
  geom_col() +
  # Flip so that the age groups run along the vertical axis.
  coord_flip() +
  scale_y_continuous(
    name = "Male versus Female Population",
    breaks = seq(-160000, 160000, 20000),
    # Label both sides with positive counts, derived from the breaks so the
    # sequence is not repeated; formatted explicitly so large values are
    # printed as plain digits rather than in scientific notation.
    labels = function(b) format(abs(b), scientific = FALSE, trim = TRUE)
  ) +
  # After coord_flip() the horizontal axis carries the population values.
  theme(axis.text.x = element_text(size = 5)) +
  labs(
    title = "Age-Sex Pyramid of Singapore Population by Age Group and Sex",
    x = "Age Group"
  )