IanGabes / p2h2

Geek Repo:Geek Repo

Github PK Tool:Github PK Tool

HWF Data Summary

Dewey Dunnington 11/19/2016

Libraries

library(prettymapr)
library(dplyr)
library(ggplot2)
library(ggspatial)
library(reshape2)

Load in data

mt <- read.csv('data/monthly_totals.csv', stringsAsFactors = FALSE)
mt$posix <- lubridate::dmy(mt$Date)
mt$posix[is.na(mt$posix)] <- lubridate::mdy(mt$Date[is.na(mt$posix)])
mt$month <- paste(lubridate::year(mt$posix),sprintf('%0.2d', lubridate::month(mt$posix)), sep="-")
mt$Items <- tolower(mt$Items)
mt$Supplied[mt$Supplied != "Y"] <- "N"
mt$Supplied <- factor(mt$Supplied, levels=c("Y", "N"))
mtgeo <- read.csv('data/monthly_totals_postals.csv')

deliveries <- mt %>% group_by(month, posix, Delivery.ID) %>% 
  summarise(Geo=unique(Geo), nitems=length(Items), 
            nyes=sum(Supplied=="Y"), nno=sum(Supplied=="N")) %>% 
  merge(mtgeo, by='Geo', all.x=T) %>% 
  select(posix, month, Delivery.ID, nitems, nyes, nno, postal, lon, lat)

Plot geo

deliveriessum <- deliveries %>% group_by(postal, lon, lat) %>% 
  summarise(n=sum(nitems), fulfilled=sum(nyes)/sum(nitems))
ggplot(deliveriessum, aes(lon, lat)) + geom_osm() + 
  geom_spatial(mapping=aes(size=n, col=fulfilled)) + coord_fixed() +
  scale_x_continuous(expand = c(0.05, 0.05)) + 
  scale_y_continuous(expand = c(0, 0.05))

Plot geo, facet by month

deliveriessum <- deliveries %>% group_by(postal, lon, lat, month) %>% 
  summarise(n=sum(nitems), fulfilled=sum(nyes)/sum(nitems))
ggplot(deliveriessum, aes(lon, lat)) + geom_osm() + 
  geom_spatial(mapping=aes(size=n, col=fulfilled)) +
  facet_wrap(~month) + coord_fixed() +
  scale_x_continuous(expand = c(0.05, 0.05)) + 
  scale_y_continuous(expand = c(0.05, 0.05))

Tabular summary by items

mtsum <- mt %>% group_by(Items) %>% 
  summarise(SuppliedY=sum(Supplied=="Y"), 
            SuppliedN=length(Supplied)-SuppliedY, 
            Total=length(Supplied))
mtsumtot <- rbind(mtsum[order(mtsum$Total, decreasing = T),], data.frame(Items="Totals", SuppliedY=sum(mtsum$SuppliedY),
                                 SuppliedN=sum(mtsum$SuppliedN),
                                 Total=sum(mtsum$Total)))
# knitr::kable(mtsumtot)

Graphical summary by items

# plot by order of item frequency
mtsummelt <- mt %>% select(Items, Supplied, month)
mtsummelt$Items <- factor(mtsummelt$Items, levels=mtsum$Items[order(mtsum$Total)])
ggplot(mtsummelt, aes(x=Items, fill=Supplied)) + coord_flip() + stat_count()

# plot proportion
ggplot(mtsummelt, aes(x=Items, fill=Supplied)) + coord_flip() + stat_count(position='fill')

# plot by order of proportion
mtsummelt$Items <- factor(mtsummelt$Items, levels=mtsum$Items[order(mtsum$SuppliedY/mtsum$Total)])
ggplot(mtsummelt, aes(x=Items, fill=Supplied)) + coord_flip() + stat_count(position='fill')

mtsummelt$Items <- factor(mtsummelt$Items, levels=mtsum$Items[order(mtsum$Total)])
ggplot(mtsummelt, aes(x=Items, fill=Supplied)) + stat_count() + coord_flip() +
  facet_wrap(~month) + theme_grey(8)

Graphical summary by date

# plot deliveries over time
deliverymelt <- deliveries %>% melt(id.vars=c("posix", "Delivery.ID", "nitems", "postal"),
                                    measure.vars=c("nyes", "nno"), value.name="itemcount",
                                    variable.name="Supplied")
ggplot(deliverymelt, aes(x=posix, y=itemcount, fill=Supplied)) + geom_bar(stat='identity')

About


Languages

Language:R 100.0%