In this post we will explore a reference to the latest Monkeypox data on GitHub and how to visualize it in R.
Monkeypox outbreaks have been at the global forefront as cases rise
worldwide. The original
post (linked at the end) describes where to access the data and how to visualize it
using maps and other useful packages. I will explore how
this could be extended to a dashboard for real-time data analysis, along
with a few additional visualizations.
# Attach the packages the snippets in this post rely on: ggplot2 (plots and
# map_data), dplyr (%>% and the data verbs), forcats (fct_recode),
# lubridate (as_date) and viridis (scale_fill_viridis).
# NOTE: map_data() and coord_map() also need the maps and mapproj packages
# to be installed.
library(ggplot2)
library(dplyr)
library(forcats)
library(lubridate)
library(viridis)

# Read worldwide case data
case_series <- read.csv("https://raw.githubusercontent.com/globaldothealth/monkeypox/main/timeseries-country-confirmed.csv")
# Preview the first rows to check the column layout.
head(case_series)
Date Cases Cumulative_cases Country
1 2022-07-25 1 1 Andorra
2 2022-07-26 2 3 Andorra
3 2022-07-27 0 3 Andorra
4 2022-07-28 0 3 Andorra
5 2022-07-29 0 3 Andorra
6 2022-07-30 0 3 Andorra
To obtain a data frame of locations for different maps, there are four options:
world
usa
state
county
We can quickly observe that the region names might not match the country names in our case data above. The columns also don’t share the same name (`region` here versus `Country` in the case data). We will resolve both when we do some joining and data fusion.
# Fetch the world map as a data frame of longitude/latitude points.
# The other maps listed above can be fetched the same way:
# df_usa <- map_data("usa")
# df_state <- map_data("state")
# df_county <- map_data("county")
df_world <- map_data(map = "world")
# Preview the first rows to see the available columns.
head(x = df_world)
long lat group order region subregion
1 -69.89912 12.45200 1 1 Aruba <NA>
2 -69.89571 12.42300 1 2 Aruba <NA>
3 -69.94219 12.43853 1 3 Aruba <NA>
4 -70.00415 12.50049 1 4 Aruba <NA>
5 -70.06612 12.54697 1 5 Aruba <NA>
6 -70.05088 12.59707 1 6 Aruba <NA>
# df_world %>% group_by(region) %>% count()

# Align the country names in the case data with the `region` names used by
# the world map, so the two tables can be joined on a shared `region` column.
case_map <- case_series %>%
  mutate(
    Country = fct_recode(
      Country,
      "USA" = "United States",
      "UK" = "United Kingdom",
      "Democratic Republic of the Congo" = "Democratic Republic Of The Congo",
      "Bosnia and Herzegovina" = "Bosnia And Herzegovina"
    )
  ) %>%
  # Gibraltar is dropped rather than renamed.
  filter(Country != "Gibraltar") %>%
  rename(region = Country) %>%
  mutate(
    # fct_recode() returns a factor; convert back to character so the join
    # key is a plain string and no unused "Gibraltar" level is carried along.
    region = as.character(region),
    Cumulative_cases = as.numeric(Cumulative_cases)
  )
head(case_map)
Date Cases Cumulative_cases region
1 2022-07-25 1 1 Andorra
2 2022-07-26 2 3 Andorra
3 2022-07-27 0 3 Andorra
4 2022-07-28 0 3 Andorra
5 2022-07-29 0 3 Andorra
6 2022-07-30 0 3 Andorra
# Attach the map coordinates to every case record whose `region` value is
# present in both tables; rows without a match on either side are dropped.
cases_joined <- inner_join(case_map, df_world, by = "region")
# Show the last rows of the combined table.
tail(cases_joined)
Date Cases Cumulative_cases region long
3587832 2022-08-05 0 1 Venezuela -60.80098
3587833 2022-08-05 0 1 Venezuela -60.48149
3587834 2022-08-05 0 1 Venezuela -60.40449
3587835 2022-08-05 0 1 Venezuela -60.34023
3587836 2022-08-05 0 1 Venezuela -60.16748
3587837 2022-08-05 0 1 Venezuela -60.01753
lat group order subregion
3587832 8.592139 1582 98756 <NA>
3587833 8.547265 1582 98757 <NA>
3587834 8.610254 1582 98758 <NA>
3587835 8.628759 1582 98759 <NA>
3587836 8.616992 1582 98760 <NA>
3587837 8.549316 1582 98761 <NA>
# Draw a map of cumulative cases for a single day.
#
# date:  day to plot; compared with `==` against the `Date` column.
# xlim:  longitude limits passed to coord_map().
# ylim:  latitude limits passed to coord_map().
# title: plot title.
# data:  joined case/map table to plot; defaults to the global `cases_joined`.
#
# Returns the ggplot object.
plot_cases <- function(date, xlim, ylim, title, data = cases_joined) {
  data %>%
    filter(Date == date) %>%
    ggplot(aes(long, lat, group = group)) +
    geom_polygon(aes(fill = Cumulative_cases), color = "white", size = 0.2) +
    scale_fill_viridis() +
    theme_linedraw() +
    theme(
      legend.position = "right",
      legend.direction = "vertical"
    ) +
    labs(fill = "Cumulative Cases") +
    # paste() already inserts a space between its arguments, so the literal
    # must not end in one (the original produced "As of  <date>").
    ggtitle(label = title, subtitle = paste("As of", date)) +
    xlab("Longitude") +
    ylab("Latitude") +
    coord_map(xlim = xlim, ylim = ylim)
}
# World view of the cumulative counts on 2022-08-05.
plot_cases(
  date = "2022-08-05",
  xlim = c(-180, 180),
  ylim = c(-55, 90),
  title = "Global Monkeypox Cases"
)

The CDC also has a CSV file you can download to get the latest counts for just the US. I downloaded that file and load it here.
# Load the downloaded CDC file and rename `State` to `region`.
cases_usa <- rename(
  read.csv("C:\\Users\\blake\\Desktop\\blog_data\\2022 U.S. Map & Case Count.csv"),
  region = State
)
# Lower-case the state names before joining them to the state map.
cases_usa <- mutate(cases_usa, region = tolower(region))

# State map coordinates, then keep only states present in both tables.
map_usa <- map_data("state")
usa_joined <- inner_join(cases_usa, map_usa, by = "region")
head(usa_joined)
region Cases Case.Range long lat group order subregion
1 alabama 19 11 to 50 -87.46201 30.38968 1 1 <NA>
2 alabama 19 11 to 50 -87.48493 30.37249 1 2 <NA>
3 alabama 19 11 to 50 -87.52503 30.37249 1 3 <NA>
4 alabama 19 11 to 50 -87.53076 30.33239 1 4 <NA>
5 alabama 19 11 to 50 -87.57087 30.32665 1 5 <NA>
6 alabama 19 11 to 50 -87.58806 30.32665 1 6 <NA>
# Map of US states filled by reported case count.
ggplot(usa_joined, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Cases)) +
  scale_fill_viridis() +
  theme_linedraw() +
  theme(
    legend.position = "right",
    legend.direction = "vertical"
  ) +
  labs(fill = "Cases") +
  # Title typo fixed: the original read "Monkeybox".
  ggtitle(label = "USA Monkeypox Cases") +
  xlab("Longitude") +
  ylab("Latitude")

# Convert the columns used by the time-series plots: `Date` becomes a date
# (scale_x_date() is applied to it below) and `Country` becomes a factor.
case_series$Date <- as_date(case_series$Date)
case_series$Country <- as.factor(case_series$Country)
# Line chart of daily and cumulative case counts for one country.
#
# country: value matched against the `Country` column of the global
#          `case_series` table; also used as the prefix of the plot title.
#
# Returns the ggplot object.
plot_stats <- function(country) {
  country_rows <- filter(case_series, Country == country)
  heading <- paste(country, "Monkeypox Cases")

  ggplot(country_rows, aes(x = Date)) +
    # The constant color strings become the legend entries.
    geom_line(aes(y = Cumulative_cases, color = "Cumulative_cases")) +
    geom_line(aes(y = Cases, color = "Cases")) +
    scale_x_date(breaks = scales::breaks_pretty(10)) +
    labs(color = "Category") +
    ggtitle(heading) +
    xlab("Date") +
    ylab("Number of Cases") +
    # theme_bw() must come first: it is a complete theme and would otherwise
    # discard the axis-text rotation set below.
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
}
# Time series for two individual countries.
plot_stats(country = "United States")
plot_stats(country = "Canada")

# Rank countries by their most recent cumulative total and keep the first 12
# (the object is named `top10` but holds twelve rows).
top10 <- case_series %>%
  # Sort by ascending date so last() picks each country's latest value.
  arrange(Date) %>%
  group_by(Country) %>%
  summarise(Last_Total_Cases = last(Cumulative_cases)) %>%
  arrange(desc(Last_Total_Cases)) %>%
  head(n = 12)

# One panel per selected country, showing daily and cumulative counts.
ggplot(
  filter(case_series, Country %in% top10$Country),
  aes(x = Date)
) +
  geom_line(aes(y = Cumulative_cases, color = "Cumulative_cases")) +
  geom_line(aes(y = Cases, color = "Cases")) +
  facet_wrap(~Country) +
  scale_x_date(breaks = scales::breaks_pretty(10)) +
  labs(color = "Category") +
  ggtitle("Top 12 Countries Monkeypox Cases") +
  xlab("Date") +
  ylab("Number of Cases") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

https://www.r-bloggers.com/2022/07/access-and-map-the-latest-monkeypox-case-data-in-r/