Load library & data set

library(ggplot2)

# Read the outbreak records from the CSV file.
YemenData <- read.csv("Datasets/YemenCholeraOutbreak.csv")
# Convert the Date column to Date objects using as.Date()'s default parsing
# (if the file stored m/d/y dates, format = "%m/%d/%y" would be needed instead).
YemenData$Date <- as.Date(YemenData$Date)
# Put the rows in chronological order.
YemenData <- YemenData[order(YemenData$Date), ]
# Times: number of days between each record and 22 May 2017.
YemenData$Times <- as.numeric(
  YemenData$Date - as.Date("5/22/2017", format = "%m/%d/%Y")
)

Basic Plots

Notes

  • ggplot takes dataframes as the basic object, not an x vector and y vector

  • Once you’ve loaded the dataset, you can tell ggplot which variables to use for the x values, y values, color, etc. But note that ggplot won’t actually plot them until you tell it to draw something!

  • geom’s are different types of plot objects that you can add (draw) to the plot. You can set up points, lines, and other kinds of objects.

  • aes = aesthetics, this lets you tell ggplot what information to plot and how. You can set aes either in the main ggplot call or within a geom


Simple Example

Let’s do a simple example:

# Choose the data frame, map Date to x and Deaths to y, then draw the points.
ggplot(data = YemenData, mapping = aes(x = Date, y = Deaths)) +
  geom_point()


You can also add some automatic processing, like a Loess-smoothed line:

# Scatter plot of deaths over time with a loess-smoothed line drawn on top.
ggplot(data = YemenData, mapping = aes(x = Date, y = Deaths)) +
  geom_point() +
  geom_smooth(method = "loess")


Slightly fancier plots

Let’s add some more variables and specify the colors! We can also make a variable to hold the plot (in this case choleraplot) so we can add things later on. If we make choleraplot a variable, then we’ll need to use print(choleraplot) to display the plot at the end.

# Build the plot layer by layer and store it so more can be added later.
# Every layer carries its own aesthetic mapping, which lets deaths and cases
# share the same axes.
choleraplot <- ggplot(data = YemenData) +
  # Deaths: points plus a loess smoother.
  geom_point(mapping = aes(x = Date, y = Deaths), colour = "steelblue") +
  geom_smooth(mapping = aes(x = Date, y = Deaths), method = "loess") +
  # Cases: points plus a loess smoother.
  geom_point(mapping = aes(x = Date, y = Cases), colour = "grey") +
  geom_smooth(
    mapping = aes(x = Date, y = Cases),
    colour = "black",
    method = "loess"
  )
# A stored plot is only drawn when it is printed.
print(choleraplot)


Looks nice, but the axis labels aren’t quite right, and let’s add a title. We’ll do that with the labs function:

# Attach a title and explicit axis labels to the stored plot, then redraw it.
choleraplot <- choleraplot +
  labs(
    title = "Yemen Cholera Epidemic",
    x = "Date",
    y = "Number of Individuals"
  )
print(choleraplot)


Figure Legends

Now, we need a figure legend! We can make it appear automatically by defining the color inside aes.

# Mapping colour to a constant label inside aes() makes ggplot build a legend
# entry for each series automatically.
choleraplot <- ggplot(data = YemenData) +
  geom_point(mapping = aes(x = Date, y = Deaths, colour = "Deaths")) +
  # Optional loess smoother for deaths (left disabled):
  # geom_smooth(aes(x = Date, y = Deaths, colour = "Deaths"), method = "loess") +
  geom_point(mapping = aes(x = Date, y = Cases, colour = "Cases")) +
  # Optional loess smoother for cases (left disabled):
  # geom_smooth(aes(x = Date, y = Cases, colour = "Cases"), method = "loess") +
  labs(
    title = "Yemen Cholera Epidemic",
    subtitle = "Data from Humanitarian Data Exchange",
    x = "Date",
    y = "Number of Individuals"
  )
print(choleraplot)


But now, the legend title is “colour”. To remove the legend title or change it to something else, we can use either the guides or the overall figure theme:

# Replace the legend title with an empty string via the colour guide.
# Alternatives that could be added instead:
#   theme(legend.title = element_blank())
#   guides(colour = guide_legend("Legend Title!"))
choleraplot <- choleraplot +
  guides(colour = guide_legend(title = ""))
print(choleraplot)


More variations

Change the color scheme:

# Switch the colour scale to the "Set1" brewer palette and redraw.
choleraplot <- choleraplot +
  scale_colour_brewer(palette = "Set1")
print(choleraplot)

# To try a different palette instead:
# choleraplot <- choleraplot + scale_colour_brewer(palette = "Set2")
# print(choleraplot)


Change the legend location:

# Draw the plot with the legend underneath; choleraplot itself is not reassigned.
choleraplot +
  theme(legend.position = "bottom")

Changing the dot size

Make the size of the dots = the number of deaths at that time:

# Plot cases over time, scaling each point by the Deaths value of its row.
choleraplot <- ggplot(data = YemenData) +
  geom_point(
    mapping = aes(x = Date, y = Cases, colour = "Cases", size = Deaths)
  ) +
  # No method is given, so geom_smooth() chooses one itself.
  geom_smooth(mapping = aes(x = Date, y = Cases), colour = "red") +
  labs(
    title = "Yemen Cholera Epidemic",
    subtitle = "Data from Humanitarian Data Exchange",
    x = "Date",
    y = "Number of Individuals"
  ) +
  # Blank out the title of the colour legend.
  guides(colour = guide_legend(title = ""))
print(choleraplot)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Themes

There are pre-built themes that set a lot of the plot aesthetics, and more available with the ggthemes package (see here).

# Redraw the stored plot with the built-in minimal theme.
choleraplot +
  theme_minimal()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Redraw the stored plot with the built-in classic theme.
choleraplot +
  theme_classic()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

library(ggthemes)
# Redraw the stored plot with the Tufte theme from ggthemes.
choleraplot +
  theme_tufte()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Redraw the stored plot with the FiveThirtyEight theme from ggthemes.
choleraplot +
  theme_fivethirtyeight()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Panel Plots

We can plot multiple plots together using grid.arrange from the gridExtra package:

library(gridExtra)

# First panel: cases over time with a smoothed line.
yemencases <- ggplot(data = YemenData) +
  geom_point(mapping = aes(x = Date, y = Cases), colour = "maroon") +
  geom_smooth(mapping = aes(x = Date, y = Cases), colour = "red")

# Second panel: deaths over time with a smoothed line.
yemendeaths <- ggplot(data = YemenData) +
  geom_point(mapping = aes(x = Date, y = Deaths), colour = "steelblue") +
  geom_smooth(mapping = aes(x = Date, y = Deaths), colour = "blue")

# Lay the two plots out in two columns.
grid.arrange(yemencases, yemendeaths, ncol = 2)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'


Or, if the panel each row belongs to is indicated by one of the columns, you can use facet_wrap:

# One panel per Bulletin.Type value, laid out in two columns, with points
# coloured by the same variable.
facetplot <- ggplot(data = YemenData) +
  geom_point(mapping = aes(x = Times, y = Deaths, colour = Bulletin.Type)) +
  facet_wrap(facets = ~ Bulletin.Type, ncol = 2) +
  scale_colour_brewer(palette = "Set1")
# Optional extras that could be appended to the chain above:
#   labs(title = "Yemen Cholera Epidemic",
#        subtitle = "Data from Humanitarian Data Exchange",
#        x = "Time", y = "Number of Individuals")
#   theme(legend.title = element_blank())
#   guides(colour = guide_legend(""))
print(facetplot)

facet_grid is similar but clears off some of the headers when you have more than one variable that you’re looking at over the panels (e.g. age categories as rows, gender as columns).


Saving

Lastly, to save your plot as a file, you can use ggsave:

# Write the most recently displayed plot to a PNG file.
ggsave(filename = "plotexample.png", plot = last_plot())
## Saving 5 x 4 in image
# Write a specific stored plot (choleraplot) to a PDF file.
ggsave(filename = "plotexample.pdf", plot = choleraplot)
## Saving 5 x 4 in image
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'