## If you haven't already installed the packages we'll need, do that now
install.packages("tidyverse")
install.packages("ggeffects")
install.packages("lme4")
install.packages("raster")
install.packages("cowplot")
install.packages("ggbeeswarm")
install.packages("viridis")

## Load in your packages; we'll start with tidyverse which houses "ggplot"
## We won't be setting a working directory, since all data will be downloaded
## from online or are available through the datasets we'll load in

## tidyverse has a ton of amazing packages; loading the "tidyverse" package
## allows us to easily (and in the correct order) load in all of these packages
library(tidyverse)

############ Intro to ggplot

##Just an introduction of how you would set up your ggplot

data(iris)

##Your data goes in first! Then the items for mapping in aes (aesthetics)
##Then code the geom that you want; in this case points
##Have the color change dependent on the species in the dataset and 
##increase point size
ggplot(iris,
       aes(x = Sepal.Width,
           y = Sepal.Length))+
  geom_point(aes(color = Species),
             size = 4)

############ Mapping

## We'll need to load in the "raster" "maps" "cowplot" packages for this section
library(raster); library(maps); library(cowplot)

## We'll start by making a world map
ggplot()+
  ## borders calls polygons from databases present in the "maps" package
  borders("world")+
  theme_bw()

## You made a map! Now, let's take step further and plot counties and states
## in the US
ggplot()+
  ## county borders in grey
  borders("county",
          colour = "grey")+
  ## state borders in black
  borders("state",
          colour = "black")+
  theme_bw()

## Let's take a look at just Indiana now, with county boarders
indiana <- map_data("county", "indiana")

##Get the county names
mid_range <- function(x) mean(range(x))
seats <- do.call(rbind,
                 lapply(split(indiana,
                              indiana$subregion),
                        function(d) {
                          data.frame(lat = mid_range(d$lat),
                                     long = mid_range(d$long),
                                     subregion = unique(d$subregion))
                        }))

##Plot!
ggplot(indiana,
       aes(x = long,
           y = lat)) +
  ##Indiana only counties
  geom_polygon(aes(group = group),
               fill = "white",
               colour = "grey60") +
  ##County names
  geom_text(data = seats,
            aes(label = subregion),
            size = 3)+
  theme_bw()



## Downloading the .csv file with location information directly from NEON
## We are then going to only grab the columns (1-5, 7-11) that we will use
NEON_sites = read.csv("https://www.neonscience.org/science-design/field-sites/export")
NEON_sites = NEON_sites[,c(1:5, 7:11)]

## Let's check out the column names
colnames(NEON_sites)

## Let's make a map of the NEON sites with country borders
ggplot(NEON_sites,
       aes(x = Longitude,
           y = Latitude))+
  borders("world") + 
  geom_point()+
  theme_bw()

## Tough to see the NEON sites from this far out.
## Let's make a new map of just some countries that are near the US
ggplot(NEON_sites,
       aes(x = Longitude,
           y = Latitude))+
  borders("world",
          xlim = c(-180, -50),
          ylim = c(18,75)) + 
  geom_point()+
  theme_bw()

##We'll change the shape and color of the sites to better designate them
NEON_sites$AorT = ifelse(grepl("Aquatic", NEON_sites$Site.Type),
                         "Aquatic",
                         "Terrestrial")
NEON_sites$CorR = ifelse(grepl("Core", NEON_sites$Site.Type),
                         "Core",
                         "Relocatable")

ggplot(NEON_sites,
       aes(x = Longitude,
           y = Latitude))+
  borders("world",
          xlim = c(-180, -50),
          ylim = c(18,75)) + 
  borders("state", fill = NA)+
  geom_point(aes(shape = CorR,
                 color = AorT),
             size = 2)+
  theme_bw()

NEON_sites$CRAT = paste(NEON_sites$AorT, NEON_sites$CorR)

## Looks okay, but let's make those shapes and colors better
ggplot(NEON_sites,
       aes(x = Longitude,
           y = Latitude))+
  borders("world",
          xlim = c(-180, -50),
          ylim = c(18,75)) + 
  borders("state", fill = NA)+
  geom_point(aes(shape = CRAT,
                 fill = CRAT),
             size = 3,
             alpha = 0.7)+
  ##Change the fill of the points based on order of the levels in the factor
  scale_fill_manual(values = c("#2892D7",
                               "#2892D7",
                               "#556F44",
                               "#556F44"),
                    name = "")+
  ##Change the shape of the points based on order of the levels in the factor
  scale_shape_manual(values = c(21,23,21,23),
                     name = "")+
  ##Zoom to specific x and y limits
  coord_cartesian(ylim = c(18, 72),
                  xlim = c(-170,-60))+
  theme_bw()+
  theme(legend.position = c(.25,.3))


## A useable map! You are on your way to become a master map maker!

## Next up: shapefiles!!
## Code to remake the figure from slide 16

##Download the shapefile data from NEON
download.file("https://www.neonscience.org/sites/default/files/NEONDomains_0.zip",
              ## CHANGE THE FILE DESTINATION HERE!
              destfile="C:/Users/Michael/Documents/NEONDomains_0.zip")

## Unzip the file
## CHANGE THE FILE DESTINATION HERE!
unzip("C:/Users/Michael/Documents/NEONDomains_0.zip",
      exdir = "C:/Users/Michael/Documents")

## Load in the shapefile
## CHANGE THE FILE DESTINATION HERE!
NEON_regions = shapefile("C:/Users/Michael/Documents/NEON_Domains.shp")

## Shapefiles are S4 objects, meaning that the typical $ operator to access data
## won't work here; for S4 objects, we use @ instead!
NEON_regions@data

## fortify() converts a mapping file (shapefile) to a data frame that can 
## more easily be plotted in ggplot
NEON_reg2 = fortify(NEON_regions, region = "DomainName")

## This may take a bit, so be patient!

MAIN = ggplot(NEON_sites, aes(x = Longitude, y = Latitude))+
  borders("state") + 
  geom_polygon(data = NEON_reg2,
               aes(x = long,y = lat,group = group, fill = id),color = "white")+
  borders("state", fill = NA)+
  geom_point(shape = 21, size = 2)+
  coord_cartesian(xlim = c(-65,-135),
                  ylim = c(20, 50))+
  guides(fill = guide_legend(nrow = 7, byrow = FALSE))+
  theme_bw()+
  theme(legend.position = "bottom",
        legend.title = element_blank())
MAIN

HAWAII = ggplot() +
  geom_polygon(data = NEON_reg2,
               aes(x = long,y = lat,group = group, fill = id),color = "white")+
  geom_point(data = NEON_sites, aes(x = Longitude, y = Latitude),
             shape = 21, size = 2)+
  coord_cartesian(xlim = c(-161, -154), ylim = c(18, 23))+
  theme_bw()+
  theme(legend.position = "None",
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank())

ALASKA = ggplot() +
  geom_polygon(data = NEON_reg2,
               aes(x = long,y = lat,group = group, fill = id),color = "white")+
  geom_point(data = NEON_sites, aes(x = Longitude, y = Latitude),
             shape = 21, size = 2)+
  coord_cartesian(xlim = c(-170, -130), ylim = c(50, 75))+
  theme_bw()+
  theme(legend.position = "None",
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank())


PUERTORICO = ggplot() +
  geom_polygon(data = NEON_reg2,
               aes(x = long,y = lat,group = group, fill = id),color = "white")+
  geom_point(data = NEON_sites, aes(x = Longitude, y = Latitude),
             shape = 21, size = 2)+
  coord_cartesian(xlim = c(-69, -64), ylim = c(17, 19))+
  theme_bw()+
  theme(legend.position = "None",
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank())

##Get ratios for plotting "correct" proportions of Alaska, Hawaii, Puerto Rico
(ratioHawaii  <- (23 - 18) / (-154 - (-161)))
(ratioAlaska  <- (75 - 50) / (-130 - (-170)))
(ratioPR  <- (19 - 17) / (-64 - (-69)))


ggdraw(MAIN) +
  draw_plot(ALASKA, width = 0.26, height = 0.26 * 10/6 * ratioAlaska, 
            x = 0.0725, y = 0.37) +
  draw_plot(HAWAII, width = 0.15, height = 0.15 * 10/6 * ratioHawaii, 
            x = 0.325, y = 0.37) +
  draw_plot(PUERTORICO, width = 0.15, height = 0.15 * 10/6 * ratioPR, 
            x = 0.725, y = 0.37)



#####################
####Correctly plotting mixed effects models in R

##Load in ggeffects and lme4
library(ggeffects)
library(lme4)

##Load in an easy to use dataset from lme4
data(sleepstudy)

##Let's take a look at differences among ggpredict, ggeffects, and ggemmeans

##Fit a linear model
## Response ~ Predictor(s)
m2 <- lm(Reaction ~ Days * Subject, data = sleepstudy)
summary(m2)


##Same terms, same model, different functions
##The difference lies in how they treat categorical predictors when they are not
##in the model (marginal effects of your continuous variable; holding categorical
##predictors at the baseline (ggpredict) or median value (ggeffect and ggemmeans))
out.pred = ggpredict(m2, terms = c("Days"))
out.eff = ggeffect(m2, terms = c("Days"))
out.emm = ggemmeans(m2, terms = c("Days"))

colors <- c("ggpredict" = "black",
            "ggeffect" = "red",
            "ggemmeans" = "blue")


##Plot the output from these
ggplot(out.pred,
       aes(x = x,
           y = predicted))+
  geom_ribbon(aes(ymax = conf.high,
                  ymin = conf.low),
              alpha = 0.2)+ 
  geom_line(aes(color = "ggpredict"), 
            size = 1)+
  geom_ribbon(data = out.eff,
              aes(ymax = conf.high,
                  ymin = conf.low),
              alpha = 0.2,
              fill = "red")+ 
  geom_line(data = out.eff,
            aes(color = "ggeffect"), 
            size = 1)+
  geom_ribbon(data = out.emm,
              aes(ymax = conf.high,
                  ymin = conf.low),
              alpha = 0.2,
              fill = "blue")+ 
  geom_line(data = out.emm,
            aes(color = "ggemmeans"), 
            size = 1)+
  scale_color_manual(values = colors)+
  ylab("Reaction")+
  xlab("Subject")+
  theme_classic()+
  theme(legend.position = c(0.25, 0.75))

##Note how the ggeffect and ggemmeans lines are the same!
##Both are different from ggpredict though
##This is because ggeffect and ggemmeans are holding the categorical predictor
##at a constant level that is proportional to the nnumber of predictors
##ggpredict, however, holds the categorical predictor at the reference level


##Now let's take a look at mixed effects models!

##Fit a mixed effects model of Reaction time by days with random slope of
##days by subject (Days | Subject)
m1 <- lmer(Reaction ~ Days + (Days | Subject),
           data = sleepstudy)

##Take a look at the intercept and slope coefficients of the fixed (main effect)
##portion of the model; intercept is 251 seconds; and reaction time increases
##by 10.46 seconds per day
fixef(m1)


##Lets plot the main effects with the correct error estimate!
##Terms specifies the terms that are present in the fixed portion of your model
##Type specifies whether it is fixed (main) or random (mixed effects) portions
fixeff.m1 = ggeffect(m1,
                     terms = c("Days"),
                     type = "fe")

ggplot(sleepstudy,
       aes(x = Days,
           y = Reaction))+
  geom_point(shape = 21,
             size = 3)+
  geom_ribbon(data = fixeff.m1,
              aes(x = x, 
                  y = predicted,
                  ymin = conf.low,
                  ymax = conf.high), 
              alpha = 0.2)+
  geom_line(data = fixeff.m1, 
            aes(x = x,
                y = predicted), 
            color = "black", 
            size = 1)+
  theme_classic()


##Since we have a random slopes model, let's take a look at how the reaciton of
##individual subjects changes through time
##NOTE this time we are using ggpredict! Not ggeffect or ggemmeans; this is 
##because ggpredict PREDICTS the values based on the information we give it
##Here we want to know the predicted values for the subjects across days
##Terms again, we are looking at both the slope (days) and intercept (subject)
##of the model
##Type we use "re" instead to specify that we are looking at the random portions
raneff.m1 = ggpredict(m1, terms = c("Days", "Subject"), type = "re")


ggplot(raneff.m1,
       aes(x = x,
           y = predicted))+
  geom_line(aes(group = group), 
            alpha = 0.75, 
            color = "grey")+
  geom_ribbon(data = fixeff.m1,
              aes(x = x,
                  ymin = conf.low,
                  ymax = conf.high),
              alpha = 0.2)+
  geom_line(data = fixeff.m1,
            aes(x = x,
                y = predicted),
            color = "black", 
            size = 1)+
  ylab("Reaction")+
  xlab("Days")+
  theme_classic()





##If you want to show variation across your random terms (slope or intercept),
##but don't want to have multiple lines on the same figure, you can use a 
##Random forest plot (also know as a dotplot)

##This will create a random forest plot for intercepts only!

##Create a data frame
##replace "m1" with model name
##replace "Subject" with random term is
##replace "sleepstudy" with your dataframe
dat2 = data.frame(Intercepts = ranef(m1, condVar = TRUE)$Subject[ , 1],
                  sd.interc = 1.96*sqrt(attr(ranef(m1, condVar = TRUE)[[1]],
                                             "postVar")[1, 1, 1]),
                  lev.names = unique(sleepstudy$Subject))

##This gets 95% CI points
dat2$conf.low = dat2$Intercepts - dat2$sd.interc
dat2$conf.high = dat2$Intercepts + dat2$sd.interc

##Color points based on whether it is above or below the population level estimate
dat2$direction = ifelse(dat2$Intercepts > 0, "pos", "neg")

##Reorder subject factor levels based on the values of the Intercepts data
dat2$lev.names = fct_reorder(dat2$lev.names,
                             dat2$Intercepts)

ggplot(dat2, aes(x = as.factor(lev.names),
                 y = Intercepts))+
  geom_hline(yintercept = 0)+
  geom_errorbar(aes(ymin = conf.low,
                    ymax = conf.high),
                width = 0)+
  geom_point(aes(color = direction), size = 3)+
  scale_color_manual(values = c("red", "blue"))+
  ylab("Intercept Estimate")+
  xlab("Subject")+
  coord_flip()+
  theme_classic()+
  theme(legend.position = "none")


##Here is code to look at the random slopes

##Create a data frame
##replace "m1" with model name
##replace "Subject" with random term is
##replace "sleepstudy" with your dataframe
dat3 = data.frame(Slopes = ranef(m1,
                                 condVar = TRUE)$Subject[ , 2] + fixef(m1)[2],
                  sd.slopesc = 1.96 * sqrt(attr(ranef(m1,
                                                      condVar = TRUE)[[1]],
                                                "postVar")[2, 2, 1]),
                  lev.names=as.factor(unique(sleepstudy$Subject)))

##This gets 95% CI points
dat3$conf.low = dat3$Slopes - dat3$sd.slopesc
dat3$conf.high = dat3$Slopes + dat3$sd.slopesc

##This code creates a factor that we will use to color our points
##If the confidence bands overlap 0, that subject will be coded as "none" for
##no change; if the confidence bands are below 0, that subject will be coded as
##"decrease"; if the confidence bands are above 0, that subject will be coded as
##"increase".
##"None" = slope for that level is not different from zero
##"Decrease" = slope is negative and response decreases as predictor increases
##"Increase" = slope is positive and response increases as predictor increases
dat3$direction = ifelse(dat3$conf.low < 0 & 0 < dat3$conf.high,
                        "none",
                        ifelse(dat3$conf.low < 0 & 0 > dat3$conf.high,
                               "decrease",
                               "increase"))

##Reorder subject factor levels based on the values of the slopes
dat3$lev.names = fct_reorder(dat3$lev.names,
                             dat3$Slopes)

ggplot(dat3, aes(x = lev.names, y = Slopes))+
  geom_errorbar(aes(ymin = conf.low,
                    ymax = conf.high),
                width = 0)+
  geom_point(aes(fill = direction), size = 3, shape = 21)+
  ##A line at zero
  geom_hline(yintercept = 0)+
  ##Another line at the population mean slope (fixed effect)
  geom_hline(yintercept = fixef(m1)[2], linetype = "dashed")+
  scale_fill_manual(values = c("red", "grey43"))+
  ylab("Slope Estimate")+
  xlab("Subject")+
  coord_flip()+
  theme_classic()+
  theme(legend.position = "none")





################################################################################
##Plot customization

library(viridis)

##Let's load in a new dataset
data(iris)
colnames(iris)

##Custom y axis, color gradient based on x axis
ggplot(iris,
       aes(x = Petal.Length,
           y = Petal.Width))+
  geom_point(aes(color = Petal.Length),
             size = 2)+
  ##Log transformed y axis
  ##Breaks at 0.1, 0.25, 0.5, 1, 2.5
  scale_y_continuous(trans = "log",
                     breaks = c(0.1, 0.25, 0.5, 1, 2.5),
                     labels = c("0.1 mm",
                                "0.25 mm",
                                "0.5 mm",
                                "1 mm",
                                "2.5 mm"))+
  ##Viridis color gradient
  scale_color_viridis()+
  ylab("Petal Width (mm)")+
  xlab("Petal Length (mm)")+
  theme_classic()



##Discrete colors, boxplots, and correctly jittered points

library(ggbeeswarm)

ggplot(iris,
       aes(x = Species,
           y = Petal.Width))+
  ##Add a boxplot! Remove outliers, since we will be plotting data with it
  geom_boxplot(outlier.shape = NA)+
  ##There is a lot of overlap in our data, so instead of using geom_point or
  ##geom_jitter, we will use geom_beeswarm to give a nice jittering of our points
  geom_beeswarm(aes(fill = Species),
                shape = 21,
                size = 4,
                alpha = 0.4)+
  ##Let's fill in our points with some nice colors
  scale_fill_manual(values = c("#817E9F",
                               "#7FC29B",
                               "#CBFF4D"))+
  ylab("Petal Width (mm)")+
  xlab("Species")+
  theme_classic()+
  ##Don't need a legend here
  theme(legend.position = "none")


##Faceting plot

ggplot(iris,
       aes(x = Petal.Length,
           y = Petal.Width))+
  ##Facet by species; label with BOTH variable and levels
  facet_wrap( ~ Species, labeller = "label_both")+
  geom_point(aes(color = Species),
             size = 4,
             alpha = 0.75)+
  scale_color_manual(values = c("#75DBCD",
                                "#805E73",
                                "#FAA381"))+
  theme(
    ##Change the size and color of the facet labels
    strip.text = element_text(size = 16),
    strip.background = element_rect(colour="black", fill="white"),
    ##No color background
    panel.background = element_rect(fill=NA),
    ##Get rid of background grid with element_blank()
    panel.grid = element_blank(),
    ##Let's use grids only on our y axis at the hashmarks
    panel.grid.major.y = element_line(color = "light grey"),
    ##No panel border
    panel.border = element_rect(color = "black", fill = NA),
    ##Axis line is just the y and x axis, not around the whole thing
    axis.line = element_line(color="black"),
    ##Legend position
    legend.position = "none",
    ##Axis text size and color
    axis.text = element_text(size=16,color="black"),
    ##Axis title size and color
    axis.title = element_text(size=18,color="black")
  )




##Custom theme, color, axis title on two lines, SAVING FIGURES

irisplot = ggplot(iris,
                  aes(x = Petal.Length,
                      y = Petal.Width))+
  geom_point(aes(color = Species),
             size = 4,
             alpha = 0.75)+
  scale_color_manual(values = c("#817E9F",
                                "#7FC29B",
                                "#CBFF4D"))+
  ##Using "\n" within your titles tells ggplot to send the next set of text on 
  ##another line
  ylab("Petal\nWidth (mm)")+
  xlab("Petal Length (mm)")+
  theme(
    ##No color background
    panel.background = element_rect(fill=NA),
    ##Get rid of background grid with element_blank()
    panel.grid = element_blank(),
    ##Let's use grids only on our y axis at the hashmarks
    panel.grid.major.y = element_line(color = "light grey"),
    ##No panel border
    panel.border = element_blank(),
    ##Axis line is just the y and x axis, not around the whole thing
    axis.line = element_line(color="black"),
    ##Legend position
    legend.position = c(0.8, 0.2),
    ##Legend title size and color
    legend.title = element_text(size=18,color="black"),
    ##Legend text size and color
    legend.text = element_text(size=16,color="black"),
    ##Get rid of the weird grey box around the legend key
    legend.key = element_rect(fill=NA,color=NA),
    ##Add a box around the legend
    legend.box.background = element_rect(color = "black"),
    legend.box.margin = margin(1, 1, 1, 1),
    ##Axis text size and color
    axis.text = element_text(size=16,color="black"),
    ##Axis title size and color
    axis.title = element_text(size=18,color="black")
  )
irisplot

ggsave("C:/Users/Michael/Documents/irisplot.tiff",
       plot = irisplot,
       dpi = 600)