FIA_range_shift_analysis_subset2022.Rmd

---
title: "FIA Analysis Take 3"
author: "Katie Nigro"
date: "`r Sys.Date()`"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Set-up

These are the packages I need:

```{r,message=FALSE,warning=FALSE}
library(readr)
library(tidyr)
library(dplyr)
library(plyr)
library(purrr)
library(DT)
library(ggplot2)
library(ggfortify)
library(ggpubr)
library(leaflet)
library(mapdata)
library(car)
library(ggmap)
library(fitdistrplus)
library(logspline)
library(SIBER)
library(emmeans)
library(MASS)
#library(rjags)
library(MuMIn)
library(stringr)
```

Let's first read in the data.

```{r,warning=FALSE,message=FALSE}
## Load the FIA tree, seedling and plot tables (paths are relative to the
## working directory).
tree <- read_csv("TREE.csv")
seedling <- read_csv("SEEDLING.csv")
# `plot` holds the plot table for the rest of the document.
plot <- read_csv("annual_plots2020.csv")
```

```{r}
# Condition-level table, joined to the plot table below through PLT_CN.
cond <- read_csv("compiled_data_annual2020/COND.csv")

# Environmental variables; the first column is dropped by position.
env_vars_OG <- read_csv("env_vars_FIA_2022.csv") %>%
  dplyr::select(-1)

# Keep plots that have environmental data, attach their condition rows, then
# drop rows with COND_STATUS_CD == 2 whose NF_SAMPLING_STATUS_CD is 0 or whose
# NF_PLOT_STATUS_CD is 3.
plot_subset <- plot %>%
  filter(CN %in% env_vars_OG$PLT_CN) %>%
  left_join(cond, by = c("CN" = "PLT_CN")) %>%
  filter(
    !(COND_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 0),
    !(COND_STATUS_CD == 2 & NF_PLOT_STATUS_CD == 3)
  )

# Row count versus number of distinct plots.
nrow(plot_subset)
length(unique(plot_subset$CN))

# How many plots carry 1, 2, ... distinct condition status codes.
# all plots only have one condition status (Either forest or nonforest)
plot_subset %>%
  group_by(CN) %>%
  dplyr::summarise(n_conds = n_distinct(COND_STATUS_CD)) %>%
  group_by(n_conds) %>%
  dplyr::summarise(n_plots = n())


```

```{r}
# Restrict the environmental variables to plots retained in plot_subset.
env_vars <- env_vars_OG %>%
  filter(PLT_CN %in% plot_subset$CN)
```

Now I will summarize the number of adults and seedlings of each species in each plot.

```{r,echo=FALSE,warning=FALSE,message=FALSE, results='hide'}
# Number of tree records per species (SPCD) and plot (PLT_CN).
adults <- tree %>%
  group_by(SPCD, PLT_CN) %>%
  dplyr::summarise(n = n())

head(adults)

# Number of seedling records per species and plot.
seeds <- seedling %>%
  group_by(SPCD, PLT_CN) %>%
  dplyr::summarise(n = n())

head(seeds)
```

```{r,warning=FALSE, message=FALSE, echo=FALSE}
## One CSV per disturbance category.
fire <- read_csv("fire.plots2022.csv")
ID <- read_csv("insect.disease.plots2022.csv")
harvest <- read_csv("harvest.plots2022.csv")
other <- read_csv("other.plots2022.csv")
wind <- read_csv("wind.plots2022.csv")
undis <- read_csv("undisturbed.plots2022.csv")

# Stack all categories and keep only plots present in plot_subset.
dist <- bind_rows(fire, ID, harvest, other, wind, undis) %>%
  filter(PLT_CN %in% plot_subset$CN)
```

```{r,echo=FALSE,eval=FALSE}
# Attach the plot-table columns to every disturbance record.
survey_data <- dist %>%
  left_join(plot, by = c("PLT_CN" = "CN"))

# Records per plot design code.
# all plots are design code = 1 = standard plot design
survey_data %>%
  group_by(DESIGNCD) %>%
  dplyr::summarise(n = n())

## Records per disturbance agent (sample size by disturbance type).
dist %>%
  group_by(Agent) %>%
  dplyr::summarise(n = n())
```

Join disturbance data with the adult and seedling dataframes. There are NAs in the joined data because the adults and seeds dataframes contain all plots, whereas the disturbance dataframe only has plots that met my criteria. So this is ok.

```{r,echo=FALSE}
# Adults: attach disturbance information to every species x plot tally.
dist_adults_joined <- adults %>%
  left_join(dist, by = "PLT_CN")
head(dist_adults_joined)
# there are NAs for the plots that are not in my analysis, this is ok
summary(is.na(dist_adults_joined))

## Number of seedling rows per measurement year, shown as an interactive table.
## We will just use those surveyed from 2009-2018.
seeds %>%
  left_join(dist, by = "PLT_CN") %>%
  group_by(MEASYEAR) %>%
  tally() %>%
  datatable()

# Seedlings: same join, keeping only rows measured after 2008.
dist_seeds_joined <- seeds %>%
  left_join(dist, by = "PLT_CN") %>%
  filter(MEASYEAR > 2008)
head(dist_seeds_joined)
# there are NAs for the plots that are not in my analysis, this is ok
summary(is.na(dist_seeds_joined))


```

## Selecting species for analysis

I will use both the most recent surveys and the previous survey (if applicable) to tally adult presence at a plot. If the adult of a species was present in at least one of the surveys (either alive or dead), that plot will be counted as having adults of that species present. 
```{r, echo=FALSE, results='hide'}
### count plots as containing adults if either the most current survey OR the previous survey documented the species ###
# Overview of this chunk:
#   1. recent.plots / plot_progression: map each plot in `dist` to its PREV_PLT_CN.
#   2. prev_plot_spp: adult tallies from the previous visit (SPCD.prev, n.prev)
#      placed beside the tallies from the most recent visit (SPCD, n).
#   3. species.sum: per-plot comparison of the two species lists (inspection only).
#   4. species.sum.long: one row per plot x species, regardless of which visit
#      recorded the species.

#make data frame linking most recent plot CN to previous plot CN
recent.plots <- dist %>% 
                  dplyr::select("PLT_CN")

plot_progression <- left_join(recent.plots, plot, by=c("PLT_CN"="CN")) %>%
  dplyr::select(c("PLT_CN","PREV_PLT_CN"))

#make datatable with species recorded in most recent survey (SPCD) and in the previous survey (SPCD.prev)
# The second left_join re-attaches PLT_CN to each previous-visit row; the
# right_join keeps every row of dist_adults_joined, and rows without a
# disturbance Agent are then dropped.
prev_plot_spp <- plot_progression %>% 
  filter(!is.na(PREV_PLT_CN)) %>% 
  dplyr::select("PREV_PLT_CN") %>% 
  left_join(adults, by=c("PREV_PLT_CN" = "PLT_CN")) %>% 
  left_join(plot_progression, by="PREV_PLT_CN") %>% 
  dplyr::rename(SPCD.prev = SPCD, n.prev = n) %>% 
  right_join(dist_adults_joined, by="PLT_CN") %>% 
  filter(!is.na(Agent))

# Number of distinct previous-visit and most-recent-visit plot IDs retained.
length(unique(prev_plot_spp$PREV_PLT_CN))
length(unique(prev_plot_spp$PLT_CN))

#looking at differences in species codes between recent and previous surveys
# Each species list is collapsed to a single comma-separated string per plot.
species.sum <- prev_plot_spp %>% 
  group_by(PLT_CN, Agent) %>% 
  dplyr::summarise(spp = toString(unique(SPCD)),spp.prev = toString(unique(SPCD.prev)))
#look at just plots where species differ between recent and previous surveys
# `!spp == spp.prev` parses as !(spp == spp.prev); the second filter removes
# plots whose previous-survey list is the literal string "NA".
species.sum %>% 
  filter(!spp == spp.prev) %>% 
  filter(!spp.prev=="NA")

## melt dataframe so that all species are listed under "SPCD", whether identified in most recent survey or previous survey. Clean up the dataframe by removing unneccesary columns. Then remove duplicated rows that were created when recent survey was fully joined with previous survey 
# Rows whose SPCD is NA remain in the result; they are filtered out later,
# where adult sample sizes are computed.
species.sum.long<- prev_plot_spp %>% 
  pivot_longer(c(SPCD.prev, SPCD), names_to="period", values_to="SPCD") %>% 
  dplyr::select(-one_of(c("n.prev","n","period"))) %>%
  distinct()
```

The histogram below displays the year that plots in the analysis were measured. Some plots were measured twice, and we used both surveys to detect adult presence. These plots (~40%) have both the recent survey year (all between 2010-2018) and the previous survey year (all between 2000-2013) displayed in the histogram. Some plots (~60%) were only surveyed once, and their year of measurement is shown in pink (surveyed between 1995-2018). The second histogram displays the difference in years between the first and second survey for all plots that were surveyed twice.  

```{r, echo=FALSE}
## Measurement year of the most recent survey (MEASYEAR.rec) and, where one
## exists, of the previous survey (MEASYEAR.prev), plus the gap between them.
timeline <- recent.plots %>%
  left_join(plot, by = c("PLT_CN" = "CN")) %>%
  dplyr::select(c("PLT_CN", "PREV_PLT_CN", "MEASYEAR")) %>%
  left_join(plot, by = c("PREV_PLT_CN" = "CN")) %>%
  dplyr::select(c("PLT_CN", "PREV_PLT_CN", "MEASYEAR.x", "MEASYEAR.y")) %>%
  dplyr::rename(MEASYEAR.rec = MEASYEAR.x, MEASYEAR.prev = MEASYEAR.y) %>%
  dplyr::mutate(year.diff = MEASYEAR.rec - MEASYEAR.prev)

par(mfrow = c(1, 1))

##### overlaid histograms of the three groups of measurement years
# plots that didn't have a previous survey were surveyed from 1995 - 2018
hist(
  timeline$MEASYEAR.rec[is.na(timeline$PREV_PLT_CN)],
  col = rgb(1, 0, 0, 0.5),
  breaks = 24,
  main = "Histogram of plot measurement year",
  xlab = "Measurement Year"
)
# all plots that had a previous survey had their most recent survey between 2010 - 2018
hist(
  timeline$MEASYEAR.rec[!is.na(timeline$PREV_PLT_CN)],
  col = rgb(0, 0, 1, 0.3),
  breaks = 9,
  add = TRUE
)
# previous surveys were all measured between 2000 - 2013
hist(timeline$MEASYEAR.prev, col = rgb(1, 1, 0, 0.2), breaks = 14, add = TRUE)
legend(
  "topright",
  legend = c(
    "MEASYEAR of plots with only 1 survey",
    "recent MEASYEAR of plots with 2 surveys",
    "previous MEASYEAR of plots with 2 surveys"
  ),
  fill = c(rgb(1, 0, 0, 0.5), rgb(0, 0, 1, 0.3), rgb(1, 1, 0, 0.2))
)
######

## number of plots in each category
# plots that didn't have a previous survey
no.prev.surv.plots <- sum(is.na(timeline$PREV_PLT_CN))
no.prev.surv.plots
# plots that had a previous survey
prev.surv.plots <- sum(!is.na(timeline$PREV_PLT_CN))
prev.surv.plots
# share of plots with a single survey (57% of plots in analysis only had one survey)
no.prev.surv.plots / (prev.surv.plots + no.prev.surv.plots)

## difference in years between first and second survey
par(mfrow = c(1, 1))
# most plots have 10 years between surveys
hist(timeline$year.diff, main = "Histogram of number of years between surveys")
```

Since some of the plots have a most recent survey as early as 1995-2006, we want to exclude these plots from our seedling sample, as seedlings present in 1995 are likely to be adult trees now and do not really capture the newer cohort of trees that we would expect to have been influenced by recent climate warming. Therefore, we will only look at plots most recently surveyed in or after 2007, which will include the later chunk of plots with only one survey and all the most recent visits of plots with two surveys. Below are tables of sample sizes for each species and disturbance type. The first shows adult sample sizes and the second shows seedling sample sizes.

```{r, echo=FALSE,warning=FALSE,message=FALSE}
# Adults: number of distinct plots per species and disturbance agent, counting
# a species as present if either survey recorded it.
adult_sample_cmbsurv <- species.sum.long %>%
  filter(!is.na(SPCD)) %>%
  group_by(SPCD, Agent) %>%
  dplyr::summarise(n_plots = n_distinct(PLT_CN))
datatable(adult_sample_cmbsurv)

# Seedlings: same tally, limited to rows with a disturbance agent and a
# measurement year after 2008 (the later chunk of surveys).
sample_seeds <- dist_seeds_joined %>%
  filter(!is.na(Agent)) %>%
  filter(MEASYEAR > 2008) %>%
  group_by(SPCD, Agent) %>%
  dplyr::summarise(n_plots = n_distinct(PLT_CN))
datatable(sample_seeds, caption = "Seedling Sample Size")
```

Let's set the minimum number of plots at 60. Since there are always fewer seedling plots than adult plots, we will look at the seedlings first. We will also only look at the fire, insect/disease, and undisturbed categories, since the other disturbance categories have very low sample sizes.

```{r, echo=FALSE}
# Species with at least 60 seedling plots in more than one of the retained
# disturbance categories (harvest, other and wind are excluded).
sample_seeds_over60 <- sample_seeds %>%
  filter(n_plots >= 60, !Agent %in% c("harvest", "other", "wind")) %>%
  group_by(SPCD) %>%
  dplyr::summarise(
    dist_over60 = n_distinct(Agent),
    disturbance = list(Agent)
  ) %>%
  filter(dist_over60 > 1)

# Lookup table translating FIA species codes to common names.
species.names <- data.frame(
  species.code = as.numeric(c(
    15, 17, 19, 66, 73, 93, 101, 106, 108, 113, 122, 133, 202, 242, 746, 814
  )),
  species.name = c(
    "white fir", "grand fir", "subalpine fir", "Rocky Mountain juniper",
    "western larch", "Engelmann spruce", "whitebark pine",
    "two needle pinyon", "lodgepole pine", "limber pine", "ponderosa pine",
    "singleleaf pinyon", "Douglas-fir", "western redcedar", "trembling aspen",
    "Gambel oak"
  )
)

# Show the retained species with their names.
# picked up western larch and rocky mountain juniper in 2/3 disturbances
# (insect.disease and none for juniper and fire and none for western larch.)
sample_seeds_over60 %>%
  left_join(species.names, by = c("SPCD" = "species.code")) %>%
  datatable()
```

There are 16 species in the FIA database that have 60 or more plots in at least 2 of the 3 disturbance types with seedlings present. 

This is a sample size table for each species, age and disturbance

```{r,echo=FALSE}
# Long table: one row per species x disturbance agent with the number of plots
# containing adults (n.adult) and seedlings (n.seed), plus the species name.
# The join keys are written out so the match does not depend on whichever
# columns the two tables happen to share.
sample_size <- adult_sample_cmbsurv %>%
  filter(SPCD %in% species.names$species.code) %>%
  filter(!Agent %in% c("harvest", "other", "wind")) %>%
  dplyr::rename(n.adult = n_plots) %>%
  left_join(sample_seeds, by = c("SPCD", "Agent")) %>%
  dplyr::rename(n.seed = n_plots) %>%
  left_join(species.names, by = c("SPCD" = "species.code"))

# Wide table: one row per species, with adult/seedling counts for fire,
# insect/disease and undisturbed ("none") plots side by side.
# NOTE(review): the chain starts from the fire rows, so a species with no fire
# row in `sample_size` would not appear here -- confirm that is acceptable.
sample_size_wide <- sample_size %>%
  filter(Agent == "fire") %>%
  dplyr::rename(n.adult.fire = n.adult, n.seed.fire = n.seed) %>%
  dplyr::select(-Agent) %>%
  left_join(
    sample_size %>%
      filter(Agent == "insect.disease") %>%
      dplyr::rename(n.adult.ID = n.adult, n.seed.ID = n.seed) %>%
      dplyr::select(-Agent),
    by = "species.name"
  ) %>%
  left_join(
    sample_size %>%
      filter(Agent == "none") %>%
      dplyr::rename(n.adult.none = n.adult, n.seed.none = n.seed) %>%
      dplyr::select(-Agent),
    by = "species.name"
  ) %>%
  dplyr::select(
    species.name, n.seed.fire, n.adult.fire, n.seed.ID, n.adult.ID,
    n.seed.none, n.adult.none
  )

#write.csv(sample_size_wide, "sample_size_wide.csv")

# Species with fewer than 60 seedling plots in fire (so only insect/disease is
# usable) and, conversely, fewer than 60 in insect/disease.
# NOTE(review): rows where n.seed is NA (no seedling plots at all) are dropped
# by these filters, so such species are not listed -- confirm this is intended.
insectonlyspp <- sample_size %>%
  filter(n.seed < 60 & Agent == "fire") %>%
  pull(species.name)
fireonlyspp <- sample_size %>%
  filter(n.seed < 60 & Agent == "insect.disease") %>%
  pull(species.name)
```

And the below plots show sample size for each species within each disturbance type graphically. 

```{r}
# Dodged bar charts of plot counts per disturbance agent, one bar per species.
adult.sample <- ggplot(
  sample_size,
  aes(x = Agent, y = n.adult, fill = factor(species.name))
) +
  geom_col(position = "dodge2") +
  ggtitle("Number of plots with Adults") +
  xlab("") +
  ylab("# plots") +
  ylim(c(0, 4000)) +
  scale_fill_discrete(name = "Species")

seedling.sample <- ggplot(
  sample_size,
  aes(x = Agent, y = n.seed, fill = factor(species.name))
) +
  geom_col(position = "dodge2") +
  ggtitle("Number of plots with Seedlings") +
  xlab("") +
  ylab("# plots") +
  ylim(c(0, 4000)) +
  scale_fill_discrete(name = "Species")

# Both panels side by side with a single shared legend.
ggarrange(adult.sample, seedling.sample, common.legend = TRUE)
```

Looking at the seedling and adult sample sizes, we will exclude white fir, grand fir, limber pine, two needle pinyon, and western redcedar in fire plots from the analysis, because these groups have very low numbers of plots with seedlings. We also exclude singleleaf pinyon from the analysis because the plots in this analysis do not seem to capture the full range of this species (its niche is weirdly cut off in climate space).

```{r}
# Sample size of "seedling only" plots: for each analysis species and
# disturbance agent, count the seedling rows on plots where no adult of that
# species was recorded in either survey.
#
# The anti-join on (SPCD, PLT_CN) removes seedling rows that have a matching
# adult record. It replaces a per-species loop that grew the result with
# rbind(); the counts are the same, only the row order differs.
seedlingonly.sample <- dist_seeds_joined %>%
  dplyr::ungroup() %>%
  filter(SPCD %in% species.names$species.code) %>%
  dplyr::anti_join(
    species.sum.long %>%
      dplyr::ungroup() %>%
      dplyr::distinct(SPCD, PLT_CN),
    by = c("SPCD", "PLT_CN")
  ) %>%
  dplyr::group_by(Agent, SPCD) %>%
  dplyr::summarise(n.seedonly = dplyr::n(), .groups = "drop")

# View() needs an interactive session, so fall back to an HTML table when the
# document is knitted.
if (interactive()) {
  View(
    sample_size %>%
      left_join(seedlingonly.sample, by = c("SPCD", "Agent"))
  )
} else {
  sample_size %>%
    left_join(seedlingonly.sample, by = c("SPCD", "Agent")) %>%
    datatable()
}


```

## Measurement years distribution

The histograms below show the distribution of recent (blue) and previous (green) measurement years for the plots where adults and seedlings of each species were found.  
```{r, echo=FALSE}
# spp_hist_data <- species.sum.long %>% 
#   left_join(plot, by=c("PREV_PLT_CN" = "CN")) %>% 
#   dplyr::select(c(PREV_PLT_CN:SPCD,MEASYEAR.y)) %>% 
#   dplyr::rename("Prev.MEASYEAR" = "MEASYEAR.y","MEASYEAR"="MEASYEAR.x") 
# par(mfrow=c(5,3))
# for(i in 1:length(species.names$species.code)){
# hist(spp_hist_data[spp_hist_data$SPCD==species.names$species.code[i],]$MEASYEAR, col=rgb(0,0,1,0.5), breaks=(max(spp_hist_data[which(spp_hist_data$SPCD==species.names$species.code[i] & !is.na(spp_hist_data$MEASYEAR)),]$MEASYEAR) - min(spp_hist_data[which(spp_hist_data$SPCD==species.names$species.code[i] & !is.na(spp_hist_data$MEASYEAR)),]$MEASYEAR))+1, main=paste("Histogram for",species.names$species.name[i],"adults",sep=" "),xlab="Measurement Year")
# hist(spp_hist_data[spp_hist_data$SPCD==species.names$species.code[i],]$Prev.MEASYEAR, col=rgb(0,1,0,0.3), breaks=(max(spp_hist_data[which(spp_hist_data$SPCD==species.names$species.code[i] & !is.na(spp_hist_data$Prev.MEASYEAR)),]$Prev.MEASYEAR) - min(spp_hist_data[which(spp_hist_data$SPCD==species.names$species.code[i] & !is.na(spp_hist_data$Prev.MEASYEAR)),]$Prev.MEASYEAR))+1, add=TRUE)
# }

```

```{r, echo=FALSE}
#seedling histograms
# par(mfrow=c(5,3))
# for(i in 1:length(species.names$species.code)){
# hist(dist_seeds_joined[dist_seeds_joined$SPCD == species.names$species.code[i],]$MEASYEAR, col=rgb(0,0,1,0.5), breaks=(max(dist_seeds_joined[which(dist_seeds_joined$SPCD==species.names$species.code[i] & !is.na(dist_seeds_joined$MEASYEAR)),]$MEASYEAR) - min(dist_seeds_joined[which(dist_seeds_joined$SPCD==species.names$species.code[i] & !is.na(dist_seeds_joined$MEASYEAR)),]$MEASYEAR))+1,  main=paste("Histogram for",species.names$species.name[i],"seedlings",sep=" "),xlab="Measurement Year")
# }
```

```{r, echo=FALSE}
##create function that appends climate data to each species/lifestage/disturbance

#' Attach environmental variables to one species x disturbance subset
#'
#' Keeps the rows of `joined_df` whose `SPCD` equals `sp_code` and whose
#' `Agent` equals `agent_name`, reduces them to the columns `SPCD`, `PLT_CN`,
#' `Agent`, `dist_year` and `source`, and left-joins `env_vars` by `PLT_CN`.
#'
#' @param joined_df Data frame with disturbance and species data joined; in
#'   this document either `species.sum.long` (adults) or `dist_seeds_joined`
#'   (seedlings).
#' @param sp_code FIA species code to keep (see the FIA user guide).
#' @param agent_name Disturbance category to keep, matched against `Agent`.
#' @param env_vars Data frame of environmental variables with a `PLT_CN`
#'   column.
#'
#' @return The filtered rows with the columns of `env_vars` appended; plots
#'   without a match in `env_vars` are kept with `NA` in those columns.
#' @export
merge_sp_dist_env <- function(joined_df, sp_code, agent_name, env_vars) {
  matching_rows <- filter(joined_df, SPCD == sp_code & Agent == agent_name)
  plot_keys <- dplyr::select(
    matching_rows,
    SPCD, PLT_CN, Agent, dist_year, source
  )
  left_join(plot_keys, env_vars, by = "PLT_CN")
}

   
# FIA codes of every species that passed the seedling sample-size screen
# (taken from sample_seeds_over60) and the three disturbance categories.
sp_codes <- as.character(unique(sample_seeds_over60$SPCD))
dist_names <- c("fire", "insect.disease", "none")

# Adults: one data frame of environmental variables per species x disturbance
# combination, stored in a list under the name "<SPCD>_<disturbance>_cvars".
# seq_along() keeps the loops from running when a vector is empty.
adult_data <- list()
for (i in seq_along(sp_codes)) {
  for (j in seq_along(dist_names)) {
    df <- merge_sp_dist_env(species.sum.long, sp_codes[i], dist_names[j], env_vars)
    adult_data[[paste0(sp_codes[i], "_", dist_names[j], "_cvars")]] <- df
  }
}

# Seedlings: same structure and element names as adult_data.
seedling_data <- list()
for (i in seq_along(sp_codes)) {
  for (j in seq_along(dist_names)) {
    df <- merge_sp_dist_env(dist_seeds_joined, sp_codes[i], dist_names[j], env_vars)
    seedling_data[[paste0(sp_codes[i], "_", dist_names[j], "_cvars")]] <- df
  }
}
```

```{r}
##writing out adult and seedling data with environmental vars
#write.csv(adult_data[[1]],paste(names(adult_data)[1],".csv",sep=""))

```


# Analysis

## Principal Components Analysis

I will first run a principal components analysis (PCA) on a suite of climate variables for all plots included in the analysis (n = 59,453). Climate variables were collected from PRISM and [ClimateWNA](https://sites.ualberta.ca/~ahamann/data/climatewna.html). First I will evaluate which climate variables are highly correlated (\>0.9) and only keep one of each highly correlated pair.

```{r, echo = FALSE,eval=FALSE}
#first look at correlations in climate variables
colnames(env_vars)
```

```{r, echo=FALSE}
# Logical matrix flagging variable pairs whose absolute correlation exceeds
# 0.9, after removing identifier / bookkeeping columns.
(env_vars %>%
  dplyr::select(-PLT_CN, -X, -RSCD, -INVYR, -ANN_INV, -CYCLE, -td) %>%
  cor() %>%
  abs()) > 0.9
##CMD is only super correlated to temperature variables and not precipitation variables, which makes me think it is overall more driven by temperature than precipitation

# Variables kept for the PCA: the same identifier columns are removed, along
# with one member of each highly correlated pair.
env_vars_cut <- env_vars %>%
  dplyr::select(
    -PLT_CN, -X, -RSCD, -INVYR, -ANN_INV, -CYCLE, -td,
    -ppt, -tmean, -tmax, -tmin, -vpdmin, -vpdmax, -mwmt, -ahm, -DD5,
    -bffp, -effp, -emt, -eref, -tave_wt, -tave_sm, -ppt_sm
  )

# env_vars_cut_alt <- env_vars %>% 
#   dplyr::select(-PLT_CN,-X,-td, -ppt, -tmean, -tmax, - tmin, -vpdmin, -vpdmax, -mwmt, -ahm, -DD5, -bffp, -effp, -emt, -eref, -tave_wt, -tave_sm, -ppt_sm)
# 
# env_vars_cut_alt %>% 
#   cor() %>% 
#   abs()>0.9

#log transform precipitation variables to make relationships linear 
# env_vars_cut_alt_trans <- env_vars_cut_alt %>% 
#   mutate(MSP_log = log(msp),SHM_log = log(shm), PPT_wt_log = log(ppt_wt), PAS_log = log(pas+0.999), DD_0_log = log(dd_0), .keep="unused")

```

```{r,eval=FALSE,echo=FALSE}
#look at linearity of relationships
ncol(env_vars_cut)
# 
# pdf(file="C:/Users/Katie/Google Drive/FIA project/FIA_rproject/pre_trans_pairs_2022.pdf")
# 
# pairs(env_vars_cut, lower.panel = NULL)
# 
# dev.off()
```

```{r, echo=FALSE}
##log-transform precipitation variables to make relationships between climate variables more linear
# `.keep = "unused"` drops each source column once its log version exists.
# dplyr::mutate() is called explicitly: plyr is attached after dplyr in this
# document and exports its own mutate(), which has no `.keep` argument and
# would treat it as a new column.
env_vars_cut_trans <- env_vars_cut %>%
  dplyr::mutate(
    MSP_log = log(msp),
    SHM_log = log(shm),
    PPT_wt_log = log(ppt_wt),
    PAS_log = log(pas + 0.999), # offset added before taking the log
    DD_0_log = log(dd_0),
    .keep = "unused"
  )
```

```{r,eval=FALSE}
#check linearity of relationships with plots
#these take a while to produce

# pdf(file="C:/Users/Katie/Google Drive/FIA project/FIA_rproject/post_trans_pairs.pdf")
# 
# pairs(env_vars_cut_trans, lower.panel=NULL)
# 
# dev.off()
```

After removing some variables due to high correlation, some of the precipitation variables were log-transformed to make relationships linear, which is an assumption of PCA. The climate variable codes which were kept and their definitions are as follows:

-   CMD: climate moisture deficit
-   DD_0: degree days below 0 degrees Celsius
-   MCMT: mean temperature of the coldest month (Celsius)
-   MSP_log: log-transformed mean summer (May - Sept) precipitation (mm)
-   NFFD: number of frost-free days
-   PAS_log: log-transformed precipitation as snow (mm)
-   PPT_wt_log: log-transformed winter (Dec - Feb) precipitation (mm)
-   SHM_log: log-transformed summer heat moisture index (mean temperature of the warmest month/ (mean summer precipitation/1000))

After running the PCA on these variables, we see that the first two components capture 88% of the variability in climate data.

```{r, echo = FALSE}
# PCA on the transformed climate variables. The fit is loaded from disk; the
# commented lines show how it was produced.
# set.seed(83)
# clim_pca <- prcomp(env_vars_cut_trans,scale.=T) #proceed from correlation matrix, which scales variables--important because our variables have different units
# saveRDS(clim_pca, file = "FIA_pca_2022_subset.RDS")
clim_pca <- readRDS("FIA_pca_2022_subset.RDS")

##climate pca without CMD
#alt_clim_pca <- prcomp(env_vars_cut_alt_trans, scale.=T)
#summary(alt_clim_pca)
#par(mfrow=c(1,1))
#plot(alt_clim_pca,type="l")

## Eigenvalues are the squared standard deviations of the components.
eigenvals <- clim_pca$sdev^2
eigenvals
# the sum of the eigenvalues = the number of variables (8)
sum(eigenvals)
# proportion of variance explained by each component
eigenvals / sum(eigenvals)
summary(clim_pca)

# Scree plot.
par(mfrow = c(1, 1))
plot(clim_pca, type = "l")
```

```{r}
#try varimax rotation
library(psych)

set.seed(83)
# Two-component solution with varimax rotation; scores are kept for plotting.
# NOTE(review): psych::principal() documents `cor` as a string such as "cor";
# confirm that passing TRUE selects the intended correlation type.
clim_pca_rotated <- principal(env_vars_cut_trans,cor=TRUE,nfactors=2,rotate="varimax", scores=TRUE)
head(clim_pca_rotated$scores)

# Loadings on the two rotated components, used to draw the variable arrows.
l.x <- clim_pca_rotated$loadings[, 1]
l.y <- clim_pca_rotated$loadings[, 2]
loadings.rotated <- data.frame(rc1 = l.x, rc2 = l.y, names = names(l.x))

# `scores` is a matrix; convert it to a data frame because ggplot() does not
# accept a bare matrix as `data` in every ggplot2 version.
ggplot(data = as.data.frame(clim_pca_rotated$scores), aes(x = RC1, y = RC2)) +
  geom_point(col = "gray") +
  xlab("RC1 (54%)") +
  ylab("RC2 (37%)") +
  # arrows are drawn at 1.5x the loading; labels sit at the unscaled loading
  geom_segment(
    data = loadings.rotated,
    aes(x = 0, y = 0, xend = rc1 * 1.5, yend = rc2 * 1.5),
    arrow = arrow(type = "open", length = unit(0.2, "cm")),
    size = 1
  ) +
  # vjust holds one hand-tuned offset per variable (8 values)
  geom_text(
    data = loadings.rotated,
    aes(x = rc1, y = rc2, label = names),
    col = "red",
    vjust = c(-1, 1, 1, -2, 3, -2, -1, 2.5)
  )
```

The variables most strongly correlated with PC1 are climate moisture deficit (CMD), precipitation as snow (PAS), number of frost free degree days (NFFD), degree days below zero (DD_0), and mean temperature of the coldest month (MCMT). The variables most strongly correlated with PC2 are mean summer precipitation (MSP), winter precipitation (PPT_wt), and summer heat moisture index (SHM_log). So, PC1 seems to be a temperature related axis, while PC2 corresponds to precipitation.

```{r, echo=FALSE}
# Variable loadings sorted by absolute size on PC1.
clim_pca$rotation[, 1:2] %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  arrange(desc(abs(PC1)))

# ... and on PC2.
clim_pca$rotation[, 1:2] %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  arrange(desc(abs(PC2)))

# ... and on PC3.
# PC3 separates monsoonal areas from dry summer/wet winter areas
clim_pca$rotation[, 1:3] %>%
  as.data.frame() %>%
  tibble::rownames_to_column() %>%
  arrange(desc(abs(PC3)))
```

```{r, echo=FALSE}
# PCA biplot with loading arrows, quadrant guides and readable variable labels.

# png("figures/FIA_pca22_subset_realnames.png", width = 700, height = 400)

# Hand-placed label positions (x, y) and wrapped plain-language names, one per
# variable, in the row order of clim_pca$rotation.
loadings.df <- as.data.frame(clim_pca$rotation) %>%
  mutate(
    x = c(-1, -3.4, -3.2, 2, -3.2, 2.3, 3.2, 2.5),
    y = c(-2, -1.5, 0, -4, 1.9, -1.5, 1, 2.5),
    names = str_wrap(
      c(
        "mean temp coldest month",
        "# frost free degree days",
        "climate moisture  deficit",
        "log(mean summer precip)",
        "log(summer heat moisture index)",
        "log(winter precip)",
        "log(precip  as snow)",
        "log(degree days below 0)"
      ),
      width = 14
    )
  )


autoplot(
  clim_pca,
  x = 1,
  y = 2,
  colour = "grey",
  loadings.colour = "red",
  loadings = TRUE,
  loadings.label = FALSE,
  loadings.label.colour = "black",
  scale = 0,
  loadings.label.vjust = c(.5, -.3, -.5, -.3, -.4, -.05, 1.4, 0),
  loadings.label.hjust = c(1.1, 1, 0, -.1, 1.1, -.05, 0, -.1),
  loadings.label.size = 6,
  alpha = 0.5
) +
  # axes and diagonals dividing the climate space
  geom_hline(yintercept = 0, linetype = "dashed") +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_abline(intercept = 0, slope = -1, linetype = "dashed") +
  # labels along the axes
  annotate("text", x = -0.4, y = 3.2, label = "cold/dry", size = 6, angle = 90) +
  annotate(geom = "text", x = 5.2, y = -0.2, label = "cold/wet", size = 6) +
  annotate(geom = "text", x = -0.4, y = -4, label = "hot/wet", size = 6, angle = 90) +
  annotate(geom = "text", x = -10, y = -0.2, label = "hot/dry", size = 6) +
  # labels along the diagonals
  annotate(geom = "text", x = 3.2, y = 3.6, label = "cold", size = 6, angle = 45) +
  annotate(geom = "text", x = 5, y = -4.5, label = "wet", size = 6, angle = -45) +
  annotate(geom = "text", x = -5, y = -4.5, label = "hot", size = 6, angle = 45) +
  annotate(geom = "text", x = -3.2, y = 3.7, label = "dry", size = 6, angle = -45) +
  # variable names at their hand-placed positions
  annotate(
    geom = "text",
    x = loadings.df$x,
    y = loadings.df$y,
    label = loadings.df$names,
    size = 4.5
  ) +
  theme_bw() +
  theme(
    axis.text = element_text(color = "black", size = 18),
    text = element_text(size = 18, lineheight = 0.1),
    legend.text = element_text(size = 18)
  )

# dev.off()
```

```{r}
# PCA scores coloured by latitude. The transparency is a fixed setting, so it
# is passed to geom_point() rather than mapped inside aes(), where it would go
# through a scale and add a meaningless "0.5" legend.
env_vars %>%
  left_join(plot, by = c("PLT_CN" = "CN")) %>%
  bind_cols(clim_pca$x) %>%
  ggplot(aes(x = PC1, y = PC2, color = LAT)) +
  geom_point(alpha = 0.5)

# PCA scores coloured by elevation.
env_vars %>%
  left_join(plot, by = c("PLT_CN" = "CN")) %>%
  bind_cols(clim_pca$x) %>%
  ggplot(aes(x = PC1, y = PC2, color = ELEV)) +
  geom_point() +
  scale_color_viridis_c(option = "magma")


```


```{r, echo = FALSE}
# PCA coordinates of every plot, tagged with the plot ID in column `plot`.
clim_pca_points <- as.data.frame(clim_pca$x) %>%
  bind_cols(plot = env_vars$PLT_CN)

# Append the PCA coordinates to every species x disturbance data frame,
# matching PLT_CN to `plot`. Element names are carried over from the inputs.
adult_data_pcapts <- lapply(adult_data, function(sp_dist_df) {
  left_join(x = sp_dist_df, y = clim_pca_points, by = c("PLT_CN" = "plot"))
})
names(adult_data_pcapts) <- names(adult_data)

seedling_data_pcapts <- lapply(seedling_data, function(sp_dist_df) {
  left_join(x = sp_dist_df, y = clim_pca_points, by = c("PLT_CN" = "plot"))
})
names(seedling_data_pcapts) <- names(seedling_data)


```

```{r, echo = FALSE}
################  ALTERNATE FOR VARIMAX ROTATION  ################
# #make dataframe with PCA coordinates of all plots 
# clim_pca_points <- as.data.frame(clim_pca_rotated$scores)
# clim_pca_points <- clim_pca_points %>% 
#   bind_cols(plot = env_vars$PLT_CN) %>% 
#   dplyr::rename(PC1=RC1, PC2=RC2)
# 
# #left join the positions on the PCA for adults and seedlings in all disturbance types by their plot ID #s
# adult_data_pcapts <-
#   map2( .x = seq_along( along.with = adult_data )
#         , .y = adult_data
#         , .f = function( i, j )
#           {
#           left_join( x = j, 
#                      y = clim_pca_points, 
#                      by=c("PLT_CN" = "plot") )
#           })
# names(adult_data_pcapts)<- names(adult_data)
# 
# seedling_data_pcapts <-
#   map2( .x = seq_along( along.with = seedling_data )
#         , .y = seedling_data
#         , .f = function( i, j )
#           {
#           left_join( x = j, 
#                      y = clim_pca_points, 
#                      by=c("PLT_CN" = "plot") )
#           })
# names(seedling_data_pcapts) <- names(seedling_data)

```
These boxplots display the range of PC1 and PC2 coordinates that plots within each disturbance category occupy. The disturbances are pretty evenly distributed across climate space, with insect/disease plots occupying slightly higher values on PC1 and PC2, which correspond to cooler and wetter climates, and fire plots occupying slightly lower values on PC1 and PC2, corresponding to warmer and drier climates, with undisturbed plots having a median climate even hotter and drier than the other two disturbance categories (but also with the most variation). ANOVAs indicate that these three disturbances are all significantly different from each other in their positions on both PC1 and PC2, likely in part due to the extremely high sample size. 

```{r, echo=FALSE}
# Fill colours for the three disturbance categories shown.
disturbance.colors <- c("#bb5f4c", "#8e5db0", "#729b57")
#red,blue,gray c("#661100", "#6699CC", "#888888")

# PCA coordinates of each plot with its disturbance information attached.
pca_pts_by_dist <- clim_pca_points %>%
  left_join(dist, by = c("plot" = "PLT_CN"))

## PC1: boxplot by disturbance, excluding other / harvest / wind
pca_pts_by_dist %>%
  filter(!Agent %in% c("other", "harvest", "wind")) %>%
  ggplot(aes(x = Agent, y = PC1, fill = Agent)) +
  geom_boxplot() +
  scale_fill_manual(values = disturbance.colors) +
  theme_bw()

# One-way ANOVA and Tukey comparisons of PC1 among disturbances.
#significant differnces in disturbances? yes
pc1.agent.mod <- aov(
  PC1 ~ Agent,
  data = pca_pts_by_dist %>% filter(!Agent %in% c("other", "harvest", "wind"))
)
summary(pc1.agent.mod)
TukeyHSD(pc1.agent.mod)

## PC2: same model and plot
pc2.agent.mod <- aov(
  PC2 ~ Agent,
  data = pca_pts_by_dist %>% filter(!Agent %in% c("other", "harvest", "wind"))
)
summary(pc2.agent.mod)
TukeyHSD(pc2.agent.mod)

pca_pts_by_dist %>%
  filter(!Agent %in% c("other", "harvest", "wind")) %>%
  ggplot(aes(x = Agent, y = PC2, fill = Agent)) +
  geom_boxplot() +
  scale_fill_manual(values = disturbance.colors) +
  theme_bw()
```


## Species specific range shifts

The one species-specific test we can do is a t-test comparing the mean positions on PC1 and PC2 of seedlings vs. adults in different disturbance types.

### PC1

First, PC1:

```{r, echo=FALSE, results='hide'}
##check for normality PC1
#grab multiplot function script
source("multiplot_fxn.R")

# The lists below are built with lapply()/vapply() so that no loop variable
# named `plot` is created: earlier chunks use `plot` as the FIA plot table, and
# assigning a ggplot object to it would silently replace that table.

#make density plots to check for normality in each of the species x disturbance x age category
##adult
# PC1 values for every species x disturbance combination, in list order.
adultplots <- lapply(seq_along(adult_data), function(i) {
  adult_data_pcapts %>%
    pluck(i) %>%
    ungroup() %>%
    pull(PC1)
})
# density and normal Q-Q plot for each combination
plotlist <- lapply(adultplots, function(pc1_values) ggdensity(pc1_values))
qplotlist <- lapply(adultplots, function(pc1_values) ggqqplot(pc1_values))

##seedling
seedlingplots <- lapply(seq_along(seedling_data), function(i) {
  seedling_data_pcapts %>%
    pluck(i) %>%
    ungroup() %>%
    pull(PC1)
})
splotlist <- lapply(seedlingplots, function(pc1_values) ggdensity(pc1_values))
sqplotlist <- lapply(seedlingplots, function(pc1_values) ggqqplot(pc1_values))


# Two-sided F test of equal variance between seedlings and adults for each
# combination; `num` is the position in the lists above, `p` the p-value.
vartest.results <- data.frame(
  num = seq_along(seedling_data),
  p = vapply(
    seq_along(seedling_data),
    function(i) {
      var.test(
        seedlingplots[[i]],
        adultplots[[i]],
        alternative = "two.sided"
      )$p.value
    },
    numeric(1)
  )
)
vartest.results##not all have equal variance between adult and seedling data
```

```{r, echo=FALSE, results='hide'}
#shapiro test to formally evaluate normality of data. Even though, not all pass this test, I think we can still use t-test because sample size is relatively large 

#shapiro test for adults
# One p-value per species x disturbance combination; `num` is the list index.
# The p-value is read by its full name (`p.value`) instead of relying on `$`
# partial matching, and the table is built in one step rather than grown with
# rbind() inside a loop.
shapiro.results.a <- data.frame(
  num = seq_along(adult_data),
  p = vapply(
    seq_along(adult_data),
    function(i) shapiro.test(adultplots[[i]])$p.value,
    numeric(1)
  )
)
shapiro.results.a %>%
  filter(p < 0.05)
#look at those that failed normality test
# lookup from list position to the "<SPCD>_<disturbance>_cvars" element name
data.names <- data.frame(
  name = names(adult_data_pcapts),
  num = seq(1, length(adult_data_pcapts), 1)
)

#this table shows sample size for adult data that didn't pass normality test. Lowest for adults is 151, I think this is reasonably large to still use a t-test. 
shapiro.results.a %>%
  filter(p < 0.05) %>%
  left_join(data.names, by = "num") %>%
  separate(name, sep = "_", into = c("species", "agent", "var")) %>%
  mutate(species = as.numeric(species)) %>%
  left_join(sample_size, by = c("species" = "SPCD", "agent" = "Agent")) %>%
  datatable()


# Same test for seedlings.
shapiro.results.s <- data.frame(
  num = seq_along(seedling_data),
  p = vapply(
    seq_along(seedling_data),
    function(i) shapiro.test(seedlingplots[[i]])$p.value,
    numeric(1)
  )
)
head(shapiro.results.s)
shapiro.results.s %>%
  filter(p < 0.05)

#this table shows sample size for seedling data that didn't pass normality test. Lowest is 162 so i think this is big enough to be fine.   
shapiro.results.s %>%
  filter(p < 0.05) %>%
  left_join(data.names, by = "num") %>%
  separate(name, sep = "_", into = c("species", "agent", "var")) %>%
  mutate(species = as.numeric(species)) %>%
  left_join(sample_size, by = c("species" = "SPCD", "agent" = "Agent")) %>%
  datatable()


```

```{r, echo=FALSE, results="hide"}
# t-tests for PC1: compare the mean seedling vs. adult position in each
# species x disturbance group.
# Groups whose variances differed (p < 0.05 in vartest.results) get the
# unequal-variance (Welch) t-test; all other groups get the pooled-variance
# t-test. The choice is made per group in a single pass, so this also works
# when no group has unequal variances.

# groups with unequal variance between seedlings and adults
pc1.unequal.var <- vartest.results %>%
  filter(p < 0.05)

pc1.ttest.pvalues <- bind_rows(
  lapply(seq_along(adult_data_pcapts), function(i) {
    # plain numeric vectors of PC1 scores for this group
    adult.pc1 <- adult_data_pcapts %>%
      pluck(i) %>%
      ungroup() %>%
      pull(PC1)
    seed.pc1 <- seedling_data_pcapts %>%
      pluck(i) %>%
      ungroup() %>%
      pull(PC1)

    group.test <- t.test(adult.pc1, seed.pc1,
                         paired = FALSE,
                         var.equal = !(i %in% pc1.unequal.var$num),
                         conf.level = 0.95)

    # estimate[1] is the adult mean (x), estimate[2] the seedling mean (y),
    # so diff.means = seedling mean - adult mean
    data.frame(
      uid = names(adult_data_pcapts)[i],
      diff.means = unname(group.test$estimate[2] - group.test$estimate[1]),
      p.value = group.test$p.value
    )
  })
)

# the subset of results that came from the unequal-variance t-test
pc1.ttest.pvalues.alt <- pc1.ttest.pvalues[pc1.unequal.var$num, ]
pc1.ttest.pvalues.alt
pc1.ttest.pvalues

# get dataframe in order: split the uid into species code and agent, attach
# species names, set the height at which the significance asterisk is drawn
# (NA = not significant), and drop the species x agent combinations that are
# excluded from the figures
pc1.ttest.pvalues.sep <- pc1.ttest.pvalues %>%
  separate(uid, c("species", "agent"), sep = "_", extra = "drop",
           remove = FALSE) %>%
  mutate(species = as.numeric(species)) %>%
  left_join(species.names, by = c("species" = "species.code")) %>%
  mutate(sym.pos = ifelse(p.value < 0.05, 0.45, NA)) %>%
  filter(!(agent == "fire" & species.name %in% insectonlyspp)) %>%
  filter(!(agent == "insect.disease" & species.name %in% fireonlyspp)) %>%
  filter(species.name != "singleleaf pinyon")
```

This plot shows the difference in seedling vs. adult positions on PC1 (seedling mean - adult mean), separated by species and disturbance type. Differences in means greater than zero indicate that seedlings occupy higher values on PC1 (cooler climates) than adults, whereas negative differences in means indicate that seedlings occupy lower values on PC1 (warmer climates) than adults. Asterisks indicate that seedling and adult positions on PC1 are significantly different (p\<0.05), as evaluated with a t-test.

```{r,echo=FALSE}

# Bar chart of the difference in mean PC1 position (diff.means) per disturbance
# agent, one facet per species. Groups with a non-missing sym.pos get an
# asterisk drawn at that height.
allspp_pc1 <- ggplot(
  pc1.ttest.pvalues.sep,
  aes(x = agent, y = diff.means, fill = agent)
) +
  geom_col() +
  geom_text(
    data = dplyr::filter(pc1.ttest.pvalues.sep, !is.na(sym.pos)),
    aes(x = agent, y = sym.pos),
    label = "*",
    color = "black",
    size = 8
  ) +
  scale_fill_manual(values = disturbance.colors) +
  geom_hline(yintercept = 0, col = "black") +
  facet_wrap(~species.name, nrow = 3) +
  ylim(c(-0.2, 0.5)) +
  labs(
    title = "PC1",
    x = "",
    y = "Difference in Means",
    fill = "Disturbance Agent"
  ) +
  theme_bw() +
  theme(
    text = element_text(color = "black", size = 14),
    axis.text = element_text(color = "black", size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(color = "black", size = 11),
    legend.text = element_text(color = "black", size = 14),
    legend.position = "none",
    legend.justification = c(1, -0.3)
  )

# png("allspecies.plot.pc1.png", width=700,height=600)
# 
# allspp_pc1
# 
# dev.off()
```

### PC2

Now for PC2, where positive differences mean that seedlings occupied higher values on PC2 (wetter climates) than adults, and negative differences mean that seedlings occupied lower values on PC2 (drier climates).

```{r, echo=FALSE, results='hide'}
## Check normality and equality of variance of the PC2 scores.
## For every species x disturbance group this builds the adult and seedling PC2
## score vectors, their density and Q-Q plots, and an F-test comparing the
## seedling vs. adult variances.

# NOTE: every list below is indexed over the *_pcapts lists themselves, because
# later chunks use these positions (`num`) to look groups up in
# adult_data_pcapts / seedling_data_pcapts. The plots are built with lapply()
# so that no temporary is ever assigned to `plot`, which is the FIA plot table
# read in at the top of this document.

## adults: one numeric vector of PC2 scores per group
pc2_adultplots <- lapply(seq_along(adult_data_pcapts), function(i) {
  adult_data_pcapts %>%
    pluck(i) %>%
    ungroup() %>%
    pull(PC2)
})

# density and Q-Q plots for each adult group
pc2_plotlist <- lapply(pc2_adultplots, function(scores) ggdensity(scores))
pc2_qplotlist <- lapply(pc2_adultplots, function(scores) ggqqplot(scores))

## seedlings: one numeric vector of PC2 scores per group
pc2_seedlingplots <- lapply(seq_along(seedling_data_pcapts), function(i) {
  seedling_data_pcapts %>%
    pluck(i) %>%
    ungroup() %>%
    pull(PC2)
})

# density and Q-Q plots for each seedling group
pc2_splotlist <- lapply(pc2_seedlingplots, function(scores) ggdensity(scores))
pc2_sqplotlist <- lapply(pc2_seedlingplots, function(scores) ggqqplot(scores))

## test for equal variance. p<0.05 indicates unequal variance.
## The two lists must line up one-to-one.
stopifnot(length(pc2_seedlingplots) == length(pc2_adultplots))
pc2_vartest.results <- data.frame(
  num = seq_along(pc2_seedlingplots),
  p = vapply(seq_along(pc2_seedlingplots), function(i) {
    var.test(pc2_seedlingplots[[i]], pc2_adultplots[[i]],
             alternative = "two.sided")$p.value
  }, numeric(1))
)
pc2_vartest.results %>%
  filter(p < 0.05)
```


```{r, eval=FALSE, echo=FALSE}
#plot checks for normality PC2
##adults
# multiplot(plotlist=pc2_plotlist[1:16],cols=4)
# multiplot(plotlist=pc2_qplotlist[1:16],cols=4)
# multiplot(plotlist=pc2_qplotlist[17:33],cols=4)

##seedlings
# multiplot(plotlist=pc2_splotlist,cols=4)
# multiplot(plotlist=pc2_sqplotlist[1:16],cols=4)
# multiplot(plotlist=pc2_sqplotlist[17:33],cols=4)

```

```{r}
# Shapiro-Wilk tests to formally evaluate normality of the PC2 scores in each
# species x disturbance group. Even though not all groups pass this test, I
# think we can still use a t-test because sample size is relatively large.
# Relies on data.names (list position -> group name) built in the PC1 section.

# shapiro test for adults: one p-value per group; `num` is the group's position
# in pc2_adultplots
pc2.shapiro.results.a <- data.frame(
  num = seq_along(pc2_adultplots),
  p = vapply(pc2_adultplots, function(scores) shapiro.test(scores)$p.value,
             numeric(1), USE.NAMES = FALSE)
)
pc2.shapiro.results.a %>%
  filter(p < 0.05)

# this table shows sample size for adult data that didn't pass normality test.
# Lowest for adults is 195, I think this is reasonably large to still use a
# t-test.
pc2.shapiro.results.a %>%
  filter(p < 0.05) %>%
  left_join(data.names, by = "num") %>%
  separate(name, sep = "_", into = c("species", "agent", "var")) %>%
  mutate(species = as.numeric(species)) %>%
  left_join(sample_size, by = c("species" = "SPCD", "agent" = "Agent")) %>%
  datatable()

# shapiro test for seedlings, same layout as the adult results
pc2.shapiro.results.s <- data.frame(
  num = seq_along(pc2_seedlingplots),
  p = vapply(pc2_seedlingplots, function(scores) shapiro.test(scores)$p.value,
             numeric(1), USE.NAMES = FALSE)
)
pc2.shapiro.results.s %>%
  filter(p < 0.05)

# this table shows sample size for seedling data that didn't pass normality
# test. Lowest is 67. I think this is ok for t-test.
pc2.shapiro.results.s %>%
  filter(p < 0.05) %>%
  left_join(data.names, by = "num") %>%
  separate(name, sep = "_", into = c("species", "agent", "var")) %>%
  mutate(species = as.numeric(species)) %>%
  left_join(sample_size, by = c("species" = "SPCD", "agent" = "Agent")) %>%
  datatable()

```

```{r, echo=FALSE, results='hide'}
# t-tests for PC2: compare the mean seedling vs. adult position in each
# species x disturbance group.
# Groups whose variances differed (p < 0.05 in pc2_vartest.results) get the
# unequal-variance (Welch) t-test; all other groups get the pooled-variance
# t-test. The choice is made per group in a single pass, so this also works
# when no group has unequal variances.

# groups with unequal variance between seedlings and adults
pc2.unequal.var <- pc2_vartest.results %>%
  filter(p < 0.05)

pc2.ttest.pvalues <- bind_rows(
  lapply(seq_along(adult_data_pcapts), function(i) {
    # plain numeric vectors of PC2 scores for this group
    adult.pc2 <- adult_data_pcapts %>%
      pluck(i) %>%
      ungroup() %>%
      pull(PC2)
    seed.pc2 <- seedling_data_pcapts %>%
      pluck(i) %>%
      ungroup() %>%
      pull(PC2)

    group.test <- t.test(adult.pc2, seed.pc2,
                         paired = FALSE,
                         var.equal = !(i %in% pc2.unequal.var$num),
                         conf.level = 0.95)

    # estimate[1] is the adult mean (x), estimate[2] the seedling mean (y),
    # so diff.means = seedling mean - adult mean
    data.frame(
      uid = names(adult_data_pcapts)[i],
      diff.means = unname(group.test$estimate[2] - group.test$estimate[1]),
      p.value = group.test$p.value
    )
  })
)

# the subset of results that came from the unequal-variance t-test
pc2.ttest.pvalues.alt <- pc2.ttest.pvalues[pc2.unequal.var$num, ]
pc2.ttest.pvalues.alt
pc2.ttest.pvalues

# get data in order: split the uid into species code and agent, attach species
# names, set the height at which the significance asterisk is drawn
# (NA = not significant), and drop the species x agent combinations that are
# excluded from the figures
pc2.ttest.pvalues.sep <- pc2.ttest.pvalues %>%
  separate(uid, c("species", "agent"), sep = "_", extra = "drop",
           remove = FALSE) %>%
  mutate(species = as.numeric(species)) %>%
  left_join(species.names, by = c("species" = "species.code")) %>%
  mutate(sym.pos = ifelse(p.value < 0.05, 0.5, NA)) %>%
  filter(!(agent == "fire" & species.name %in% insectonlyspp)) %>%
  filter(!(agent == "insect.disease" & species.name %in% fireonlyspp)) %>%
  filter(species.name != "singleleaf pinyon")
```

```{r, echo=FALSE}
# Bar chart of the difference in mean PC2 position (diff.means) per disturbance
# agent, one facet per species. Groups with a non-missing sym.pos get an
# asterisk drawn at that height.
allspp_pc2 <- ggplot(
  pc2.ttest.pvalues.sep,
  aes(x = agent, y = diff.means, fill = agent)
) +
  geom_col() +
  geom_text(
    data = dplyr::filter(pc2.ttest.pvalues.sep, !is.na(sym.pos)),
    aes(x = agent, y = sym.pos),
    label = "*",
    color = "black",
    size = 8
  ) +
  scale_fill_manual(values = disturbance.colors) +
  geom_hline(yintercept = 0, col = "black") +
  facet_wrap(~species.name, nrow = 3) +
  ylim(c(-0.31, 0.55)) +
  labs(
    title = "PC2",
    x = "",
    y = "Difference in Means",
    fill = "Disturbance Agent"
  ) +
  theme_bw() +
  theme(
    text = element_text(color = "black", size = 14),
    axis.text = element_text(color = "black", size = 14),
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(color = "black", size = 11),
    legend.text = element_text(color = "black", size = 14),
    legend.position = "none",
    legend.justification = c(1, -0.3)
  )

# png("allsp_plot_pc2.png", width=700, height=600)
# 
# allspp_pc2
# 
# dev.off()
```

## Niche models

```{r, echo=FALSE}
#extract all species x disturbance combinations and make into iso style data
#group 1 = adults, group 2 = seedlings

# iso_data <- list()
# for(i in 1:length(adult_data_pcapts)){
#     temp_iso <- rbind(adult_data_pcapts[[i]],seedling_data_pcapts[[i]]) %>%
#   dplyr::select(c("PC1","PC2")) %>%
#   mutate(group=c(rep(1,nrow(adult_data_pcapts[[i]])),rep(2,nrow(seedling_data_pcapts[[i]]))),community=1) %>%
#   dplyr::rename(iso1=PC1,iso2=PC2)
#     iso_data[[i]]<- temp_iso
# }
# names(iso_data)<-names(adult_data_pcapts)
# names(iso_data)
```

```{r, echo=FALSE}
#make iso data into list of SIBER objects

# siber.objs <- list()
# for(i in 1:length(iso_data)){
#   temp.obj<- createSiberObject(as.data.frame(iso_data[[i]]))
#   siber.objs[[i]]<- temp.obj
# }
# names(siber.objs)<-names(adult_data_pcapts)
# #
# # #remove species*disturbance without enough data
# siber.objs[which(names(siber.objs) %in% c("15_fire_cvars","66_fire_cvars","106_fire_cvars","113_fire_cvars","133_fire_cvars","242_fire_cvars","73_insect.disease_cvars"))]<-NULL
# #
#  names(siber.objs)
```

```{r, echo=FALSE}
#estimate multivariate normal ellipses for each group using bayesian estimation

# options for running jags

# parms <- list()
# parms$n.iter <- 2 * 10^4   # number of iterations to run the model for
# parms$n.burnin <- 1 * 10^3 # discard the first set of values
# parms$n.thin <- 10     # thin the posterior by this many
# parms$n.chains <- 2        # run this many chains
# 
# # define the priors
# 
# priors <- list()
# priors$R <- 1 * diag(2)
# priors$k <- 2
# priors$tau.mu <- 1.0E-3
# 
# # fit the ellipses which uses an Inverse Wishart prior on the covariance matrix Sigma, and a vague normal prior on the means. Fitting is via the JAGS method.
# 
# ellipses.posterior.list <- list()
# for(i in 1:length(siber.objs)){
#   temp.mvn <- siberMVN(siber.objs[[i]], parms, priors)
#   ellipses.posterior.list[[i]] <- temp.mvn
#   }
# names(ellipses.posterior.list)<-names(siber.objs)
# 
# SEA.B.list <- list()
# for(i in 1:length(ellipses.posterior.list)){
#   temp.SEA.B <- siberEllipses(ellipses.posterior.list[[i]])
#   SEA.B.list[[i]] <- temp.SEA.B
# }
#  names(SEA.B.list)<-names(siber.objs)

```

```{r, echo=FALSE}
#save model output
 # saveRDS(iso_data, file="iso_data.2022_subset.rds")
 # saveRDS(siber.objs, file="siber.objs.2022_subset.rds")
 # saveRDS(ellipses.posterior.list, file="ellipses.posterior.list.2022_subset.rds")
 # saveRDS(SEA.B.list, file="SEA.B.list.2022_subset.rds")
```

```{r, echo=FALSE}
##################### ALTERNATE FOR VARIMAX ROTATION #######################
# #save model output
#  saveRDS(iso_data, file="iso_data.varimax.rds")
#  saveRDS(siber.objs, file="siber.objs.varimax.rds")
#  saveRDS(ellipses.posterior.list, file="ellipses.posterior.list.varimax.rds")
#  saveRDS(SEA.B.list, file="SEA.B.list.varimax.rds")
```

```{r}
# Read the saved SIBER files back in rather than refitting the models:
# the iso-style data, the SIBER objects, the posterior ellipse fits from
# siberMVN(), and the SEA.B output of siberEllipses().
iso_data                <- readRDS(file = "iso_data.2022_subset.rds")
siber.objs              <- readRDS(file = "siber.objs.2022_subset.rds")
ellipses.posterior.list <- readRDS(file = "ellipses.posterior.list.2022_subset.rds")
SEA.B.list              <- readRDS(file = "SEA.B.list.2022_subset.rds")
```

```{r}
##################### ALTERNATE FOR VARIMAX ROTATION #######################
#read in siber files
# iso_data <- readRDS("iso_data.varimax.rds")
# siber.objs <- readRDS("siber.objs.varimax.rds")
# ellipses.posterior.list<- readRDS("ellipses.posterior.list.varimax.rds")
# SEA.B.list <- readRDS("SEA.B.list.varimax.rds")
```

```{r}
# look at niche size
# Mean of the SEA.B draws per species x disturbance combination. Column 1 of
# each SEA.B matrix is treated as the adult group and column 2 as the seedling
# group (group 1 = adults, group 2 = seedlings when the iso data were built).
# The tables are built with explicit column names and joined explicitly on
# `name`, instead of relying on auto-generated column names and position.

adult.area <- data.frame(
  adult_area = vapply(SEA.B.list, function(draws) mean(draws[, 1]),
                      numeric(1), USE.NAMES = FALSE),
  name = names(SEA.B.list)
)
seedling.area <- data.frame(
  seedling_area = vapply(SEA.B.list, function(draws) mean(draws[, 2]),
                         numeric(1), USE.NAMES = FALSE),
  name = names(SEA.B.list)
)

# one row per combination: adult_area, name, seedling_area
niche.areas <- adult.area %>%
  left_join(seedling.area, by = "name")

# long format (one row per combination x age class) for plotting
niche.areas.long <- niche.areas %>%
  pivot_longer(c("adult_area", "seedling_area"),
               names_to = "age", values_to = "niche.area")

# adult vs. seedling niche area side by side for every combination
ggplot(niche.areas.long, aes(x = name, y = niche.area, fill = age)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45))
```

```{r,echo=FALSE, eval=FALSE}
#look at trace plots for bayesian models

# pdf("trace.plots_2022_subset.pdf")
# 
# par(mfrow=c(3,2))
# for(i in 1:length(ellipses.posterior.list)){
#   for (j in 1:2){
#     for(k in 1:6){
#       plot(ellipses.posterior.list[[i]][[j]][,k],type="l")
#     }
#   }
# }
# 
# dev.off()
# 
# head(ellipses.posterior.list[[1]])
# nrow(ellipses.posterior.list[[1]][[1]])
# head(ellipses.posterior.list[[1]][[1]])
```

```{r}
#plot siber niches for each species x disturbance combination
# plot.names <-
#   as.data.frame(names(siber.objs)) %>%
#   tidyr::separate(col="names(siber.objs)", into=c("species.code","disturbance","extra"), sep="_") %>%
#     mutate(species.code = as.numeric(species.code)) %>%
#     left_join(species.names,by="species.code") %>%
#     dplyr::select(species.name, disturbance) %>%
#     unite(col="name", c(species.name, disturbance), sep=" ",)
# 
# disturbance.colors
# color.list = c(disturbance.colors[c(2:3,1:3,1:3,2:3,1,3,1:3,1:3,2:3,1:3,2:3,1:3,2:3,1:3,2:3,1:3,1:3)])
# 
# par(mfrow=c(1,1))
# for(i in 1:length(siber.objs)){
#   png(filename = paste(names(siber.objs)[i],"varimax",".png",sep=""))
# plot(x=unlist(c(siber.objs[[i]]$original.data %>% filter(group==1) %>% dplyr::select(iso1))),
#      y=unlist(c(siber.objs[[i]]$original.data %>% filter(group==1)%>% dplyr::select(iso2))),col="black", xlim=c(-4,6),ylim=c(-4,6),pch=16, xlab="PC1",ylab="PC2",main=paste(plot.names$name[i]),cex=1.2)+
# points(x=unlist(c(siber.objs[[i]]$original.data %>% filter(group==2) %>% dplyr::select(iso1))),
#      y=unlist(c(siber.objs[[i]]$original.data %>% filter(group==2)%>% dplyr::select(iso2))),col=color.list[i],pch=16)
# addEllipse(siber.objs[[i]]$ML.mu[[1]][ , , 1],
#                      siber.objs[[i]]$ML.cov[[1]][ , , 1],
#                      m = NULL,
#                      n = 100,
#                      p.interval = 0.95,
#                      ci.mean = FALSE,
#                      col = "black",
#                      lty = 1,
#                      lwd = 2)
# addEllipse(siber.objs[[i]]$ML.mu[[1]][ , , 2],
#                      siber.objs[[i]]$ML.cov[[1]][ , , 2],
#                      m = NULL,
#                      n = 100,
#                      p.interval = 0.95,
#                      ci.mean = FALSE,
#                      col = color.list[i],
#                      lty = 1,
#                      lwd = 2)
# legend(-6,-1.5,legend=c("Adult","Seedling"),col=c("black",color.list[i]), pch=16, lty=1, lwd=2,cex=1.2)
# 
# dev.off()
# }
```

```{r}
# par(mfrow=c(1,1))
# plot(x=unlist(c(siber.objs[[31]]$original.data %>% filter(group==1) %>% dplyr::select(iso1))),
#      y=unlist(c(siber.objs[[31]]$original.data %>% filter(group==1)%>% dplyr::select(iso2))),col="black", xlim=c(-4,6),ylim=c(-4,6),pch=16, xlab="PC1",ylab="PC2",main="Douglas-fir Fire",cex=1.2)
# 
# plot(x=unlist(c(siber.objs[[31]]$original.data %>% filter(group==2) %>% dplyr::select(iso1))),
#      y=unlist(c(siber.objs[[31]]$original.data %>% filter(group==2)%>% dplyr::select(iso2))),col=disturbance.colors[1], xlim=c(-4,6),ylim=c(-4,6),pch=16, xlab="PC1",ylab="PC2",main="Douglas-fir Fire",cex=1.2)
# 
# plot(x=unlist(c(siber.objs[[31]]$original.data %>% filter(group==1) %>% dplyr::select(iso1))),
#      y=unlist(c(siber.objs[[31]]$original.data %>% filter(group==1)%>% dplyr::select(iso2))),col="black", xlim=c(-4,6),ylim=c(-4,3),pch=16, xlab="PC1",ylab="PC2",main="Douglas-fir Fire",cex=1.2)+
# points(x=unlist(c(siber.objs[[31]]$original.data %>% filter(group==2) %>% dplyr::select(iso1))),
#      y=unlist(c(siber.objs[[31]]$original.data %>% filter(group==2)%>% dplyr::select(iso2))),col=alpha(disturbance.colors[1],0.6),pch=16)+
# points(x=mean(unlist(c(siber.objs[[31]]$original.data %>% filter(group==1) %>% dplyr::select(iso1)))), y = mean(unlist(c(siber.objs[[31]]$original.data %>% filter(group==1)%>% dplyr::select(iso2)))), col = "grey", pch = 18, cex=1.5)+
# points(x=mean(unlist(c(siber.objs[[31]]$original.data %>% filter(group==2) %>% dplyr::select(iso1)))), y = mean(unlist(c(siber.objs[[31]]$original.data %>% filter(group==2)%>% dplyr::select(iso2)))), col = "gold1", pch = 18, cex = 1.5)
# addEllipse(siber.objs[[31]]$ML.mu[[1]][ , , 1],
#                      siber.objs[[31]]$ML.cov[[1]][ , , 1],
#                      m = NULL,
#                      n = 100,
#                      p.interval = 0.95,
#                      ci.mean = FALSE,
#                      col = "black",
#                      lty = 1,
#                      lwd = 2)
# addEllipse(siber.objs[[31]]$ML.mu[[1]][ , , 2],
#                      siber.objs[[31]]$ML.cov[[1]][ , , 2],
#                      m = NULL,
#                      n = 100,
#                      p.interval = 0.95,
#                      ci.mean = FALSE,
#                      col = disturbance.colors[1],
#                      lty = 1,
#                      lwd = 2)


```


### Niche Overlap


```{r}
#calculate niche areas and overlap
##this takes ~10 hours length(ellipses.posterior.list)

# overlap.list<-list()
# for(i in 1:2){
#     cat(i, "\n")
# temp.overlap <- bayesianOverlap("1.1","1.2",ellipses.posterior.list[[i]],draws=NULL,p.interval=0.95)
# 
# temp.overlap$exp.prop.adult = (temp.overlap$area2 - temp.overlap$overlap)/temp.overlap$area1
# 
# temp.overlap$contr.prop.adult = (temp.overlap$area1 - temp.overlap$overlap)/temp.overlap$area1
# 
# overlap.list[[i]] <- temp.overlap
# }
# names(overlap.list)<- names(ellipses.posterior.list)
# 
# saveRDS(overlap.list, file = "overlap.list.2022.subset.RDS")
# 
# for(i in 3:length(ellipses.posterior.list)){
#     cat(i, "\n")
# temp.overlap <- bayesianOverlap("1.1","1.2",ellipses.posterior.list[[i]],draws=NULL,p.interval=0.95)
# 
# temp.overlap$exp.prop.adult = (temp.overlap$area2 - temp.overlap$overlap)/temp.overlap$area1
# 
# temp.overlap$contr.prop.adult = (temp.overlap$area1 - temp.overlap$overlap)/temp.overlap$area1
# 
# overlap.list[[i]] <- temp.overlap
# }
# names(overlap.list)<- names(ellipses.posterior.list)
# 
# saveRDS(overlap.list, file = "overlap.list.2022.subset.RDS")

```

```{r}
# Read the saved niche-overlap results (one element per species x disturbance
# combination) back in instead of recomputing them.
overlap.list <- readRDS(file = "overlap.list.2022.subset.RDS")
```


```{r}
# calculate net expansion for each replicate (posterior draw):
# proportion expanded minus proportion contracted
overlap.list2 <- lapply(overlap.list, function(x) {
  mutate(x, net.expansion = exp.prop.adult - contr.prop.adult)
})

# summarize overlap for each species x disturbance combination: means of the
# areas and proportions, plus the mean and the 2.5% / 97.5% quantiles (95%
# interval) of net expansion across replicates
overlap.summary <- lapply(overlap.list2, function(draws) {
  draws %>%
    dplyr::summarise(
      adult.mean.area = mean(area1),
      seed.mean.area = mean(area2),
      overlap.mean.area = mean(overlap),
      mean.exp.prop.adult = mean(exp.prop.adult),
      mean.contr.prop.adult = mean(contr.prop.adult),
      mean.net.expansion = mean(net.expansion),
      lwr.net.expansion = unname(quantile(net.expansion, 0.025)),
      upr.net.expansion = unname(quantile(net.expansion, 0.975))
    )
})

# one row per combination; stacking the one-row summaries keeps the column
# names and works for any number of combinations (no hard-coded row count)
overlap.table <- as.data.frame(bind_rows(overlap.summary))

# label the rows: split the list names on "_" into species code, disturbance
# and a trailing piece, then attach the species names
overlap.table <- overlap.table %>%
  mutate(names = names(overlap.list)) %>%
  separate(names, into = c("species", "disturbance", "extra"), sep = "_") %>%
  mutate(species = as.numeric(species)) %>%
  left_join(species.names, by = c("species" = "species.code")) %>%
  filter(species.name != "singleleaf pinyon")
```

Let's look at some plots of expansion and contraction across species and disturbances. 

```{r}
# some plots
# Mean proportion expanded, by species and disturbance. Fire rows of species in
# insectonlyspp and insect/disease rows of species in fireonlyspp are left out.
ggplot(
  overlap.table %>%
    filter(
      !(disturbance == "fire" & species.name %in% insectonlyspp),
      !(disturbance == "insect.disease" & species.name %in% fireonlyspp)
    ),
  aes(x = species.name, y = mean.exp.prop.adult, fill = disturbance)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = disturbance.colors) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 0.8))

# Same plot for the mean proportion contracted
ggplot(
  overlap.table %>%
    filter(
      !(disturbance == "fire" & species.name %in% insectonlyspp),
      !(disturbance == "insect.disease" & species.name %in% fireonlyspp)
    ),
  aes(x = species.name, y = mean.contr.prop.adult, fill = disturbance)
) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = disturbance.colors) +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 0.8))

# expansion minus contraction, computed from the per-combination means
overlap.table$diff <- with(
  overlap.table,
  mean.exp.prop.adult - mean.contr.prop.adult
)

# flag whether the 95% interval of net expansion spans zero ("nodiff") or lies
# entirely on one side of it ("diff")
overlap.table$nodiff <- with(
  overlap.table,
  ifelse(lwr.net.expansion < 0 & upr.net.expansion > 0, "nodiff", "diff")
)

# Net expansion (mean with 2.5%-97.5% interval) per disturbance, faceted by
# species. Points whose interval spans zero ("nodiff") are drawn fainter.

# for 8 species: only species in neither insectonlyspp nor fireonlyspp
diff.plot <- ggplot(
  overlap.table %>%
    filter(!species.name %in% c(insectonlyspp, fireonlyspp)),
  aes(x = disturbance, y = mean.net.expansion,
      color = disturbance, alpha = nodiff)
) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.8) +
  geom_point(position = position_dodge(0.9), size = 4) +
  geom_errorbar(
    aes(ymin = lwr.net.expansion, ymax = upr.net.expansion),
    width = .5,
    position = position_dodge(0.9),
    size = 1
  ) +
  scale_color_manual(values = disturbance.colors) +
  scale_alpha_manual(values = c(1, 0.5)) +
  facet_wrap(
    ~species.name,
    nrow = 1,
    labeller = labeller(species.name = label_wrap_gen(10))
  ) +
  #scale_x_discrete(position='top',name="")+
  ylab("Net Expansion") +
  # annotate("text",x=8.5,y=0.3,label="expansion",size=7)+
  # annotate("text",x=8.5,y=-0.4,label="contraction",size=7)+
  theme_bw() +
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    legend.position = "none",
    strip.text = element_text(size = 16, color = "black"),
    plot.margin = margin(0, 0.5, .6, .1, "cm")
  )

# for all species: same plot, with point shape also encoding whether the
# interval spans zero and the legend shown at the bottom
diff.plot_all <- ggplot(
  overlap.table,
  aes(x = disturbance, y = mean.net.expansion,
      color = disturbance, alpha = nodiff, shape = nodiff)
) +
  geom_hline(yintercept = 0, linetype = 2, size = 0.8) +
  geom_point(position = position_dodge(0.9), size = 4) +
  geom_errorbar(
    aes(ymin = lwr.net.expansion, ymax = upr.net.expansion),
    width = .5,
    position = position_dodge(0.9),
    size = 1
  ) +
  scale_color_manual(
    values = disturbance.colors,
    name = "Disturbance",
    labels = c("fire", "insect/disease", "none")
  ) +
  scale_alpha_manual(
    values = c(1, 0.4),
    labels = c("no", "yes"),
    name = "95% CI overlaps 0"
  ) +
  scale_shape_manual(
    values = c(16, 1),
    labels = c("no", "yes"),
    name = "95% CI overlaps 0"
  ) +
  facet_wrap(
    ~species.name,
    nrow = 3,
    labeller = labeller(species.name = label_wrap_gen(20))
  ) +
  #scale_x_discrete(position='top',name="")+
  ylab("Net Expansion") +
  # annotate("text",x=8.5,y=0.3,label="expansion",size=7)+
  # annotate("text",x=8.5,y=-0.4,label="contraction",size=7)+
  theme_bw() +
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.x = element_blank(),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    legend.position = "bottom",
    strip.text = element_text(size = 16, color = "black"),
    plot.margin = margin(0, 0.5, .6, .1, "cm")
  )

# png("figures/diffplot.png", width = 1000, height = 600)
# 
# diff.plot_all
# 
# dev.off()


# Boxplots of the mean proportion expanded / contracted per disturbance, with
# one jittered point per species x disturbance combination coloured by species.

# colours for the per-species points, shared by both plots
shift.species.colors <- c(
  "#119329", "#c975eb", "#e6c035", "#002489", "#00daa9",
  "#530046", "#dfd783", "#5676ef", "#3a5e00", "#0072d7",
  "#a65200", "#ff8cc0", "#614800", "#b15674", "#ff8e68"
)

# leave out fire rows of species in insectonlyspp and insect/disease rows of
# species in fireonlyspp
shift.plot.data <- overlap.table %>%
  filter(!(disturbance == "fire" & species.name %in% insectonlyspp)) %>%
  filter(!(disturbance == "insect.disease" & species.name %in% fireonlyspp))

expansion.plot <- ggplot(
  shift.plot.data,
  aes(x = disturbance, y = mean.exp.prop.adult)
) +
  geom_boxplot(fill = "gray") +
  geom_jitter(height = 0, width = 0.2, aes(color = species.name),
              pch = 15, size = 2) +
  scale_color_manual(values = shift.species.colors, name = "Species") +
  xlab("") +
  ylab("Proportion Expanded") +
  ylim(c(-0.001, 0.42)) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 14)
  )

contraction.plot <- ggplot(
  shift.plot.data,
  aes(x = disturbance, y = mean.contr.prop.adult)
) +
  geom_boxplot(fill = "gray") +
  geom_jitter(height = 0, width = 0.2, aes(color = species.name),
              pch = 15, size = 2) +
  scale_color_manual(values = shift.species.colors, name = "Species") +
  xlab("") +
  ylab("Proportion Contracted") +
  ylim(c(-0.001, 0.42)) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14),
    legend.text = element_text(size = 16),
    legend.title = element_text(size = 16)
  )

# contraction on the left, expansion on the right, with a single shared legend
ggarrange(contraction.plot, expansion.plot,
          nrow = 1, ncol = 2, common.legend = TRUE)

# Expansion vs. contraction as boxplots: first per species, then pooled.
# Long format: one row per combination x shift type; fire rows of species
# codes 17, 113 and 106 are left out.
overlap.shift.long <- overlap.table %>%
  pivot_longer(
    c(mean.exp.prop.adult, mean.contr.prop.adult),
    names_to = "shift",
    values_to = "proportion"
  ) %>%
  filter(!(disturbance == "fire" & species %in% c(17, 113, 106)))

# layers, labels and theme shared by the two boxplots
shift.box.layers <- list(
  geom_boxplot(),
  xlab(""),
  ylab("Mean Proportion Shift"),
  scale_fill_manual(
    values = c("darkorange1", "steelblue"),
    labels = c("Contraction", "Expansion"),
    name = "Shift Type"
  ),
  theme_bw(),
  theme(
    axis.text.x = element_text(angle = 45, vjust = 0.9, hjust = 0.8),
    axis.text = element_text(size = 16, color = "black"),
    legend.text = element_text(size = 16),
    axis.title = element_text(size = 16),
    legend.title = element_text(size = 16)
  )
)

# by species
ggplot(overlap.shift.long,
       aes(x = species.name, y = proportion, fill = shift)) +
  shift.box.layers

# pooled over species
ggplot(overlap.shift.long,
       aes(x = shift, y = proportion, fill = shift)) +
  shift.box.layers
```

Now I will look at differences between disturbances in the amount of contraction/expansion. There are no significant differences among disturbance types in the amount of contraction or expansion; however, there is significantly more contraction than expansion across the board. This is true both for the set of species with all disturbances (n=8) and for all species (n=13).

Full set results:

```{r, echo=FALSE}
# Define the two analysis sets. Partial set = every row of overlap.table;
# full set = only species in neither insectonlyspp nor fireonlyspp (the
# species with all disturbances).
overlap.table.partialset <- overlap.table
overlap.table.fullset <- overlap.table %>%
  filter(!species.name %in% c(insectonlyspp, fireonlyspp))

# one-way ANOVA of log proportion expanded on disturbance type, with the
# standard diagnostic plots in a 2 x 2 layout
dist.exp.anova <- aov(log(mean.exp.prop.adult) ~ factor(disturbance),
                      data = overlap.table.fullset)
par(mfrow = c(2, 2))
plot(dist.exp.anova)
summary(dist.exp.anova) # no significant difference between disturbances in expansion

# same model for log proportion contracted
dist.contr.anova <- aov(log(mean.contr.prop.adult) ~ factor(disturbance),
                        data = overlap.table.fullset)
plot(dist.contr.anova)
summary(dist.contr.anova) # no difference between disturbances for contractions

# expansion vs. contraction overall: check the distributions and variances,
# then compare with a rank-based test because expansion is not normal
par(mfrow = c(1, 1))
hist(overlap.table.fullset$mean.exp.prop.adult) # not normal
hist(overlap.table.fullset$mean.contr.prop.adult) # normal-ish
var.test(overlap.table.fullset$mean.exp.prop.adult,
         overlap.table.fullset$mean.contr.prop.adult,
         alternative = "two.sided")
wilcox.test(overlap.table.fullset$mean.exp.prop.adult,
            overlap.table.fullset$mean.contr.prop.adult) # significant difference between amount of contraction and amount of expansion

# The same histograms and tests for the partial set (all species) are run in
# the "Partial Set Results" chunk, so they are not repeated under the full-set
# heading.
```

Partial Set Results:

```{r, echo=FALSE}
#partial set analysis
# Repeats the disturbance ANOVAs and the expansion-vs-contraction comparison on
# `overlap.table.partialset` (the unfiltered copy of `overlap.table`).
# One-way ANOVA of log expansion proportion by disturbance, with diagnostic
# plots laid out on a 2x2 grid.
dist.exp.anova.partial <- aov(log(mean.exp.prop.adult)~factor(disturbance),data=overlap.table.partialset)
par(mfrow=c(2,2))
plot(dist.exp.anova.partial)
summary(dist.exp.anova.partial) #no significant difference between disturbances in expansion

# Same model for the contraction proportion
dist.contr.anova.partial<- aov(log(mean.contr.prop.adult)~factor(disturbance), data=overlap.table.partialset)
plot(dist.contr.anova.partial)
summary(dist.contr.anova.partial)#no difference between disturbances for contractions

# Back to a single plotting panel for the histograms
par(mfrow=c(1,1))
hist(overlap.table.partialset$mean.exp.prop.adult) #not normal
hist(overlap.table.partialset$mean.contr.prop.adult) #normal-ish
# F test of equal variances between the two columns
var.test(overlap.table.partialset$mean.exp.prop.adult, overlap.table.partialset$mean.contr.prop.adult, alternative = "two.sided")
# Unpaired rank-sum test.
# NOTE(review): both columns come from the same rows of the table, so a paired
# test may be the better fit - confirm.
wilcox.test(overlap.table.partialset$mean.exp.prop.adult, overlap.table.partialset$mean.contr.prop.adult) #significant difference between amount of contraction and amount of expansion


```

There is a moderate correlation between expansion and contraction, with species/disturbances that experienced more expansion generally experiencing less contraction. 

```{r, echo=FALSE}
##are expansion and contraction correlated?
# Correlation test (cor.test default method) and scatterplot of expansion
# against contraction proportion, first for the full set ...
cor.test(overlap.table.fullset$mean.exp.prop.adult,overlap.table.fullset$mean.contr.prop.adult)
par(mfrow=c(1,1))
plot(overlap.table.fullset$mean.exp.prop.adult,overlap.table.fullset$mean.contr.prop.adult)

# ... and then for the partial set
cor.test(overlap.table.partialset$mean.exp.prop.adult,overlap.table.partialset$mean.contr.prop.adult)
par(mfrow=c(1,1))
plot(overlap.table.partialset$mean.exp.prop.adult,overlap.table.partialset$mean.contr.prop.adult)

```

### Vector Analysis

The approach follows the SIBER centroid-vectors vignette: <https://cran.r-project.org/web/packages/SIBER/vignettes/Centroid-Vectors.html>

```{r}
# Centroids of every element of `ellipses.posterior.list`
# (presumably one fitted posterior per species/disturbance - confirm upstream).
# lapply() carries the element names through and is a no-op on an empty list,
# where the previous `1:length()` loops would have indexed element 1 and 0.
centroids.list <- lapply(ellipses.posterior.list, siberCentroids)

# Angle and distance of the vectors between centroids, without SIBER's own plot
angles_distances.list <- lapply(centroids.list, allCentroidVectors, do.plot = FALSE)

```

```{r}
#make plots
# Everything below iterates with seq_along() over preallocated lists so that an
# empty input list is handled (1:length(x) would yield c(1, 0)).

# Median angle and median distance per comparison, one summary per list element
median_vectors.list <- vector("list", length(angles_distances.list))
for (i in seq_along(angles_distances.list)) {
  median_vectors <- dplyr::summarise(
    group_by(angles_distances.list[[i]], comparison),
    medAngle = median(angles), medDist = median(distances)
  )
  median_vectors.list[[i]] <- median_vectors
}
names(median_vectors.list) <- names(ellipses.posterior.list)

# Arrow tails: every median vector starts at the origin
origins.list <- vector("list", length(median_vectors.list))
for (i in seq_along(median_vectors.list)) {
  origins <- data.frame(comparison = median_vectors.list[[i]]$comparison,
                        x = 0, y = 0)
  origins.list[[i]] <- origins
}
names(origins.list) <- names(ellipses.posterior.list)

# Arrow heads: polar (medDist, medAngle) converted to cartesian x/y
ends.list <- vector("list", length(median_vectors.list))
for (i in seq_along(median_vectors.list)) {
  ends <- with(median_vectors.list[[i]],
               data.frame(comparison = comparison,
                          x = medDist * cos(medAngle),
                          y = medDist * sin(medAngle)))
  ends.list[[i]] <- ends
}
names(ends.list) <- names(ellipses.posterior.list)

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# generate the start and end points of the medians for the arrows
# (origin rows first, then tip rows, so geom_path draws tail -> head)
for_arrows.list <- vector("list", length(origins.list))
for (i in seq_along(origins.list)) {
  for_arrows <- dplyr::bind_rows(origins.list[[i]], ends.list[[i]])
  for_arrows.list[[i]] <- for_arrows
}
names(for_arrows.list) <- names(ends.list)

# rename the comparison label for nice plot labels below
# aa <- unlist(strsplit(as.character(for_arrows$comparison), "[.]"))
# aa <- aa[seq(3,length(aa),5)]
# for_arrows$comparison2 <- factor(aa)


# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
# create the cartesian points for the estimated tips of the arrows
# (one x/y pair per row of each angles/distances table)
cart_positions.list <- vector("list", length(angles_distances.list))
for (i in seq_along(angles_distances.list)) {
  cart_positions <- with(angles_distances.list[[i]],
                         data.frame(x = distances * cos(angles),
                                    y = distances * sin(angles),
                                    comparison = comparison))
  cart_positions.list[[i]] <- cart_positions
}
names(cart_positions.list) <- names(angles_distances.list)

#rename
# Overwrite `comparison` with the list element name so the stacked tables can
# later be split on that label. seq_along() keeps these loops safe when the
# lists are empty.
for (i in seq_along(cart_positions.list)) {
  cart_positions.list[[i]]$comparison <- names(cart_positions.list)[i]
}

for (i in seq_along(for_arrows.list)) {
  for_arrows.list[[i]]$comparison <- names(for_arrows.list)[i]
}

# Stack all list elements into one table each
for_arrows.all <- do.call("rbind", for_arrows.list)
cart_positions.all <- do.call("rbind", cart_positions.list)
# rename as above
# bb <- unlist(strsplit(as.character(angles_distances$comparison), "[.]"))
# bb <- bb[seq(3,length(bb),5)]
# cart_positions$comparison2 <- factor(bb)


# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
# plot it
# 2-D binned counts of the vector tips, one panel per comparison, with the
# median vector drawn on top as a red arrow
ggplot(cart_positions.all, aes(x, y)) +
  geom_bin2d(bins = 20) +
  geom_path(
    data = for_arrows.all,
    arrow = arrow(type = "open", length = unit(0.2, "cm")),
    colour = "red",
    alpha = 0.6
  ) +
  scale_fill_gradient(low = "white", high = "black") +
  #coord_cartesian(xlim = c(-20, +20), ylim = c(-20, +20)) +
  facet_wrap(~ comparison, scales = "fixed") +
  labs(x = "PC1", y = "PC2") +
  theme_classic() +
  theme(text = element_text(size = 15))

#lump by disturbance
# Join key must be character on both sides
species.names$species.code <- as.character(species.names$species.code)

# Split the "<species>_<disturbance>_<extra>" comparison label into columns,
# attach the species names, and drop
#   * fire rows for species listed in `insectonlyspp`,
#   * insect.disease rows for species listed in `fireonlyspp`,
#   * singleleaf pinyon entirely.
# Shared by both tables below so the two pipelines cannot drift apart.
split_comparison_label <- function(df) {
  df %>%
    separate(comparison, into = c("species", "disturbance", "extra"), sep = "_") %>%
    left_join(species.names, by = c("species" = "species.code")) %>%
    filter(!(disturbance == "fire" & species.name %in% insectonlyspp)) %>%
    filter(!(disturbance == "insect.disease" & species.name %in% fireonlyspp)) %>%
    filter(!species.name == "singleleaf pinyon")
}

cart_positions.all <- split_comparison_label(cart_positions.all)
for_arrows.all <- split_comparison_label(for_arrows.all)

#number species by elevation
#based on PC1 mean score of adults
species.numbers <- data.frame(
  species.name = c("two needle pinyon", "Gambel oak", "ponderosa pine",
                   "Rocky Mountain juniper", "white fir", "trembling aspen",
                   "Douglas-fir", "western redcedar", "limber pine",
                   "grand fir", "western larch", "lodgepole pine",
                   "Engelmann spruce", "subalpine fir", "whitebark pine"),
  species.number = seq(1, 15, 1)
)

# Explicit key: avoids the "Joining, by = ..." message and an accidental
# multi-column natural join if another shared column is ever added
for_arrows.all <- for_arrows.all %>%
  left_join(species.numbers, by = "species.name")

# Binned vector tips per species (five panels per row); median vectors are
# overlaid as arrows coloured by disturbance
ggplot(cart_positions.all, aes(x, y)) +
  geom_bin2d(bins = 50) +
  geom_path(
    data = for_arrows.all,
    aes(colour = disturbance),
    arrow = arrow(type = "open", length = unit(0.2, "cm")),
    alpha = 1,
    size = 1.5
  ) +
  scale_fill_gradient(low = "white", high = "black") +
  scale_color_manual(values = c(disturbance.colors[1:2], "black")) +
  coord_cartesian(xlim = c(-.2, +.5), ylim = c(-.4, +.2)) +
  facet_wrap(~ factor(species.name), scales = "fixed", ncol = 5) +
  labs(x = "PC1", y = "PC2") +
  theme_classic() +
  theme(text = element_text(size = 15))

##finding colors for boxplot
library(viridis)
library(RColorBrewer)
display.brewer.pal(n=4, name = 'BrBG')

# Install ochRe from GitHub only when it is missing; an unconditional
# install_github() hit the network (and could reinstall) on every run/knit.
if (!requireNamespace("ochRe", quietly = TRUE)) {
  devtools::install_github("ropenscilabs/ochRe")
}
library("ochRe")
names(ochre_palettes)
#viz_palette(ochre_palettes[["parliament"]])

#install.packages("pals")
library(pals)

# Four BrBG colours used to shade the quadrant wedges / angle bands in the
# figures that follow
rect.colors = brewer.pal(n=4, name='BrBG')
# rect.colors = ochre_palettes[["parliament"]][c(1,4,6,5)]
# rect.colors = ochre_palettes[["olsen_seq"]][c(1,13,10,12)]
# rect.colors = tol()[c(8,5,3,2)]
#another option: c(#888C24,#004D60,#C9F0E8,#C63B0B,#C63B0B)
#####

# Corner coordinates of the four wedges that meet at the origin; the infinite
# coordinates make each wedge run out to the panel edge.
df.top.triangle <- data.frame(x=c(-Inf,0,Inf),y=c(Inf,0,Inf))
df.right.triangle <- data.frame(x=c(Inf,0,Inf),y=c(-Inf,0,Inf))
df.bottom.triangle <- data.frame(x=c(-Inf,0,Inf),y=c(-Inf,0,-Inf))
df.left.triangle <- data.frame(x=c(-Inf,0,-Inf),y=c(-Inf,0,Inf))

# Build one "arrows" figure: shaded wedges, axes and dashed diagonals, optional
# median-vector arrows with species-number labels, the four climate
# annotations, and fixed +/-0.5 axis limits. The five figures below used to be
# five hand-copied ggplot chains; they now differ only in the arguments passed.
#
# positions   data frame supplied as the base plot data (needs x, y, and
#             `disturbance` when facetting)
# arrows      data frame of arrow start/end rows (x, y, species.name,
#             species.number); NULL draws no arrows and no labels
# title       plot title, centred; NULL for none
# facet_ncol  number of facet columns when facetting by disturbance; NULL for
#             a single unfacetted panel
# panel_gap   horizontal spacing between facet panels, in points
# plot_margin unit vector for `plot.margin`; NULL keeps the theme default
#
# Reads the globals `rect.colors` and the four df.*.triangle data frames.
# Returns the ggplot object.
build_arrows_plot <- function(positions, arrows = NULL, title = NULL,
                              facet_ncol = NULL, panel_gap = 25,
                              plot_margin = NULL) {
  p <- ggplot(positions, aes(x, y)) +
    geom_polygon(data = df.top.triangle, aes(x, y), fill = rect.colors[3], alpha = 0.5) +
    geom_polygon(data = df.right.triangle, aes(x, y), fill = rect.colors[4], alpha = 0.5) +
    geom_polygon(data = df.bottom.triangle, aes(x, y), fill = rect.colors[1], alpha = 0.5) +
    geom_polygon(data = df.left.triangle, aes(x, y), fill = rect.colors[2], alpha = 0.5) +
    geom_hline(yintercept = 0) +
    geom_vline(xintercept = 0) +
    geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
    geom_abline(intercept = 0, slope = -1, linetype = "dashed")

  if (!is.null(arrows)) {
    p <- p +
      geom_path(data = arrows, aes(col = species.name),
                arrow = arrow(type = "open", length = unit(0.2, "cm")),
                alpha = 0.7, size = 1) +
      # label only the arrow tips: the origin rows have y == 0
      geom_text(data = arrows %>% filter(!y == 0),
                aes(label = species.number, x = x, y = y),
                size = 5, hjust = -.4, vjust = .4, col = "darkred")
  }

  p <- p +
    # every species arrow is drawn black; the colour mapping only groups paths
    scale_color_manual(values = rep("black", 15), labels = NULL, name = NULL) +
    annotate("text", x = 0.13, y = 0.47, label = "cold/dry", size = 5) +
    annotate(geom = "text", x = 0.39, y = -0.02, label = "cold/wet", size = 5) +
    annotate(geom = "text", x = 0.1, y = -0.47, label = "hot/wet", size = 5) +
    annotate(geom = "text", x = -0.4, y = -0.02, label = "hot/dry", size = 5) +
    ylab("PC2 (16.09%)") +
    xlab("PC1 (74.4%)") +
    scale_y_continuous(limits = c(-.5, .5), expand = c(0, 0)) +
    scale_x_continuous(limits = c(-.5, .5), expand = c(0, 0)) +
    theme_classic() +
    theme(text = element_text(size = 20, color = "black"),
          legend.position = "none",
          axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, color = "black"),
          axis.text.y = element_text(color = "black"),
          panel.spacing.x = unit(panel_gap, "points"))

  if (!is.null(facet_ncol)) {
    p <- p +
      facet_wrap(~ factor(disturbance,
                          levels = c("fire", "insect.disease", "none"),
                          labels = c("fire", "insect/disease", "none")),
                 scales = "fixed", ncol = facet_ncol)
  }
  if (!is.null(title)) {
    p <- p + labs(title = title) + theme(plot.title = element_text(hjust = 0.5))
  }
  if (!is.null(plot_margin)) {
    p <- p + theme(plot.margin = plot_margin)
  }
  p
}

# All disturbances, facetted two panels per row
arrowsplot1 <- build_arrows_plot(cart_positions.all, for_arrows.all, facet_ncol = 2)

#separate panels of arrows plot
arrowsplot_fire <- build_arrows_plot(
  cart_positions.all %>% filter(disturbance == "fire"),
  for_arrows.all %>% filter(disturbance == "fire"),
  title = "Fire"
)

arrowsplot_insect.disease <- build_arrows_plot(
  cart_positions.all %>% filter(disturbance == "insect.disease"),
  for_arrows.all %>% filter(disturbance == "insect.disease"),
  title = "Insect/Disease"
)

arrowsplot_none <- build_arrows_plot(
  cart_positions.all %>% filter(disturbance == "none"),
  for_arrows.all %>% filter(disturbance == "none"),
  title = "None"
)

# Background only (no arrows or labels), three panels in a row, with a larger
# bottom margin and slightly tighter panel spacing
blank_arrows_plot <- build_arrows_plot(
  cart_positions.all,
  facet_ncol = 3,
  panel_gap = 20,
  plot_margin = unit(c(5.5, 7, 70, 5.5), "points")
)

#option2
# ggplot(cart_positions.all, aes(x,y)) +
#     geom_bin2d(bins = 50, aes(fill= ..density..)) +
#   scale_fill_gradient(low="#EBFBFF",high="#004D60") +
#   coord_cartesian(xlim = c(-.4, +.4), ylim = c(-.4, +.4)) +
#   facet_wrap( ~ factor(disturbance), scales = "fixed", ncol=3) + 
#   theme_classic() +
#   geom_hline(yintercept=0)+geom_vline(xintercept=0)+
#   geom_abline(intercept=0, slope = 1, linetype="dashed")+
#   geom_abline(intercept=0, slope = -1, linetype="dashed")+
#   geom_path(data = for_arrows.all, aes(col=species.name),
#             arrow = arrow(type = "open", length = unit(0.2, "cm")),
#             alpha = 0.7, size=1) +
#   scale_color_manual(values=rep("black",13),labels=NULL,name=NULL)+
#   #scale_color_manual(values=c("#d55f90","#5aa554","#ad74d6","#9ab13f","#6070d5","#cd9337","#563686","#45c097","#8b2965","#9b8a40","#ca71c0","#b95336","#628ed6","#ba4758"))+
#   geom_text(data=for_arrows.all %>% filter(!y==0),aes(label=species.number,x=x,y=y),size=5,hjust=-.4,vjust=.4,col="darkred")+
#   annotate("text",x=0,y=0.4,label="hot/wet",size=5)+
#   annotate(geom="text",x=0.4,y=0,label="cold/wet",size=5,angle=90)+
#   annotate(geom="text",x=0,y=-0.4,label="cold/dry",size=5)+
#   annotate(geom="text",x=-0.4,y= 0,label="hot/dry",size=5,angle=90)+
#   ylab("PC2") +
#   xlab("PC1") + 
#   theme(text = element_text(size=15))
```

```{r}
###################### VARIMAX PLOT ################################

# Arrows figure for the varimax solution: same layout as the other arrows
# figures, but the wedge fills are assigned in a different order and the wedges
# are labelled wet / hot / dry / cold. Three facets in one row.
arrowsplot_varimax <- ggplot(cart_positions.all, aes(x, y)) +
  # shaded wedges (drawn first so everything else sits on top)
  geom_polygon(data = df.top.triangle, aes(x, y), fill = rect.colors[4], alpha = 0.5) +
  geom_polygon(data = df.right.triangle, aes(x, y), fill = rect.colors[1], alpha = 0.5) +
  geom_polygon(data = df.bottom.triangle, aes(x, y), fill = rect.colors[2], alpha = 0.5) +
  geom_polygon(data = df.left.triangle, aes(x, y), fill = rect.colors[3], alpha = 0.5) +
  # axes and dashed diagonals through the origin
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_abline(intercept = 0, slope = -1, linetype = "dashed") +
  # median vectors, all black, with the species number at each tip
  geom_path(
    data = for_arrows.all,
    aes(colour = species.name),
    arrow = arrow(type = "open", length = unit(0.2, "cm")),
    alpha = 0.7,
    size = 1
  ) +
  geom_text(
    data = for_arrows.all %>% filter(!y == 0),
    aes(label = species.number, x = x, y = y),
    size = 5, hjust = -.4, vjust = .4, colour = "darkred"
  ) +
  annotate("text", x = 0.13, y = 0.47, label = "wet", size = 5) +
  annotate(geom = "text", x = 0.39, y = -0.02, label = "hot", size = 5) +
  annotate(geom = "text", x = 0.1, y = -0.47, label = "dry", size = 5) +
  annotate(geom = "text", x = -0.4, y = -0.02, label = "cold", size = 5) +
  scale_color_manual(values = rep("black", 15), labels = NULL, name = NULL) +
  scale_y_continuous(limits = c(-.5, .5), expand = c(0, 0)) +
  scale_x_continuous(limits = c(-.5, .5), expand = c(0, 0)) +
  facet_wrap(
    ~ factor(disturbance,
             levels = c("fire", "insect.disease", "none"),
             labels = c("fire", "insect/disease", "none")),
    scales = "fixed", ncol = 3
  ) +
  labs(x = "PC1", y = "PC2") +
  theme_classic() +
  theme(
    text = element_text(size = 20, color = "black"),
    legend.position = "none",
    plot.margin = unit(c(5.5, 7, 70, 5.5), "points"),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, color = "black"),
    axis.text.y = element_text(color = "black"),
    panel.spacing.x = unit(20, "points")
  )


```


```{r}
#stats on median vectors
# Tag each summary with its list name; seq_along() keeps the loop a no-op on an
# empty list (1:length(x) would have run for i = 1 and i = 0).
for (i in seq_along(median_vectors.list)) {
  median_vectors.list[[i]]$comparison <- names(median_vectors.list)[i]
}

# Stack the summaries, split the label into species / disturbance / extra,
# attach species names, and drop fire rows for `insectonlyspp`, insect.disease
# rows for `fireonlyspp`, and singleleaf pinyon
median_vectors_all <- do.call("rbind", median_vectors.list) %>% 
  separate(comparison,into=c("species","disturbance","extra"),sep="_") %>% 
  left_join(species.names,by=c('species'="species.code")) %>% 
  filter(!(disturbance=="fire" & species.name %in% insectonlyspp)) %>% 
  filter(!(disturbance=="insect.disease" & species.name %in% fireonlyspp)) %>% 
  filter(!species.name=="singleleaf pinyon")

#distance model 8 spp
# One-way ANOVA of log median distance by disturbance, restricted to species in
# neither `insectonlyspp` nor `fireonlyspp`; diagnostics on a 2x2 grid, then
# Tukey pairwise comparisons
distmodmed.8spp <- aov(
  log(medDist) ~ factor(disturbance),
  data = median_vectors_all %>% filter(!species.name %in% c(insectonlyspp)) %>%
    filter(!species.name %in% c(fireonlyspp))
)
par(mfrow = c(2, 2))
plot(distmodmed.8spp)
summary(distmodmed.8spp)
TukeyHSD(distmodmed.8spp)

# Hand-entered grouping letters and their y position for the figure
anova.stats.meddist <- data.frame(
  disturbance = c("fire", "insect.disease", "none"),
  y = c(.5, .5, .5),
  letter = c("A", "A", "A")
)

# Boxplot of median distance by disturbance.
# NOTE(review): this subset drops only `insectonlyspp`, while the model above
# also drops `fireonlyspp`; the letters and the hard-coded p-value therefore
# may not describe exactly the data drawn - confirm which subset is intended.
distance.boxplot <- ggplot(
  median_vectors_all %>%
    filter(!species.name %in% c(insectonlyspp)) %>%
    mutate(disturbance = factor(disturbance,
                                levels = c("fire", "insect.disease", "none"),
                                labels = c("fire", "insect/\ndisease", "none"))),
  aes(x = disturbance, y = medDist, fill = disturbance)
) +
  geom_boxplot() +
  geom_text(
    data = anova.stats.meddist %>%
      mutate(disturbance = factor(disturbance,
                                  levels = c("fire", "insect.disease", "none"),
                                  labels = c("fire", "insect/\ndisease", "none"))),
    aes(x = disturbance, y = y, label = letter),
    size = 5
  ) +
  annotate(geom = "text", x = 1, y = 0.04, label = "p = 0.05", size = 5) +
  ylim(c(0, 0.42)) +
  scale_fill_manual(values = c("lightgray", "darkgray", "white")) +
  labs(x = "", y = "Median distance") +
  theme_bw() +
  theme(
    text = element_text(size = 20, color = "black"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(color = "black"),
    legend.position = "none"
  )


# Empty version of the distance boxplot: medDist is set to 5, which lies
# outside the 0-0.42 y limits, so only the axes and frame are drawn
median_vectors_all_blank <- median_vectors_all %>%
  mutate(medDist = 5)

distance.boxplot_blank <- ggplot(
  median_vectors_all_blank %>%
    filter(!species.name %in% c(insectonlyspp)) %>%
    mutate(disturbance = factor(disturbance,
                                levels = c("fire", "insect.disease", "none"),
                                labels = c("fire", "insect/\ndisease", "none"))),
  aes(x = disturbance, y = medDist, fill = disturbance)
) +
  geom_boxplot() +
  ylim(c(0, 0.42)) +
  scale_fill_manual(values = c("lightgray", "darkgray", "white")) +
  labs(x = "", y = "Median distance") +
  #geom_text(data= anova.stats.meddist %>% mutate(disturbance = factor(disturbance, levels=c("fire","insect.disease","none"), labels=c("fire","insect/\ndisease","none"))), aes(x=disturbance,y=y,label=letter),size=5)+
  #annotate(geom="text",x=1,y=0.04,label="p = 0.05",size=5)+
  theme_bw() +
  theme(
    text = element_text(size = 20, color = "black"),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(color = "black"),
    legend.position = "none"
  )


#angle model 8 spp
# One-way ANOVA of median angle by disturbance on the same species subset as
# the distance model (neither `insectonlyspp` nor `fireonlyspp`).
# NOTE(review): medAngle is an angle in radians (plotted on a -3.14..3.14 axis
# below), so values near the two ends of that range are neighbours; a linear
# ANOVA does not account for this wrap-around - confirm it is acceptable here.
angmodmed <- aov(
  medAngle ~ factor(disturbance),
  data = median_vectors_all %>% filter(!species.name %in% c(insectonlyspp)) %>%
    filter(!species.name %in% c(fireonlyspp))
)
par(mfrow = c(2, 2))
plot(angmodmed)
summary(angmodmed)
#TukeyHSD(angmodmed)

# Hand-entered grouping letters and their y position for the angle figure
anova.stats.medang <- data.frame(
  disturbance = c("fire", "insect.disease", "none"),
  y = c(2.8, 2.8, 2.8),
  letter = c("A", "A", "A")
)

# Extra row with disturbance "x": it backs a fourth x position that holds the
# band labels (its axis tick text is coloured white in the theme below)
median_vectors_all2 <- median_vectors_all %>%
  add_row(disturbance = "x")

# Boxplot of median angle by disturbance over horizontal bands shaded with the
# same four colours as the wedges of the arrows figure. Layer order matters
# here: it fixes both the drawing order and the order of the discrete x axis.
angle.boxplot <- ggplot(
  median_vectors_all2,
  aes(
    x = factor(disturbance,
               levels = c("fire", "insect.disease", "none"),
               labels = c("fire", "insect/\ndisease", "none")),
    y = medAngle,
    fill = disturbance
  )
) +
  # shaded angle bands; the last two share a colour and sit at both ends
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.79, ymax = 2.35, fill = rect.colors[3], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -0.79, ymax = 0.79, fill = rect.colors[4], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -2.35, ymax = -0.79, fill = rect.colors[1], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -3.14, ymax = -2.35, fill = rect.colors[2], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 2.35, ymax = 3.14, fill = rect.colors[2], alpha = 0.5) +
  # hard-coded p-value label
  annotate("text", x = "fire", y = -3, label = "p = 0.15", hjust = 1, size = 5) +
  # band labels placed in the "x" column
  geom_text(aes(x = "x", y = 2.745), label = "hot/dry", size = 5) +
  geom_text(aes(x = "x", y = 1.7), label = "cold/dry", size = 5) +
  geom_text(aes(x = "x", y = 0.1), label = "cold/wet", size = 5) +
  geom_text(aes(x = "x", y = -1.7), label = "hot/wet", size = 5) +
  geom_text(aes(x = "x", y = -2.745), label = "hot/dry", size = 5) +
  geom_boxplot() +
  # raw points, jittered in both directions
  geom_jitter(
    data = median_vectors_all,
    aes(
      x = factor(disturbance,
                 levels = c("fire", "insect.disease", "none"),
                 labels = c("fire", "insect/\ndisease", "none")),
      y = medAngle
    ),
    width = 0.1, height = 0.1
  ) +
  labs(x = "", y = "Median angle (radians)", title = "") +
  ylim(c(-3.14, 3.14)) +
  scale_fill_manual(values = c("lightgray", "darkgray", "white", "white")) +
  #geom_text(data= anova.stats.medang, aes(x=factor(disturbance, levels=c("fire","insect.disease","none"),labels=c("fire","insect/\ndisease","none")),y=y,label=letter),size=5)+
  theme_bw() +
  theme(
    text = element_text(size = 20, color = "black"),
    axis.text = element_text(size = 20, color = "black"),
    legend.position = "none",
    axis.text.x = element_text(color = c(rep("black", 3), "white"), size = 16)
  )

# Empty version of the angle boxplot: medAngle is set to 5, outside the
# -3.14..3.14 y limits, so only the shaded bands and their labels remain
median_vectors_all2_blank <- median_vectors_all2 %>%
  mutate(medAngle = 5)

angle.boxplot_blank <- ggplot(
  median_vectors_all2_blank,
  aes(
    x = factor(disturbance,
               levels = c("fire", "insect.disease", "none"),
               labels = c("fire", "insect/\ndisease", "none")),
    y = medAngle,
    fill = disturbance
  )
) +
  # shaded angle bands
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 0.79, ymax = 2.35, fill = rect.colors[3], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -0.79, ymax = 0.79, fill = rect.colors[4], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -2.35, ymax = -0.79, fill = rect.colors[1], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = -3.14, ymax = -2.35, fill = rect.colors[2], alpha = 0.5) +
  annotate("rect", xmin = -Inf, xmax = Inf, ymin = 2.35, ymax = 3.14, fill = rect.colors[2], alpha = 0.5) +
  # band labels placed in the "x" column
  geom_text(aes(x = "x", y = 2.745), label = "hot/dry", size = 5) +
  geom_text(aes(x = "x", y = 1.7), label = "cold/dry", size = 5) +
  geom_text(aes(x = "x", y = 0.1), label = "cold/wet", size = 5) +
  geom_text(aes(x = "x", y = -1.7), label = "hot/wet", size = 5) +
  geom_text(aes(x = "x", y = -2.745), label = "hot/dry", size = 5) +
  geom_boxplot() +
  #geom_jitter(data=median_vectors_all,aes(x=factor(disturbance, levels=c("fire","insect.disease","none"),labels=c("fire","insect/\ndisease","none")), y=medAngle),width=0.1,height=0.1)+
  labs(x = "", y = "Median angle (radians)") +
  ylim(c(-3.14, 3.14)) +
  scale_fill_manual(values = c("lightgray", "darkgray", "white", "white")) +
  #geom_text(data= anova.stats.medang, aes(x=factor(disturbance, levels=c("fire","insect.disease","none"),labels=c("fire","insect/\ndisease","none")),y=y,label=letter),size=5)+
  theme_bw() +
  theme(
    text = element_text(size = 20, color = "black"),
    axis.text = element_text(size = 20, color = "black"),
    legend.position = "none",
    axis.text.x = element_text(color = c(rep("black", 3), "white"), size = 16)
  )
```

```{r}
#combine plots
# Panel A: facetted arrows figure; panels B/C: angle and distance boxplots
boxplots<- ggarrange(angle.boxplot,distance.boxplot,nrow=1,ncol=2, labels=c("B","C"), font.label = list(size=20))
arrowcentroidplot<- ggarrange(arrowsplot1,boxplots, nrow=2,ncol=1, heights = c(2,1), labels=c("A",""), font.label = list(size=20))
arrowcentroidplot

# Background-only counterparts of the same figures
boxplots_blank <- ggarrange(distance.boxplot_blank,angle.boxplot_blank,nrow=2,ncol=1)
arrowcentroidplot_blank<- ggarrange(blank_arrows_plot,boxplots_blank, nrow=1,ncol=2, widths=c(2.6,1))

# Alternative layout: one arrows panel per disturbance plus the angle boxplot
ggarrange(arrowsplot_fire,arrowsplot_insect.disease,arrowsplot_none, angle.boxplot,nrow=2,ncol=2,labels = c("A","B","C","D"),font.label = list(size=20))

# pdf() cannot open its file when the target directory is missing, so make sure
# it exists first (no-op, and no warning, when it is already there)
dir.create("figures", showWarnings = FALSE)

pdf(file="figures/arrowsplot22_subset_box.pdf",width=13,height=15)

arrowcentroidplot

dev.off()

# png(file="figures/arrowsplot22_subset.png",width=1200,height=700)
# 
# arrowcentroidplot
# 
# dev.off()

# png(file="figures/arrowsplot22_blank.png",width=1200,height=500)
# 
# arrowcentroidplot_blank
# 
# dev.off()

pdf(file="figures/arrowsplot22_subset_nodist.pdf",width=15,height=13)

# Same 2x2 layout as above, with larger panel labels for the exported file
ggarrange(arrowsplot_fire,arrowsplot_insect.disease,arrowsplot_none, angle.boxplot,nrow=2,ncol=2,labels = c("A","B","C","D"),font.label = list(size=26))

dev.off()

```

```{r}
##looking at averages
# Tag every angles/distances table with its list name (seq_along() keeps the
# loop a no-op on an empty list)
for (i in seq_along(angles_distances.list)) {
  angles_distances.list[[i]]$comparison <- names(angles_distances.list)[i]
}

# Stack, split the label, attach species names and apply the same species /
# disturbance exclusions as for the median vectors
angles_distances_all <- do.call("rbind",angles_distances.list) %>% 
  separate(comparison,into=c("species","disturbance","extra"),sep="_") %>% 
  left_join(species.names,by=c('species'="species.code")) %>% 
  filter(!(disturbance=="fire" & species.name %in% insectonlyspp)) %>% 
  filter(!(disturbance=="insect.disease" & species.name %in% fireonlyspp)) %>% 
  filter(!species.name=="singleleaf pinyon")

# Average vector per species x disturbance.
# Angles are directions, so they are averaged as unit vectors (circular mean):
# the arithmetic mean of e.g. 3.1 and -3.1 rad is 0, which points the opposite
# way from both inputs, whereas atan2(mean(sin), mean(cos)) gives ~pi.
angles_distances_ave <- angles_distances_all %>% 
  dplyr::group_by(species, disturbance) %>% 
  dplyr::summarise(ave.dist = mean(distances),
                   ave.angles = atan2(mean(sin(angles)), mean(cos(angles))))
  
#distance model 
# One-way ANOVA of log average distance by disturbance, with diagnostics and
# Tukey pairwise comparisons
distmod <- aov(log(ave.dist) ~ factor(disturbance), data=angles_distances_ave)
par(mfrow=c(2,2))
plot(distmod)
summary(distmod)
TukeyHSD(distmod)

ggplot(angles_distances_ave, aes(x=disturbance, y=ave.dist))+
  geom_boxplot()

  
#angle model 
# NOTE(review): this is still a linear ANOVA on an angular response; species
# whose average direction sits near +/-pi can look far apart on this scale -
# confirm whether a circular method is needed.
angmod <- aov(ave.angles ~ factor(disturbance), data=angles_distances_ave)
par(mfrow=c(2,2))
plot(angmod)
summary(angmod)
TukeyHSD(angmod)

ggplot(angles_distances_ave, aes(x=disturbance, y=ave.angles))+
  geom_boxplot()

```

### Niche-based Percentile Shifts

```{r}
#look at marginal posterior means
# ellipses.posterior.list
# 
# par(mfrow=c(4,3))
# 
# for(i in 1:length(ellipses.posterior.list)){
#   for(j in 1:2){
#     for(k in 1:6){
# 
# hist(ellipses.posterior.list[[i]][[j]][,k], breaks = 50, main=paste(names(ellipses.posterior.list)[i],j,sep="_"), xlab=colnames(ellipses.posterior.list[[i]][[j]])[k])
# abline(v=mean(ellipses.posterior.list[[i]][[j]][,k]), col="red", lwd=3)
#       
#     }
#   }
# }
# 
# par(mfrow=c(3,4))
# for(i in 1:length(ellipses.posterior.list)){
#     for(k in 5:6){
# hist(ellipses.posterior.list[[i]][[1]][,k], breaks = 50, main=paste(names(ellipses.posterior.list)[i]), xlab=colnames(ellipses.posterior.list[[i]][[1]])[k],col=rgb(1,0,0,0.5))
# hist(ellipses.posterior.list[[i]][[2]][,k], breaks = 50,  col=rgb(0,0,1,0.3), add=T)
# abline(v=mean(ellipses.posterior.list[[i]][[1]][,k]), col="red", lwd=3)
# abline(v=mean(ellipses.posterior.list[[i]][[2]][,k]), col="blue", lwd=3)
#     }
# }
# 
# posteriorlistcut<- ellipses.posterior.list#[-c(24,25)]
# 
# length(posteriorlistcut[grepl("fire", names(posteriorlistcut))])
# names_df <- data.frame(longname = names(posteriorlistcut)) %>% 
#   separate(longname,into=c("species","disturbance","extra"), sep="_") %>% 
#   left_join(species.names, by=c("species" = "species.code")) %>% 
#   mutate(plotnames = paste(species.name,disturbance, sep="_"))
# 
# pc_names <- c("PC2","PC1")  
# 
# par(mfrow=c(4,2))
# for(i in 1:length(posteriorlistcut[grepl("fire", names(posteriorlistcut))])
# ){
#     for(k in 5:6){
# png(filename = paste((names_df %>% filter(disturbance=="fire") %>% pull(plotnames))[i],"hist","2022subset",pc_names[7-k],".png",sep="_"))
# 
# hist(posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[1]][,k],
#      breaks = 50, 
#      main=paste((names_df %>% filter(disturbance=="fire") %>% pull(plotnames))[i]), 
#      xlab=pc_names[7-k],col=rgb(0,0,0,0.7),
#      xlim=c(min(c(posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[1]][,k],posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[2]][,k])),max(c(posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[1]][,k],posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[2]][,k]))), freq=F)
# 
# hist(posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[2]][,k],
#      breaks = 50,  
#      col=rgb(0.6,0,0,0.8), freq=F,add=T)
# 
# abline(v=mean(posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[1]][,k]), col="black", lwd=3)
# abline(v=mean(posteriorlistcut[grepl("fire", names(posteriorlistcut))][[i]][[2]][,k]), col="red", lwd=3)
# 
# legend("topright", c("Seedling", "Adult"), col = c("red","black"), lwd=3)
# 
# dev.off()
#     }
# }
# 
# 
# par(mfrow=c(4,2))
# for(i in 1:length(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))])
# ){
#     for(k in 5:6){
# png(filename = paste((names_df %>% filter(disturbance=="insect.disease") %>% pull(plotnames))[i],"hist","2022subset",pc_names[7-k],".png",sep="_"))
# 
# hist(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[1]][,k], 
#      breaks = 50, 
#      main=paste((names_df %>% filter(disturbance=="insect.disease") %>% pull(plotnames))[i]), 
#      xlab=pc_names[7-k],
#      col=rgb(0,0,0,0.7),
#      xlim=c(min(c(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[1]][,k],posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[2]][,k])),max(c(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[1]][,k],posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[2]][,k]))), freq=F)
# 
# hist(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[2]][,k], 
#      breaks = 50,  
#      col=rgb(0.2,0.6,0.8,0.8), 
#      freq=F,add=T)
# 
# abline(v=mean(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[1]][,k]), col="black", lwd=3)
# abline(v=mean(posteriorlistcut[grepl("insect.disease", names(posteriorlistcut))][[i]][[2]][,k]), col="blue", lwd=3)
# 
# legend("topright", c("Seedling", "Adult"), col = c("blue","black"), lwd=3)
# 
# dev.off()
#     }
# }
# 
# par(mfrow=c(4,2))
# for(i in 1:length(posteriorlistcut[grepl("none", names(posteriorlistcut))])
# ){
#     for(k in 5:6){
# png(filename = paste((names_df %>% filter(disturbance=="none") %>% pull(plotnames))[i],"hist","2022subset",pc_names[7-k],".png",sep="_"))
# 
# hist(posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[1]][,k], 
#      breaks = 50, 
#      main=paste((names_df %>% filter(disturbance=="none") %>% pull(plotnames))[i]), 
#      xlab=pc_names[7-k],
#      col=rgb(0,0,0,0.7),
#      xlim=c(min(c(posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[1]][,k],posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[2]][,k])),max(c(posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[1]][,k],posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[2]][,k]))), freq=F)
# 
# hist(posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[2]][,k], 
#      breaks = 50,  
#      col=rgb(0,0,0,0.3), 
#      freq=F,add=T)
# 
# abline(v=mean(posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[1]][,k]), col="black", lwd=3)
# abline(v=mean(posteriorlistcut[grepl("none", names(posteriorlistcut))][[i]][[2]][,k]), col="black", lwd=3, lty=4)
# 
# legend("topright", c("Seedling", "Adult"), col = c("black","black"), lty=c(4,1), lwd=3)
# 
# dev.off()
#     }
# }

```

```{r}
# Shifts in range margins using mean posterior estimates.
#
# For every entry of `ellipses.posterior.list`, element [[1]] is summarised as
# the "adult" posterior and element [[2]] as the "seedling" posterior.

# Column means of the posterior draws for one age class, one row per list
# entry. `colMeans() %>% as.data.frame.list()` is kept on purpose: the code
# below relies on the column names it produces (e.g. `mu.1.`, `mu.2.`).
posterior_means_by_age <- function(age_index, age_label) {
  per_entry <- lapply(ellipses.posterior.list, function(entry) {
    as.data.frame(entry[[age_index]]) %>%
      colMeans() %>%
      as.data.frame.list()
  })
  # unname() so the stacked frame gets automatic row names
  means <- do.call(rbind, unname(per_entry))
  means$name <- names(ellipses.posterior.list)
  means$age <- age_label
  means
}

adultmeans <- posterior_means_by_age(1, "adult")
seedmeans <- posterior_means_by_age(2, "seedling")

posteriormeans <- adultmeans %>%
  bind_rows(seedmeans)

# Simulate a bivariate normal from each row's mean vector (mu.1., mu.2.) and
# covariance (columns 1:4, filled column-wise) and record the 2.5/5/95/97.5%
# quantiles on each axis.
quant_probs <- c(0.025, 0.05, 0.95, 0.975)
pc1quants <- matrix(NA_real_, nrow = nrow(posteriormeans), ncol = length(quant_probs))
pc2quants <- pc1quants

for (i in seq_len(nrow(posteriormeans))) {
  # The seed is reset for every row, so each row uses the same random stream.
  # One draw serves both axes: the previous code drew the identical seeded
  # sample twice, once per axis.
  set.seed(583)
  draws <- mvrnorm(
    n = 5000,
    mu = as.vector(c(posteriormeans[i, ]$mu.1., posteriormeans[i, ]$mu.2.)),
    Sigma = matrix(as.vector(unlist(posteriormeans[i, 1:4])),
                   nrow = 2, ncol = 2, byrow = FALSE)
  )
  pc1quants[i, ] <- quantile(draws[, 1], quant_probs)
  pc2quants[i, ] <- quantile(draws[, 2], quant_probs)
}

# kept as data frames, as before; only the column positions are used below
pc1quants <- as.data.frame(pc1quants)
pc2quants <- as.data.frame(pc2quants)

posteriorquants <- posteriormeans %>%
  mutate(pc1_2.5 = pc1quants[, 1], pc1_5 = pc1quants[, 2],
         pc1_95 = pc1quants[, 3], pc1_97.5 = pc1quants[, 4],
         pc2_2.5 = pc2quants[, 1], pc2_5 = pc2quants[, 2],
         pc2_95 = pc2quants[, 3], pc2_97.5 = pc2quants[, 4])

# Adult and seedling rows side by side (columns suffixed "adult" / "seed").
# Each shift is the seedling-minus-adult difference in a quantile, divided by
# the adult range between the matching pair of quantiles (2.5-97.5 or 5-95).
quants_wide <- posteriorquants %>%
  filter(age == "adult") %>%
  rename_all(paste0, "adult") %>%
  left_join(posteriorquants %>% filter(age == "seedling") %>% rename_all(paste0, "seed"),
            by = c("nameadult" = "nameseed")) %>%
  mutate(pc1diff_97.5 = (pc1_97.5seed - pc1_97.5adult) / (pc1_97.5adult - pc1_2.5adult),
         pc1diff_2.5 = (pc1_2.5seed - pc1_2.5adult) / (pc1_97.5adult - pc1_2.5adult),
         pc1diff_5 = (pc1_5seed - pc1_5adult) / (pc1_95adult - pc1_5adult),
         pc1diff_95 = (pc1_95seed - pc1_95adult) / (pc1_95adult - pc1_5adult),
         pc2diff_97.5 = (pc2_97.5seed - pc2_97.5adult) / (pc2_97.5adult - pc2_2.5adult),
         pc2diff_2.5 = (pc2_2.5seed - pc2_2.5adult) / (pc2_97.5adult - pc2_2.5adult),
         pc2diff_5 = (pc2_5seed - pc2_5adult) / (pc2_95adult - pc2_5adult),
         pc2diff_95 = (pc2_95seed - pc2_95adult) / (pc2_95adult - pc2_5adult)) %>%
  separate(nameadult, into = c("species", "disturbance", "extra"), sep = "_") %>%
  dplyr::filter(! species == 133)
```


```{r}
# Check for normality: Shapiro-Wilk test of every relative quantile shift
# within each disturbance type.
disturbance.list <- unique(quants_wide$disturbance)

# Pick the shift columns (pc1diff_* / pc2diff_*) by name instead of by the
# hard-coded positions 34:41, so upstream column changes cannot silently
# change which variables are tested. Column order is preserved.
quants <- grep("^pc[12]diff_", colnames(quants_wide), value = TRUE)

shapiro_rows <- vector("list", length(disturbance.list) * length(quants))
row_id <- 0L
for (i in seq_along(disturbance.list)) {
  dist_rows <- quants_wide %>% filter(disturbance == disturbance.list[i])
  for (j in seq_along(quants)) {
    test <- shapiro.test(dist_rows[[quants[j]]])
    row_id <- row_id + 1L
    # Keep the p-value numeric. Storing it through c(p, "label", "label")
    # turned it into text, and text comparison against 0.05 misses p-values
    # printed in scientific notation (e.g. "1e-05").
    shapiro_rows[[row_id]] <- data.frame(
      pvalue = test$p.value,
      quant = quants[j],
      dist = disturbance.list[i],
      stringsAsFactors = FALSE
    )
  }
}
pvalues <- do.call(rbind, shapiro_rows)

# combinations where normality is rejected at the 5% level
pvalues %>%
  filter(pvalue < 0.05)

```


```{r}
# One-sample tests of whether each relative quantile shift differs from zero,
# run separately for every disturbance type: a t-test by default, replaced by
# a Wilcoxon signed-rank test wherever the Shapiro-Wilk check (`pvalues`)
# rejected normality.

# Both result tables start from a single all-NA placeholder row; downstream
# code drops it with filter(!is.na(agent)). Appending c(...) rows makes every
# column character, which the later as.numeric() conversions expect.
ttest.results <- data.frame(agent = NA, quant = NA, p.value = NA, estimate = NA, lowerci = NA, upperci = NA, t = NA, df = NA, wilcoxon = NA)
wilcox.results <- ttest.results
quant.list <- quants

for (i in seq_along(disturbance.list)) {
  dist_rows <- quants_wide %>% filter(disturbance == disturbance.list[i])

  for (j in seq_along(quant.list)) {
    shifts <- dist_rows[[quant.list[j]]]

    # t-test against mu = 0
    test.temp <- t.test(shifts, y = NULL, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95)
    ttest.results <- rbind(ttest.results, c(agent = disturbance.list[i], quant = quant.list[j], p.value = test.temp$p.value, estimate = test.temp$estimate, lowerci = test.temp$conf.int[1], upperci = test.temp$conf.int[2], t = test.temp$statistic, df = test.temp$parameter, wilcoxon = "n"))

    # Wilcoxon test against mu = 0; its statistic is stored in the `t` column
    test.temp <- wilcox.test(shifts, y = NULL, alternative = "two.sided", mu = 0, conf.int = TRUE, conf.level = 0.95)
    wilcox.results <- rbind(wilcox.results, c(agent = disturbance.list[i], quant = quant.list[j], p.value = test.temp$p.value, estimate = test.temp$estimate, lowerci = test.temp$conf.int[1], upperci = test.temp$conf.int[2], t = test.temp$statistic, df = NA, wilcoxon = "y"))
  }
}

# Keep only the Wilcoxon rows whose data failed the normality check. The
# Shapiro p-value is converted to numeric first: compared as text, values
# such as "1e-05" would not count as < 0.05.
wilcox.results.cut <- wilcox.results %>%
  left_join(pvalues, by = c("agent" = "dist", "quant")) %>%
  filter(as.numeric(as.character(pvalue)) < 0.05) %>%
  dplyr::select(-pvalue)

# update t-test results with Wilcoxon results for non-normal data
ttest.results <- ttest.results %>%
  rows_update(wilcox.results.cut, by = c("agent", "quant"))

```

```{r}
# Centroid shifts: the x / y arrow components divided by the adult
# 2.5-97.5 percentile range on PC1 / PC2 respectively.
centroidshifts <- for_arrows.all %>%
  filter(!y == 0) %>%
  left_join(quants_wide, by = c("species", "disturbance")) %>%
  mutate(pc1centroiddiff = x / (pc1_97.5adult - pc1_2.5adult),
         pc2centroiddiff = y / (pc2_97.5adult - pc2_2.5adult))

# Check for normality (Shapiro-Wilk) per disturbance type and PC axis.
# P-values stay numeric so the 0.05 threshold below is a numeric comparison,
# and each row carries its own PC label instead of relying on there being
# exactly three disturbance types.
cenpvalues1 <- data.frame(pvalue = NA_real_, disturbance = disturbance.list, pc = "pc1", stringsAsFactors = FALSE)
cenpvalues2 <- data.frame(pvalue = NA_real_, disturbance = disturbance.list, pc = "pc2", stringsAsFactors = FALSE)
for (i in seq_along(disturbance.list)) {
  dist_rows <- centroidshifts %>% filter(disturbance == disturbance.list[i])
  cenpvalues1$pvalue[i] <- shapiro.test(dist_rows$pc1centroiddiff)$p.value
  cenpvalues2$pvalue[i] <- shapiro.test(dist_rows$pc2centroiddiff)$p.value
}
cenpvalues1
cenpvalues2

# disturbance x PC combinations whose shifts are not normally distributed
# (author's note from the original run: "pc2 none is not normal")
shapirotest_nonnormal <- rbind(cenpvalues1, cenpvalues2) %>%
  filter(pvalue < 0.05)

# One-sample t-tests of the centroid shift against zero, per disturbance type.
# Rows are written as character vectors, matching the layout of ttest.results.
centroid.results.pc1 <- data.frame(agent = NA, pc = NA, quant = NA, p.value = NA, estimate = NA, lowerci = NA, upperci = NA, t = NA, df = NA, wilcoxon = NA)
centroid.results.pc2 <- centroid.results.pc1
disturbance.list = c("fire","insect.disease","none")

for (i in seq_along(disturbance.list)) {
  dist_rows <- centroidshifts %>% filter(disturbance == disturbance.list[i])

  test <- t.test(dist_rows$pc1centroiddiff)
  centroid.results.pc1[i, ] <- c(agent = disturbance.list[i], pc = "pc1", quant = "centroid", p.value = test$p.value, estimate = test$estimate, lowerci = test$conf.int[1], upperci = test$conf.int[2], t = test$statistic, df = test$parameter, wilcoxon = "n")

  test <- t.test(dist_rows$pc2centroiddiff)
  centroid.results.pc2[i, ] <- c(agent = disturbance.list[i], pc = "pc2", quant = "centroid", p.value = test$p.value, estimate = test$estimate, lowerci = test$conf.int[1], upperci = test$conf.int[2], t = test$statistic, df = test$parameter, wilcoxon = "n")
}

# Wilcoxon test for every combination flagged as non-normal. The tested column
# follows the flagged PC (previously pc2centroiddiff was always tested, even
# for a row labelled pc1), and the loop is skipped when nothing is flagged.
wilcox.results.centroid <- data.frame(agent = NA, pc = NA, quant = NA, p.value = NA, estimate = NA, lowerci = NA, upperci = NA, t = NA, df = NA, wilcoxon = NA)
for (i in seq_len(nrow(shapirotest_nonnormal))) {
  shift_col <- paste0(shapirotest_nonnormal$pc[i], "centroiddiff")
  dist_rows <- centroidshifts %>% filter(disturbance == shapirotest_nonnormal$disturbance[i])
  test <- wilcox.test(dist_rows[[shift_col]], y = NULL, alternative = "two.sided", mu = 0, conf.int = TRUE, conf.level = 0.95)

  wilcox.results.centroid[i, ] <- c(agent = shapirotest_nonnormal$disturbance[i], pc = shapirotest_nonnormal$pc[i], quant = "centroid", p.value = test$p.value, estimate = test$estimate, lowerci = test$conf.int[1], upperci = test$conf.int[2], t = test$statistic, df = NA, wilcoxon = "y")
}
# drop the all-NA placeholder row if no Wilcoxon test was needed
wilcox.results.centroid <- wilcox.results.centroid %>%
  filter(!is.na(agent))

# Swap in the Wilcoxon rows where they apply, then make the statistics numeric.
finalise_centroid_results <- function(results) {
  results %>%
    rows_update(wilcox.results.centroid, by = c("agent", "pc", "quant"), unmatched = "ignore") %>%
    mutate(estimate = as.numeric(estimate),
           upperci = as.numeric(upperci),
           lowerci = as.numeric(lowerci),
           p.value = as.numeric(p.value),
           t = as.numeric(t))
}
centroid.results.pc1 <- finalise_centroid_results(centroid.results.pc1)
centroid.results.pc2 <- finalise_centroid_results(centroid.results.pc2)

# Combine the quantile-shift tests with the centroid tests. `quant` names such
# as "pc1diff_2.5" are split into the PC ("pc1") and the quantile ("2.5").
percentile.results <- ttest.results %>%
  separate(quant, into = c("pc", "quant"), sep = "_") %>%
  mutate(pc = substr(pc, 1, 3)) %>%
  filter(!is.na(agent)) %>%
  mutate(estimate = as.numeric(estimate),
         upperci = as.numeric(upperci),
         lowerci = as.numeric(lowerci),
         p.value = as.numeric(p.value),
         t = as.numeric(t)) %>%
  bind_rows(centroid.results.pc1, centroid.results.pc2) %>%  #add in centroids
  mutate(quant = factor(quant, levels = c("2.5", "5", "centroid", "95", "97.5")))
```

```{r}
# ANOVAs: does the relative shift differ among disturbance types?
centroidshifts

# Every model below uses the same subset: species listed in insectonlyspp or
# fireonlyspp are left out.
anova_shifts <- centroidshifts %>%
  filter(!species.name %in% c(insectonlyspp, fireonlyspp))

# --- 2.5th percentile ---
# PC1
pc1.mod2.5 <- lm(pc1diff_2.5 ~ factor(disturbance), data = anova_shifts)
par(mfrow = c(2, 2))
plot(pc1.mod2.5)
leveneTest(pc1diff_2.5 ~ factor(disturbance), data = anova_shifts)
anova(pc1.mod2.5)
# PC2
pc2.mod2.5 <- lm(pc2diff_2.5 ~ factor(disturbance), data = anova_shifts)
par(mfrow = c(2, 2))
plot(pc2.mod2.5)
leveneTest(pc2diff_2.5 ~ factor(disturbance), data = anova_shifts)
anova(pc2.mod2.5)
TukeyHSD(aov(pc2diff_2.5 ~ factor(disturbance), data = anova_shifts))

# --- centroid ---
# PC1 (author's note: THIS IS SIGNIFICANT)
pc1.modcent <- lm(pc1centroiddiff ~ factor(disturbance), data = anova_shifts)
par(mfrow = c(2, 2))
plot(pc1.modcent)
leveneTest(pc1centroiddiff ~ factor(disturbance), data = anova_shifts)
anova(pc1.modcent)
TukeyHSD(aov(pc1centroiddiff ~ factor(disturbance), data = anova_shifts))
# PC2
pc2.modcent <- lm(pc2centroiddiff ~ factor(disturbance), data = anova_shifts)
par(mfrow = c(2, 2))
plot(pc2.modcent)
leveneTest(pc2centroiddiff ~ factor(disturbance), data = anova_shifts)
anova(pc2.modcent)
TukeyHSD(aov(pc2centroiddiff ~ factor(disturbance), data = anova_shifts))

# --- 97.5th percentile ---
# PC1
pc1.mod97.5 <- lm(pc1diff_97.5 ~ factor(disturbance), data = anova_shifts)
par(mfrow = c(2, 2))
plot(pc1.mod97.5)
leveneTest(pc1diff_97.5 ~ factor(disturbance), data = anova_shifts)
anova(pc1.mod97.5)
# PC2
pc2.mod97.5 <- lm(pc2diff_97.5 ~ factor(disturbance), data = anova_shifts)
par(mfrow = c(2, 2))
plot(pc2.mod97.5)
leveneTest(pc2diff_97.5 ~ factor(disturbance), data = anova_shifts)
anova(pc2.mod97.5)

```

```{r}
#plot new figure 2

# Pieces shared by every panel ----
# Colour and fill both follow the disturbance palette
# (an earlier hard-coded alternative was c("#d7191c", "#fdae61", "#2b83ba")).
agent_scales <- list(
  scale_color_manual(values = disturbance.colors),
  scale_fill_manual(values = disturbance.colors)
)
# Added after theme_bw() in every plot.
shift_theme <- theme(axis.text.x = element_text(angle = 45, hjust = 1),
                     axis.text = element_text(size = 12, color = "black"),
                     text = element_text(size = 12),
                     legend.text = element_text(size = 12),
                     strip.text = element_text(color = "black", size = 12),
                     legend.position = "none")

## dataframe for stars: a star is drawn at y = 0.15 wherever p < 0.05
pvalues.percentileshifts <- percentile.results %>%
  mutate(sym.pos = ifelse(p.value < 0.05, 0.15, NA))

pc1_shifts <- percentile.results %>% filter(pc == "pc1")
pc2_shifts <- percentile.results %>% filter(pc == "pc2")
pc1_stars <- pvalues.percentileshifts %>% filter(pc == "pc1", !is.na(sym.pos))
pc2_stars <- pvalues.percentileshifts %>% filter(pc == "pc2", !is.na(sym.pos))

# All five quantiles, PC1 (flipped axes) ----
ggplot(pc1_shifts, aes(x = agent, y = estimate, color = agent, fill = agent)) +
  geom_point(shape = 23, size = 3.5) +
  geom_errorbar(aes(ymin = lowerci, ymax = upperci), width = 0.3, size = 1) +
  agent_scales +
  geom_text(data = pc1_stars, aes(x = agent, y = sym.pos), label = "*", color = "black", size = 8) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-.3, .3)) +
  coord_flip() +
  facet_wrap(~ factor(quant, levels = c("2.5", "5", "centroid", "95", "97.5"))) +
  ggtitle("Shifts on PC1") +
  ylab("Difference on PC1") +
  xlab("") +
  theme_bw() +
  shift_theme

# All five quantiles, PC2 ----
ggplot(pc2_shifts, aes(x = agent, y = estimate, color = agent, fill = agent)) +
  geom_point(shape = 23, size = 3.5) +
  geom_errorbar(aes(ymin = lowerci, ymax = upperci), width = 0.3, size = 1) +
  agent_scales +
  geom_text(data = pc2_stars, aes(x = agent, y = sym.pos), label = "*", color = "black", size = 8) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-.3, .3)) +
  facet_wrap(~ factor(quant, levels = c("2.5", "5", "centroid", "95", "97.5"))) +
  ggtitle("Shifts on PC2") +
  ylab("Difference on PC2") +
  xlab("") +
  theme_bw() +
  shift_theme

## plot just 95% estimates (2.5, centroid, 97.5); the 5 / 95 rows are dropped

# Letters drawn under each panel, one per quantile x agent.
anova.letters.pc1 <- data.frame(quant = rep(c("2.5", "centroid", "97.5"), 3),
                                agent = c(rep("fire", 3), rep("insect.disease", 3), rep("none", 3)),
                                letters = c("A", "A", "A", "A", "AB", "A", "A", "B", "A"))

anova.letters.pc2 <- data.frame(quant = rep(c("2.5", "centroid", "97.5"), 3),
                                agent = c(rep("fire", 3), rep("insect.disease", 3), rep("none", 3)),
                                letters = c("A", "AB", "A", "A", "A", "A", "A", "B", "A"))

inner_quants <- c("5", "95")
pc1_margin_shifts <- pc1_shifts %>% filter(!quant %in% inner_quants)
pc2_margin_shifts <- pc2_shifts %>% filter(!quant %in% inner_quants)
pc1_margin_stars <- pvalues.percentileshifts %>%
  filter(pc == "pc1") %>%
  filter(!quant %in% inner_quants) %>%
  filter(!is.na(sym.pos))
pc2_margin_stars <- pvalues.percentileshifts %>%
  filter(pc == "pc2") %>%
  filter(!quant %in% inner_quants) %>%
  filter(!is.na(sym.pos))

pc1nicheshiftsplot <- ggplot(pc1_margin_shifts, aes(x = agent, y = estimate, color = agent, fill = agent)) +
  geom_point(shape = 23, size = 3.5) +
  geom_errorbar(aes(ymin = lowerci, ymax = upperci), width = 0.3, size = 1) +
  agent_scales +
  geom_text(data = pc1_margin_stars, aes(x = agent, y = sym.pos), label = "*", color = "black", size = 8) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-.21, .2)) +
  facet_wrap(~ factor(quant, levels = c("2.5", "centroid", "97.5"), labels = c("hot/dry edge (2.5)", "centroid", "cold/wet edge (97.5)"))) +
  geom_segment(aes(x = 0.8, xend = 3.2, y = -.15, yend = -.15), size = 1, color = "azure4") +
  geom_text(data = anova.letters.pc1, aes(x = agent, y = -0.19, fill = NULL, label = letters), parse = TRUE, col = "azure4", size = 5) +
  ggtitle("Shifts on PC1") +
  ylab("Difference on PC1") +
  xlab("") +
  theme_bw() +
  shift_theme

# "Blank" versions: every estimate is set to 5, outside the y limits used
# below; error bars, stars, the segment and the letters are left out.
percentile.results_blank <- percentile.results %>%
  mutate(estimate = 5)

pc1_blank <- percentile.results_blank %>%
  filter(pc == "pc1") %>%
  filter(!quant %in% inner_quants)
pc2_blank <- percentile.results_blank %>%
  filter(pc == "pc2") %>%
  filter(!quant %in% inner_quants)

ggplot(pc1_blank, aes(x = agent, y = estimate, color = agent, fill = agent)) +
  geom_point(shape = 23, size = 3.5) +
  agent_scales +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-.21, .2)) +
  coord_flip() +
  facet_wrap(~ factor(quant, levels = c("2.5", "centroid", "97.5"))) +
  ggtitle("Shifts on PC1") +
  ylab("Difference on PC1") +
  xlab("") +
  theme_bw() +
  shift_theme

pc2nicheshiftsplot <- ggplot(pc2_margin_shifts, aes(x = agent, y = estimate, color = agent, fill = agent)) +
  geom_point(shape = 23, size = 3.5) +
  geom_errorbar(aes(ymin = lowerci, ymax = upperci), width = 0.3, size = 1) +
  agent_scales +
  geom_text(data = pc2_margin_stars, aes(x = agent, y = sym.pos), label = "*", color = "black", size = 8) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-.135, .17)) +
  facet_wrap(~ factor(quant, levels = c("2.5", "centroid", "97.5"), labels = c("hot/wet edge (2.5)", "centroid", "cold/dry edge (97.5)"))) +
  geom_segment(aes(x = 0.8, xend = 3.2, y = -.09, yend = -.09), size = 1, color = "azure4") +
  geom_text(data = anova.letters.pc2, aes(x = agent, y = -0.12, fill = NULL, label = letters), parse = TRUE, col = "azure4", size = 5) +
  ggtitle("Shifts on PC2") +
  ylab("Difference on PC2") +
  xlab("") +
  theme_bw() +
  shift_theme

ggplot(pc2_blank, aes(x = agent, y = estimate, color = agent, fill = agent)) +
  geom_point(shape = 23, size = 3.5) +
  agent_scales +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-.135, .17)) +
  facet_wrap(~ factor(quant, levels = c("2.5", "centroid", "97.5"))) +
  coord_flip() +
  ggtitle("Shifts on PC2") +
  ylab("Difference on PC2") +
  xlab("") +
  theme_bw() +
  shift_theme

# put them together: PC1 panel above the PC2 panel, written to a PNG file
png("figures/percentile_shifts_figure_102722.png", width = 600, height = 600)

ggarrange(pc1nicheshiftsplot, pc2nicheshiftsplot, nrow = 2)

dev.off()
```


### Traits

Now I will look at the effect of species traits on the amount of expansion/contraction experienced between the adult and seedling niche. 

```{r, echo=FALSE, message=FALSE, results='hide'}
# Build the trait table: shade tolerance, dispersal mode and mean seed weight.

# Shade tolerance values come from
# https://figshare.com/collections/TOLERANCE_TO_SHADE_DROUGHT_AND_WATERLOGGING_OF_TEMPERATE_NORTHERN_HEMISPHERE_TREES_AND_SHRUBS/3309258
# Both vectors are positional: entry k belongs to row k of species.names.
trait.data <- data.frame(
  species.name = species.names$species.name,
  shade.tolerance = c(4.33, 4.01, 4.83, 1.48, 1.35, 4.53, 1, 1.44, 1.48, 1.56, 1.64, 1.17, 2.78, 4.73, 1.21, 2.09),
  dispersal = c(rep("wind", 3), "animal", "wind", "wind", rep("animal", 2), "wind", "animal", "wind", "animal", rep("wind", 3), "animal")
)

# Seed weight data (author's note: weights are still needed for new species).
seed.weight <- read_csv("TRY data/TRY_seed_edited.csv")

# Lookup from the scientific names in the seed data to the common names used here.
# NOTE(review): the common names are matched purely by the order in which
# unique(AccSpeciesName) returns the scientific names - confirm the pairing
# whenever the CSV changes.
try.names <- data.frame(
  scientific.name = unique(seed.weight$AccSpeciesName),
  species.name = c("white fir","grand fir","subalpine fir","alligator juniper","Engelmann spruce","lodgepole pine","two needle pinyon","ponderosa pine","trembling aspen","Douglas-fir","Gambel oak","western redcedar","western larch","whitebark pine","limber pine")
)

# Keep rows with no OrigObsDataID and ErrorRisk below 4, one row per
# ObservationID, then attach the common names.
seed.weight.filter <- seed.weight %>%
  filter(is.na(OrigObsDataID), ErrorRisk < 4) %>%
  distinct(ObservationID, .keep_all = TRUE) %>%
  left_join(try.names, by = c("AccSpeciesName" = "scientific.name"))

# these two counts should agree if every ObservationID appears once
nrow(seed.weight.filter)
dplyr::n_distinct(seed.weight.filter$ObservationID)

# mean StdValue per species
seed.weight.summary <- dplyr::summarise(
  group_by(seed.weight.filter, species.name),
  mean.weight.mg = mean(StdValue)
)

# number of retained records per species, largest first
seed.weight.filter %>%
  dplyr::count(species.name, sort = TRUE)


# combine traits with seed weights and drop singleleaf pinyon
trait.data <- trait.data %>%
  left_join(seed.weight.summary, by = "species.name") %>%
  filter(!species.name == "singleleaf pinyon")

```

Let's look at the correlations between trait data.

```{r, echo=FALSE}
# Look at the correlation between the two continuous traits.
# Columns are picked by name (previously positions 2 and 4), so the result
# does not depend on the column order produced by the joins upstream.
trait_pair <- trait.data[, c("shade.tolerance", "mean.weight.mg")]
# use = "complete.obs": a species without a seed weight would otherwise turn
# the whole matrix into NA, whereas cor.test() below already drops such rows.
cor(trait_pair, use = "complete.obs")
ggplot(trait.data, aes(x = shade.tolerance, y = mean.weight.mg)) +
  geom_point() +
  geom_smooth(method = "lm")
cor.test(trait.data$shade.tolerance, trait.data$mean.weight.mg)
#shade tolerance and mean weight are not significantly correlated.
```

My hypotheses are that shade tolerance and seed weight will significantly affect shift magnitude and that they will interact with disturbance type to produce different magnitude shifts. For example, I expect greater magnitude shifts for species with low shade tolerance in plots that have been burned or harvested, and to a lesser degree in plots that have been impacted by insects or disease. In contrast, I expect that species with low shade tolerance will show lower magnitude shifts in areas that have not been disturbed compared to those with high shade tolerance. I expect seed weight to have a negative relationship with shift magnitude for all disturbance types, but I expect this relationship to be more pronounced in plots that have been burned or harvested.

In order to test these hypotheses, I will model the amount of expansion and contraction for each species in each disturbance type as a function of either shade tolerance or seed weight, disturbance, and the interaction between the trait and disturbance category.

#### 9 species analysis

First I will only include the eight species that have big enough sample sizes for all 3 disturbance categories. 

```{r}
## Full set analysis: attach the species traits to the overlap table
niche.shift.trait.table <- left_join(
  overlap.table.fullset,
  trait.data,
  by = "species.name"
)

```


```{r}
# Thinking about using some indication of cold- vs. warm-adapted species:
# rank species by the mean position of their adults on PC1.

# Mean PC1 score of the adult points in each list entry. vapply() returns a
# plain numeric vector; the previous loop assigned one-row data frames into a
# growing vector and had to unlist() the result afterwards.
pc1.means <- unname(vapply(
  adult_data_pcapts,
  function(pts) mean(pts[["PC1"]]),
  numeric(1)
))
pc1.means

# One row per list entry -> one mean per species (averaged over its entries).
# The join key is spelled out rather than left to the natural-join default.
pc1means.table <- data.frame(pc1mean = pc1.means,
                             names = names(adult_data_pcapts),
                             stringsAsFactors = FALSE) %>%
  tidyr::separate(names, into = c("species.code", "disturbance", "extra"), sep = "_") %>%
  #mutate(species.code = as.numeric(species.code)) %>%
  left_join(species.names, by = "species.code") %>%
  group_by(species.name) %>%
  dplyr::summarise(pc1speciesmean = mean(pc1mean)) %>%
  ungroup()

# species means along a single line
ggplot(pc1means.table, aes(x = pc1speciesmean, y = rep(1, nrow(pc1means.table)), color = factor(species.name))) +
  geom_point()

# same values, one species per row
ggplot(pc1means.table, aes(x = factor(species.name), y = pc1speciesmean)) +
  geom_boxplot() +
  coord_flip()

# NOTE(review): this overwrites niche.shift.trait.table, so re-running the
# chunk without rebuilding the table first joins pc1speciesmean a second time.
niche.shift.trait.table <-
  niche.shift.trait.table %>%
  left_join(pc1means.table, by = c("species.name"))


## include sample size as effect
# append sample sizes (matched on species code and disturbance agent) and
# niche areas (matched on the three parts of their `name` label)
sample.append <- niche.shift.trait.table %>%
  mutate(species = as.character(species)) %>%
  left_join(sample_size %>%
              mutate(SPCD = as.character(SPCD)),
            by = c("species" = "SPCD", "disturbance" = "Agent")) %>%
  left_join(niche.areas %>%
              separate(col = name, into = c("species", "disturbance", "extra"), sep = "_", remove = TRUE),
            by = c("species", "disturbance", "extra"))
```


```{r}
# Overall expansion and contraction by disturbance type WITH sample size
# (n.adult for expansion, n.seed for contraction) as a covariate.

# --- expansion ---
dist.exp.anova2 <- lm(log(mean.exp.prop.adult) ~ factor(disturbance) * n.adult, data = sample.append)
par(mfrow = c(2, 2))
# Diagnostics for the model fitted on the line above. This previously plotted
# dist.exp.anova, a different model object fitted elsewhere in the file.
plot(dist.exp.anova2)
anova(dist.exp.anova2) #no significant interaction
# additive model, Type II tests
dist.exp.anova2_noint <- lm(log(mean.exp.prop.adult) ~ factor(disturbance) + n.adult, data = sample.append)
Anova(dist.exp.anova2_noint, type = '2')

# compare against the model without sample size
AICc(dist.exp.anova, dist.exp.anova2)

# NOTE(review): the x axis is n.seed although the expansion models above use
# n.adult - confirm which sample size is intended here.
ggplot(sample.append, aes(x = n.seed, y = log(mean.exp.prop.adult))) +
  geom_point() +
  geom_smooth(method = "lm") +
  facet_wrap(~disturbance)

# --- contraction ---
dist.contr.anova2 <- lm(log(mean.contr.prop.adult) ~ factor(disturbance) * n.seed, data = sample.append)
plot(dist.contr.anova2)
anova(dist.contr.anova2) #no significant interaction
# additive model, Type II tests
dist.contr.anova2_noint <- lm(log(mean.contr.prop.adult) ~ factor(disturbance) + n.seed, data = sample.append)
Anova(dist.contr.anova2_noint, type = '2')

# compare against the model without sample size
AICc(dist.contr.anova, dist.contr.anova2)
```


```{r}
# models for expansion
# shade tolerance
exp.shade.mod <- lm(
  log(mean.exp.prop.adult) ~ shade.tolerance * disturbance,
  data = niche.shift.trait.table
)
par(mfrow = c(2, 2))
plot(exp.shade.mod)
Anova(exp.shade.mod, type = "3")
summary(exp.shade.mod)

### sample size as fixed
exp.shade.mod2 <- lm(
  log(mean.exp.prop.adult) ~ shade.tolerance * disturbance + shade.tolerance * n.adult,
  data = sample.append
)
par(mfrow = c(2, 2))
plot(exp.shade.mod2)
anova(exp.shade.mod2) # no effect of interactions
exp.shade.mod2_noint <- lm(
  log(mean.exp.prop.adult) ~ shade.tolerance + disturbance + n.adult,
  data = sample.append
)
Anova(exp.shade.mod2_noint, type = "2")

### shade tolerance as quadratic effect
# Still defined because the contraction chunk further down refers to
# shade.tolerance2 by name.
shade.tolerance2 <- sample.append$shade.tolerance^2

# The squared term is written inside the formula so it is always computed
# from the same rows lm() uses, instead of relying on a free-standing vector
# staying aligned with sample.append. The fit is the same; only the term
# label changes to I(shade.tolerance^2).
exp.shade.mod3 <- lm(
  log(mean.exp.prop.adult) ~ shade.tolerance + disturbance + I(shade.tolerance^2) + n.adult,
  data = sample.append
)
par(mfrow = c(2, 2))
plot(exp.shade.mod3)
Anova(exp.shade.mod3, type = "2")

AICc(exp.shade.mod, exp.shade.mod2) # model 2 is lower

# Raw (unlogged) expansion against shade tolerance, one panel per disturbance.
ggplot(niche.shift.trait.table, aes(x = shade.tolerance, y = mean.exp.prop.adult)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1) +
  facet_wrap(~disturbance)

## get data for plotting
# Prediction grid: shade tolerance in steps of 0.1 from its minimum to
# (maximum + .07) -- presumably padded so the grid reaches the observed
# maximum -- with n.adult fixed at its mean, for every disturbance level.
shadeexp_preddat <- expand.grid(
  shade.tolerance = seq(
    min(sample.append$shade.tolerance),
    max(sample.append$shade.tolerance) + .07,
    by = .1
  ),
  n.adult = mean(sample.append$n.adult),
  disturbance = unique(sample.append$disturbance)
)

shadeexppred <- predict(
  exp.shade.mod2,
  newdata = shadeexp_preddat,
  interval = "confidence"
)

shadeexp_preddat <- cbind(shadeexp_preddat, shadeexppred)

# Plot holding sample size at its mean, with fitted lines and confidence bands
# predicting expansion from shade tolerance and disturbance category.
shade.exp.plot_new <- ggplot(data = sample.append) +
  geom_point(
    aes(x = shade.tolerance, y = log(mean.exp.prop.adult), color = disturbance),
    size = 2.5
  ) +
  geom_ribbon(
    data = shadeexp_preddat,
    aes(x = shade.tolerance, ymin = lwr, ymax = upr, fill = disturbance),
    alpha = .15
  ) +
  geom_line(
    data = shadeexp_preddat,
    aes(x = shade.tolerance, y = fit, color = disturbance),
    size = 1
  ) +
  theme_bw() +
  xlab("Shade Tolerance") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  #   annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )


# dispersal mode
exp.disp.mod <- lm(
  log(mean.exp.prop.adult) ~ dispersal * disturbance,
  data = niche.shift.trait.table
)

## sample size as effect
exp.disp.mod2 <- lm(
  log(mean.exp.prop.adult) ~ dispersal * disturbance + dispersal * n.adult,
  data = sample.append
)

AICc(exp.disp.mod, exp.disp.mod2) # mod2 is lower

# Diagnostics and sequential ANOVA for the sample-size model, then the
# additive version tested with type II sums of squares.
par(mfrow = c(2, 2))
plot(exp.disp.mod2)
anova(exp.disp.mod2) # no significant interactions
exp.disp.mod2_noint <- lm(
  log(mean.exp.prop.adult) ~ dispersal + disturbance + n.adult,
  data = sample.append
)
Anova(exp.disp.mod2_noint, type = "2")

# Raw expansion by dispersal mode, one panel per disturbance.
ggplot(
  data = niche.shift.trait.table,
  aes(x = dispersal, y = mean.exp.prop.adult)
) +
  geom_boxplot() +
  facet_wrap(~disturbance)

# average PC1 score
pc1.exp.mod <- lm(
  log(mean.exp.prop.adult) ~ pc1speciesmean * disturbance,
  data = niche.shift.trait.table
)

## sample size as effect
pc1.exp.mod2 <- lm(
  log(mean.exp.prop.adult) ~ pc1speciesmean * disturbance + n.adult * disturbance,
  data = sample.append
)

AICc(pc1.exp.mod, pc1.exp.mod2) # second model has lower AIC

Anova(pc1.exp.mod2, type = "3")
summary(pc1.exp.mod2)

par(mfrow = c(2, 2))
plot(pc1.exp.mod2)
anova(pc1.exp.mod2) # significant pc1score:disturbance interaction

# Keep only the interaction flagged above; sample size enters additively.
pc1.exp.mod2_sigint <- lm(
  log(mean.exp.prop.adult) ~ n.adult + disturbance * pc1speciesmean,
  data = sample.append
)
anova(pc1.exp.mod2_sigint)
# Slope of pc1speciesmean within each disturbance, with pairwise contrasts.
emtrends(pc1.exp.mod2_sigint, pairwise ~ disturbance, var = "pc1speciesmean")
# `var` is spelled out in full; the call previously relied on partial
# argument matching (`va =`).
emtrends(pc1.exp.mod2_sigint, "n.adult", var = "n.adult")
summary(pc1.exp.mod2_sigint)

pc1.exp.mod2_noint <- lm(
  log(mean.exp.prop.adult) ~ n.adult + disturbance + pc1speciesmean,
  data = sample.append
)
Anova(pc1.exp.mod2_noint, type = "2")

# Same additive model fitted separately within each disturbance category.
pc1.exp.mod2_fire <- lm(
  log(mean.exp.prop.adult) ~ n.adult + pc1speciesmean,
  data = sample.append %>% filter(disturbance == "fire")
)
Anova(pc1.exp.mod2_fire, type = "2")
pc1.exp.mod2_ID <- lm(
  log(mean.exp.prop.adult) ~ n.adult + pc1speciesmean,
  data = sample.append %>% filter(disturbance == "insect.disease")
)
Anova(pc1.exp.mod2_ID, type = "2")
pc1.exp.mod2_none <- lm(
  log(mean.exp.prop.adult) ~ n.adult + pc1speciesmean,
  data = sample.append %>% filter(disturbance == "none")
)
Anova(pc1.exp.mod2_none, type = "2")

## Results summary: There was a significant interaction between disturbance type and pc1 mean score in predicting the amount of niche expansion (ANOVA, F = 4.19, P = 0.03). In burned areas, species from cooler and wetter (lower CWD) habitats expanded less than those from hotter and drier habitats (slope CI = [-1.5, -0.3]), whereas there were no significant trends detected in insect/disease affected areas and undisturbed areas.

## get data for plotting
# Prediction grid: PC1 mean score in steps of 0.1 from its minimum to
# (maximum + .05), n.adult fixed at its mean, for every disturbance level.
pc1exp_preddat <- expand.grid(
  pc1speciesmean = seq(
    min(sample.append$pc1speciesmean),
    max(sample.append$pc1speciesmean) + .05,
    by = .1
  ),
  n.adult = mean(sample.append$n.adult),
  disturbance = unique(sample.append$disturbance)
)

pc1exppred <- predict(
  pc1.exp.mod2,
  newdata = pc1exp_preddat,
  interval = "confidence"
)

pc1exp_preddat <- cbind(pc1exp_preddat, pc1exppred)


# Plot holding sample size at its mean, with fitted lines and confidence bands
# predicting expansion from PC1 mean score and disturbance category.
pc1.exp.plot_new <- ggplot(data = sample.append) +
  geom_point(
    aes(x = pc1speciesmean, y = log(mean.exp.prop.adult), color = disturbance),
    size = 2.5
  ) +
  geom_ribbon(
    data = pc1exp_preddat,
    aes(x = pc1speciesmean, ymin = lwr, ymax = upr, fill = disturbance),
    alpha = .15
  ) +
  geom_line(
    data = pc1exp_preddat,
    aes(x = pc1speciesmean, y = fit, color = disturbance),
    size = 1
  ) +
  # geom_text(aes(x=pc1speciesmean,y=log(mean.exp.prop.adult),label=species))+
  theme_bw() +
  xlab("PC1 Mean Score") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  # annotate("text", label=("bold(PC1score~~italic(p) == 0.04)"),parse=TRUE, x=-0.7, y=-7,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 0.5"), x=-0.5, y=-6.3,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("samplesize ~~italic(p) == 0.2"), x=-0.5, y=-6.6,parse=TRUE,  size=8,hjust=0)+
  annotate(
    "text",
    label = "bold(PC1score~x~dist ~~italic(p) == 0.03)",
    x = -0.7,
    y = -7.3,
    parse = TRUE,
    size = 8,
    hjust = 0
  ) +
  # annotate("text", label=("PC1score~x~samplesize ~~italic(p) == 0.9"), x=-0.5, y=-7.2,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )

# all models against each other
AICc(exp.shade.mod2, exp.disp.mod2, pc1.exp.mod2)
```

```{r}
# models for contraction

# shade tolerance
contr.shade.mod <- lm(
  log(mean.contr.prop.adult) ~ shade.tolerance * disturbance,
  data = niche.shift.trait.table
)
par(mfrow = c(2, 2))
plot(contr.shade.mod)
Anova(contr.shade.mod, type = "3")
summary(contr.shade.mod)

## sample size as effect
contr.shade.mod2 <- lm(
  log(mean.contr.prop.adult) ~ shade.tolerance * disturbance + disturbance * n.adult,
  data = sample.append
)
par(mfrow = c(2, 2))
plot(contr.shade.mod2)
anova(contr.shade.mod2) # no significant interactions

contr.shade.mod2_noint <- lm(
  log(mean.contr.prop.adult) ~ shade.tolerance + disturbance + n.adult,
  data = sample.append
)
plot(contr.shade.mod2_noint)
Anova(contr.shade.mod2_noint, type = "2")
summary(contr.shade.mod2_noint)

## shade tolerance as quadratic effect
# Stored as contr.shade.mod3 (mirroring exp.shade.mod3). It used to be
# assigned to contr.shade.mod2, which silently replaced the sample-size model
# fitted above. The squared term is written inside the formula so it is
# computed from the rows lm() uses rather than from a separate global vector.
contr.shade.mod3 <- lm(
  log(mean.contr.prop.adult) ~ shade.tolerance * disturbance + I(shade.tolerance^2),
  data = sample.append
)
par(mfrow = c(2, 2))
plot(contr.shade.mod3)
Anova(contr.shade.mod3, type = "3")
summary(contr.shade.mod3)

# Base, sample-size and quadratic models side by side.
# Earlier note, recorded when this compared contr.shade.mod against the
# quadratic model only: first model has lower AIC. Re-check after rerunning.
AICc(contr.shade.mod, contr.shade.mod2, contr.shade.mod3)


# Raw (unlogged) contraction against shade tolerance, one panel per disturbance.
ggplot(niche.shift.trait.table, aes(x = shade.tolerance, y = mean.contr.prop.adult)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1) +
  facet_wrap(~disturbance)


## get data for plotting
# Prediction grid: shade tolerance in steps of 0.1 from its minimum to
# (maximum + .07), for every disturbance level. Predictions come from
# contr.shade.mod, the model without a sample-size term.
shadecontr_preddat <- expand.grid(
  shade.tolerance = seq(
    min(sample.append$shade.tolerance),
    max(sample.append$shade.tolerance) + .07,
    by = .1
  ),
  disturbance = unique(sample.append$disturbance)
)

shadecontrpred <- predict(
  contr.shade.mod,
  newdata = shadecontr_preddat,
  interval = "confidence"
)

shadecontr_preddat <- cbind(shadecontr_preddat, shadecontrpred)

# Observed log contraction with fitted lines and confidence bands per
# disturbance category.
shade.contr.plot_new <- ggplot(data = niche.shift.trait.table) +
  geom_point(
    aes(x = shade.tolerance, y = log(mean.contr.prop.adult), color = disturbance),
    size = 2.5
  ) +
  geom_ribbon(
    data = shadecontr_preddat,
    aes(x = shade.tolerance, ymin = lwr, ymax = upr, fill = disturbance),
    alpha = .15
  ) +
  geom_line(
    data = shadecontr_preddat,
    aes(x = shade.tolerance, y = fit, color = disturbance),
    size = 1
  ) +
  theme_bw() +
  xlab("Shade Tolerance") +
  ylab("Log(Contraction)") +
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.9"),parse=TRUE, x=1, y=-4.6,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 0.8"), x=1, y=-4.9,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.5"), x=1, y=-5.2,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )

# dispersal mode
contr.disp.mod <- lm(
  log(mean.contr.prop.adult) ~ dispersal * disturbance,
  data = niche.shift.trait.table
)
par(mfrow = c(2, 2))
plot(contr.disp.mod)
Anova(contr.disp.mod, type = "3")
summary(contr.disp.mod)

## sample size as effect
contr.disp.mod2 <- lm(
  log(mean.contr.prop.adult) ~ dispersal * disturbance + disturbance * n.adult,
  data = sample.append
)
par(mfrow = c(2, 2))
plot(contr.disp.mod2)
anova(contr.disp.mod2) # no significant interactions

# Additive version, tested with type II sums of squares.
contr.disp.mod2_noint <- lm(
  log(mean.contr.prop.adult) ~ dispersal + disturbance + n.adult,
  data = sample.append
)
Anova(contr.disp.mod2_noint, type = "2")
summary(contr.disp.mod2_noint)

AICc(contr.disp.mod, contr.disp.mod2) # first model has lower AIC


# Raw contraction by dispersal mode, one panel per disturbance.
ggplot(
  data = niche.shift.trait.table,
  aes(x = dispersal, y = mean.contr.prop.adult)
) +
  geom_boxplot() +
  facet_wrap(~disturbance)

# pc1 mean score
pc1.contr.mod <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean * disturbance,
  data = niche.shift.trait.table
)
plot(pc1.contr.mod)
Anova(pc1.contr.mod, type = "3")
summary(pc1.contr.mod)
# Slope of pc1speciesmean within each disturbance, with pairwise contrasts.
emtrends(pc1.contr.mod, pairwise ~ disturbance, var = "pc1speciesmean")

## sample size as effect
pc1.contr.mod2 <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean * disturbance + disturbance * n.adult,
  data = sample.append
)
par(mfrow = c(2, 2))
plot(pc1.contr.mod2)
anova(pc1.contr.mod2) # significant pc1 score:disturbance interaction

# Keep only the interaction flagged above; sample size enters additively.
pc1.contr.mod2_sigint <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean * disturbance + n.adult,
  data = sample.append
)
anova(pc1.contr.mod2_sigint)
emtrends(pc1.contr.mod2_sigint, pairwise ~ disturbance, var = "pc1speciesmean")

pc1.contr.mod2_noint <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean + disturbance + n.adult,
  data = sample.append
)
Anova(pc1.contr.mod2_noint, type = "2")

# Results: There was a significant interaction between disturbance and pc1 score in predicting the amount of contraction (F = 6.9, P = 0.005). In burned areas, species from cooler and wetter (lower CWD) habitats contracted more than those from hotter and drier habitats (slope CI = [0.26, 0.88]), whereas there were no significant trends detected in insect/disease affected areas and undisturbed areas.

## get data for plotting
# Prediction grid: PC1 mean score in steps of 0.1 from its minimum to
# (maximum + 0.05), for every disturbance level. Predictions come from
# pc1.contr.mod, the model without a sample-size term.
pc1contr_preddat <- expand.grid(
  pc1speciesmean = seq(
    min(sample.append$pc1speciesmean),
    max(sample.append$pc1speciesmean) + 0.05,
    by = .1
  ),
  disturbance = unique(sample.append$disturbance)
)

pc1contrpred <- predict(
  pc1.contr.mod,
  newdata = pc1contr_preddat,
  interval = "confidence"
)

pc1contr_preddat <- cbind(pc1contr_preddat, pc1contrpred)

# Observed log contraction with fitted lines and confidence bands per
# disturbance category.
pc1.contr.plot_new <- ggplot(data = niche.shift.trait.table) +
  geom_point(
    aes(x = pc1speciesmean, y = log(mean.contr.prop.adult), color = disturbance),
    size = 2.5
  ) +
  geom_ribbon(
    data = pc1contr_preddat,
    aes(x = pc1speciesmean, ymin = lwr, ymax = upr, fill = disturbance),
    alpha = .15
  ) +
  geom_line(
    data = pc1contr_preddat,
    aes(x = pc1speciesmean, y = fit, color = disturbance),
    size = 1
  ) +
  theme_bw() +
  xlab("PC1 Mean Score") +
  ylab("Log(Contraction)") +
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  # annotate("text", label=("bold(PC1score~~italic(p) == 0.001)"),parse=TRUE, x=-0.8, y=-4.8,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 0.06"), x=-0.8, y=-5.2,parse=TRUE,  size=8,hjust=0)+
  annotate(
    "text",
    label = "bold(PC1score~x~dist ~~italic(p) == 0.005)",
    x = -0.8,
    y = -4.8,
    parse = TRUE,
    size = 8,
    hjust = 0
  ) +
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )


## take out outlier
# Refit the PC1 contraction model without the "Gambel oak" rows.
pc1.contr.mod3 <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean * disturbance,
  data = niche.shift.trait.table %>% filter(!(species.name == "Gambel oak"))
)
Anova(pc1.contr.mod3, type = "3")


AICc(contr.shade.mod, contr.disp.mod, pc1.contr.mod)

```

```{r}
# plots
# Text sizes and margins shared by all six trait plots below (previously
# repeated verbatim in every plot).
trait.text.theme <- theme(
  axis.text = element_text(size = 24),
  axis.title = element_text(size = 24),
  legend.text = element_text(size = 24),
  legend.title = element_text(size = 24),
  plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
)

# Shade tolerance vs. log contraction, one lm line per disturbance.
shade.contr.plot <- ggplot(
  data = niche.shift.trait.table,
  aes(x = shade.tolerance, y = log(mean.contr.prop.adult), color = disturbance)
) +
  geom_point(size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1.5) +
  theme_bw() +
  xlab("Shade Tolerance") +
  ylab("Log(Contraction)") +
  scale_color_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  trait.text.theme

# Shade tolerance vs. log expansion.
shade.exp.plot <- ggplot(
  data = niche.shift.trait.table,
  aes(x = shade.tolerance, y = log(mean.exp.prop.adult), color = disturbance)
) +
  geom_point(size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1.5) +
  theme_bw() +
  xlab("Shade Tolerance") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  trait.text.theme


# Dispersal mode vs. log expansion (boxplots filled by disturbance).
disp.exp.plot <- ggplot(
  data = niche.shift.trait.table,
  aes(x = dispersal, y = log(mean.exp.prop.adult), fill = disturbance)
) +
  geom_boxplot() +
  xlab("Dispersal Mode") +
  ylab("Log(Expansion)") +
  scale_fill_manual(values = disturbance.colors) +
  # annotate("text", label=("dispersal~~italic(p) == 0.8"),parse=TRUE, x=0.5, y=-6,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=0.5, y=-6.3,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("samplesize ~~italic(p) == 0.7"), x=0.5, y=-6.6,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("dispersal~x~disturbance ~~italic(p) == 0.8"), x=0.5, y=-6.9,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("dispersal~x~samplesize ~~italic(p) == 1.0"), x=0.5, y=-7.2,parse=TRUE,  size=8,hjust=0)+
  theme_bw() +
  trait.text.theme


# Dispersal mode vs. log contraction.
disp.contr.plot <- ggplot(
  data = niche.shift.trait.table,
  aes(x = dispersal, y = log(mean.contr.prop.adult), fill = disturbance)
) +
  geom_boxplot() +
  xlab("Dispersal Mode") +
  ylab("Log(Contraction)") +
  scale_fill_manual(values = disturbance.colors) +
  #   annotate("text", label=("dispersal~~italic(p) == 0.06"),parse=TRUE, x=0.5, y=-4.5,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 0.4"), x=0.5, y=-4.8,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("dispersal~x~disturbance ~~italic(p) == 0.3"), x=0.5, y=-5.1,parse=TRUE,  size=8,hjust=0)+
  theme_bw() +
  trait.text.theme


# PC1 mean score vs. log contraction; the only plot here that draws
# confidence bands (se = TRUE), hence the extra fill mapping.
pc1.contr.plot <- ggplot(
  data = niche.shift.trait.table,
  aes(
    x = pc1speciesmean,
    y = log(mean.contr.prop.adult),
    color = disturbance,
    fill = disturbance
  )
) +
  geom_point(size = 2.5) +
  geom_smooth(method = "lm", se = TRUE, fullrange = TRUE, size = 1.5) +
  theme_bw() +
  xlab("PC1 Mean Score") +
  ylab("Log(Contraction)") +
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  trait.text.theme

# PC1 mean score vs. log expansion.
pc1.exp.plot <- ggplot(
  data = niche.shift.trait.table,
  aes(x = pc1speciesmean, y = log(mean.exp.prop.adult), color = disturbance)
) +
  geom_point(size = 2.5) +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1.5) +
  theme_bw() +
  xlab("PC1 Mean Score") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors) +
  # ylim(c(-10,0))+
  trait.text.theme
```


```{r}
# Six-panel trait figure built from the geom_smooth versions of the plots.
# NOTE(review): the grid is 4 x 2 but only six plots are supplied, and seven
# labels are given -- confirm the layout is intended.
traits.plot8 <- ggarrange(
  shade.exp.plot, shade.contr.plot,
  pc1.exp.plot, pc1.contr.plot,
  disp.exp.plot, disp.contr.plot,
  nrow = 4, ncol = 2,
  common.legend = TRUE,
  labels = c("a", "", "b", "", "c", "", "d"),
  font.label = list(size = 24)
)

# Same layout using the *_new plots (model predictions with confidence bands).
traits.plot8_new <- ggarrange(
  shade.exp.plot_new, shade.contr.plot_new,
  pc1.exp.plot_new, pc1.contr.plot_new,
  disp.exp.plot, disp.contr.plot,
  nrow = 4, ncol = 2,
  common.legend = TRUE,
  labels = c("a", "", "b", "", "c", "", "d"),
  font.label = list(size = 24)
)


# On-screen preview with diff.plot stacked on top.
ggarrange(
  diff.plot, traits.plot8_new,
  ncol = 1, nrow = 2,
  heights = c(0.8, 3),
  labels = c("a", ""),
  font.label = list(size = 24)
)

#
# png("traitplot22.subset.png", width = 800, height = 1600)
#
# ggarrange(diff.plot,traits.plot8,ncol=1,nrow=2,heights=c(1.5,4))

#
# dev.off()

# Write the figure to disk. print() is explicit so the plot is drawn into the
# png device even when this code is run via source() or inside a function,
# where top-level auto-printing does not happen.
png("figures/traitplot22.subset_CIs.png", width = 900, height = 1500)
print(traits.plot8_new)
dev.off()
```

```{r}
## try all traits in one model
# Contraction: PC1 mean score, dispersal and shade tolerance, each interacting
# with disturbance.
all.traits.mod.contr <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean * disturbance +
    dispersal * disturbance +
    shade.tolerance * disturbance,
  data = niche.shift.trait.table
)
plot(all.traits.mod.contr)
Anova(all.traits.mod.contr, type = "3")
summary(all.traits.mod.contr)
emtrends(all.traits.mod.contr, pairwise ~ disturbance, var = "pc1speciesmean")
emtrends(all.traits.mod.contr, pairwise ~ disturbance, var = "shade.tolerance")


# Expansion: same predictors.
all.traits.mod.exp <- lm(
  log(mean.exp.prop.adult) ~ pc1speciesmean * disturbance +
    dispersal * disturbance +
    shade.tolerance * disturbance,
  data = niche.shift.trait.table
)
plot(all.traits.mod.exp)
Anova(all.traits.mod.exp, type = "3")
summary(all.traits.mod.exp)
emtrends(all.traits.mod.exp, pairwise ~ disturbance, var = "pc1speciesmean")
emtrends(all.traits.mod.exp, pairwise ~ disturbance, var = "shade.tolerance")
```

```{r}
# look at climate niche size as a predictor of expansion/contraction
# Print niche.areas with `name` split on "_" into species / disturbance / extra.
separate(
  niche.areas,
  col = name,
  into = c("species", "disturbance", "extra"),
  sep = "_",
  remove = TRUE
)


# Trait table with the niche areas joined on (species cast to character to
# match the keys split out of niche.areas$name).
area.append <- niche.shift.trait.table %>%
  mutate(species = as.character(species)) %>%
  left_join(
    separate(
      niche.areas,
      col = name,
      into = c("species", "disturbance", "extra"),
      sep = "_",
      remove = TRUE
    ),
    by = c("species", "disturbance", "extra")
  )

# expansion model
exp.area.mod <- lm(
  log(mean.exp.prop.adult) ~ adult_area * disturbance,
  data = area.append
)
plot(exp.area.mod)
Anova(exp.area.mod)
summary(exp.area.mod)

# contraction model
cont.area.mod <- lm(
  log(mean.contr.prop.adult) ~ adult_area * disturbance,
  data = area.append
)
plot(cont.area.mod)
Anova(cont.area.mod)
summary(cont.area.mod)

```

```{r}
# looking just at sample size effects
# expansion model
# NOTE(review): this model uses n.seed, while the plot and the commentary
# below refer to the adult sample size (n.adult) -- confirm which is intended.
exp.samp.mod <- lm(
  log(mean.exp.prop.adult) ~ n.seed * disturbance,
  data = sample.append
)
plot(exp.samp.mod)
Anova(exp.samp.mod)
summary(exp.samp.mod)
ggplot(
  sample.append,
  aes(x = n.adult, y = mean.exp.prop.adult, col = disturbance)
) +
  geom_point()
# There is a negative correlation between the adult sample size and the amount
# of niche expansion, which may indicate that species that are widespread have
# a harder time expanding into new climate zones (because they likely already
# occupy a large climate zone). However, niche area is not correlated with the
# amount of expansion or contraction so I'm not sure if this is actually the
# reason.

cor.test(sample.append$n.adult, sample.append$pc1speciesmean)
cor.test(sample.append$n.adult, sample.append$adult_area)

# contraction model
cont.samp.mod <- lm(
  log(mean.contr.prop.adult) ~ n.seed * disturbance,
  data = sample.append
)
plot(cont.samp.mod)
Anova(cont.samp.mod)
summary(cont.samp.mod)

```


#### 13 species analysis

Now I will do the trait analysis on the data set with all 13 species, including the 5 that only have data for undisturbed and insect/disease plots. 

```{r}
## all data analysis
# expansion

# Overlap table for the partial set of species, with traits, sample sizes and
# species-level PC1 means joined on.
niche.shift.trait.table.all <- overlap.table.partialset %>%
  left_join(trait.data, by = "species.name") %>%
  left_join(
    sample_size,
    by = c("species" = "SPCD", "disturbance" = "Agent")
  ) %>%
  left_join(
    pc1means.table,
    by = c("species.name.y" = "species.name")
  )


# models for expansion
# shade tolerance
exp.shade.mod.all <- lm(
  log(mean.exp.prop.adult) ~ shade.tolerance * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(exp.shade.mod.all)
Anova(exp.shade.mod.all, type = "3")
summary(exp.shade.mod.all)

# with sample size
exp.shade.mod.all2 <- lm(
  log(mean.exp.prop.adult) ~ shade.tolerance * disturbance + n.adult * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(exp.shade.mod.all2)
Anova(exp.shade.mod.all2, type = "3")
summary(exp.shade.mod.all2)

# Compare the models with and without sample size.
AICc(exp.shade.mod.all, exp.shade.mod.all2)

## get data for plotting
# Prediction grid: shade tolerance in steps of 0.1 from its minimum to
# (maximum + 0.07), n.adult fixed at its mean, for every disturbance level.
shadeexp_preddat_all <- expand.grid(
  shade.tolerance = seq(
    min(niche.shift.trait.table.all$shade.tolerance),
    max(niche.shift.trait.table.all$shade.tolerance) + 0.07,
    by = .1
  ),
  n.adult = mean(niche.shift.trait.table.all$n.adult),
  disturbance = unique(niche.shift.trait.table.all$disturbance)
)

shadeexp_pred <- predict(
  exp.shade.mod.all2,
  newdata = shadeexp_preddat_all,
  interval = "confidence"
)

shadeexp_preddat_all <- cbind(shadeexp_preddat_all, shadeexp_pred)


# Plot holding sample size at its mean, with fitted lines and confidence bands
# predicting expansion from shade tolerance and disturbance category.
shade.exp.plot_new_all <- ggplot(data = niche.shift.trait.table.all) +
  geom_point(
    aes(
      x = shade.tolerance,
      y = log(mean.exp.prop.adult),
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 2.5
  ) +
  geom_ribbon(
    data = shadeexp_preddat_all,
    aes(
      x = shade.tolerance,
      ymin = lwr,
      ymax = upr,
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    alpha = .15
  ) +
  geom_line(
    data = shadeexp_preddat_all,
    aes(
      x = shade.tolerance,
      y = fit,
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 1
  ) +
  theme_bw() +
  xlab("Shade Tolerance") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors, name = "Disturbance") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  #   annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )


## get data for plotting - with varying sample size
# Prediction grid: shade tolerance fixed at its mean, n.adult in steps of 1
# across its observed range, for every disturbance level.
shadeexp_preddat_all2 <- expand.grid(
  shade.tolerance = mean(niche.shift.trait.table.all$shade.tolerance),
  n.adult = seq(
    min(niche.shift.trait.table.all$n.adult),
    max(niche.shift.trait.table.all$n.adult),
    by = 1
  ),
  disturbance = unique(niche.shift.trait.table.all$disturbance)
)

shadeexp_pred2 <- predict(
  exp.shade.mod.all2,
  newdata = shadeexp_preddat_all2,
  interval = "confidence"
)

shadeexp_preddat_all2 <- cbind(shadeexp_preddat_all2, shadeexp_pred2)


# plot
# Log expansion against adult sample size, with fitted lines and confidence
# bands per disturbance category.
shade.exp.plot_new_all2 <- ggplot(data = niche.shift.trait.table.all) +
  geom_point(
    aes(
      x = n.adult,
      y = log(mean.exp.prop.adult),
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 2.5
  ) +
  geom_ribbon(
    data = shadeexp_preddat_all2,
    aes(
      x = n.adult,
      ymin = lwr,
      ymax = upr,
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    alpha = .15
  ) +
  geom_line(
    data = shadeexp_preddat_all2,
    aes(
      x = n.adult,
      y = fit,
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 1
  ) +
  theme_bw() +
  xlab("Sample Size") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors, name = "Disturbance") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  #   annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )


# dispersal mode
exp.disp.mod.all <- lm(
  log(mean.exp.prop.adult) ~ dispersal * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(exp.disp.mod.all)
Anova(exp.disp.mod.all, type = "3")
summary(exp.disp.mod.all)

# with sample size
exp.disp.mod.all2 <- lm(
  log(mean.exp.prop.adult) ~ dispersal * disturbance + n.adult * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(exp.disp.mod.all2)
Anova(exp.disp.mod.all2, type = "3")
summary(exp.disp.mod.all2)

AICc(exp.disp.mod.all, exp.disp.mod.all2)


# plot
# Log expansion by dispersal mode, boxes filled by disturbance category.
disp.exp.plot_new_all <- ggplot(data = niche.shift.trait.table.all) +
  geom_boxplot(
    aes(
      x = dispersal,
      y = log(mean.exp.prop.adult),
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    )
  ) +
  theme_bw() +
  xlab("Dispersal Mode") +
  ylab("Log(Expansion)") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  #   annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )

# pc1 score
pc1.exp.mod.all <- lm(
  log(mean.exp.prop.adult) ~ pc1speciesmean * disturbance,
  data = niche.shift.trait.table.all
)
plot(pc1.exp.mod.all)
Anova(pc1.exp.mod.all, type = "3")
summary(pc1.exp.mod.all)

# Same model with sample size (and its interaction with disturbance) added.
pc1.exp.mod.all2 <- lm(
  log(mean.exp.prop.adult) ~ pc1speciesmean * disturbance + n.adult * disturbance,
  data = niche.shift.trait.table.all
)
plot(pc1.exp.mod.all2)
Anova(pc1.exp.mod.all2, type = "3")
summary(pc1.exp.mod.all2)

AICc(pc1.exp.mod.all, pc1.exp.mod.all2)

## get data for plotting
# Prediction grid: PC1 mean score in steps of 0.1 from its minimum to
# (maximum + 0.05), n.adult fixed at its mean, for every disturbance level.
pc1exp_preddat_all <- expand.grid(
  pc1speciesmean = seq(
    min(niche.shift.trait.table.all$pc1speciesmean),
    max(niche.shift.trait.table.all$pc1speciesmean) + 0.05,
    by = .1
  ),
  n.adult = mean(niche.shift.trait.table.all$n.adult),
  disturbance = unique(niche.shift.trait.table.all$disturbance)
)

pc1exp_pred_all <- predict(
  pc1.exp.mod.all2,
  newdata = pc1exp_preddat_all,
  interval = "confidence"
)

pc1exp_preddat_all <- cbind(pc1exp_preddat_all, pc1exp_pred_all)


# Plot holding sample size at its mean, with fitted lines and confidence bands
# predicting expansion from PC1 mean score and disturbance category.
pc1.exp.plot_new_all <- ggplot(data = niche.shift.trait.table.all) +
  geom_point(
    aes(
      x = pc1speciesmean,
      y = log(mean.exp.prop.adult),
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 2.5
  ) +
  geom_ribbon(
    data = pc1exp_preddat_all,
    aes(
      x = pc1speciesmean,
      ymin = lwr,
      ymax = upr,
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    alpha = .15
  ) +
  geom_line(
    data = pc1exp_preddat_all,
    aes(
      x = pc1speciesmean,
      y = fit,
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 1
  ) +
  theme_bw() +
  xlab("PC1 Mean Score") +
  ylab("Log(Expansion)") +
  scale_color_manual(values = disturbance.colors, name = "Disturbance") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  #   annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )

```

```{r}
# models for contraction
# shade tolerance
contr.shade.mod.all <- lm(
  log(mean.contr.prop.adult) ~ shade.tolerance * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(contr.shade.mod.all)
Anova(contr.shade.mod.all, type = "3")
summary(contr.shade.mod.all)

# with sample size
contr.shade.mod.all2 <- lm(
  log(mean.contr.prop.adult) ~ shade.tolerance * disturbance + n.adult * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(contr.shade.mod.all2)
Anova(contr.shade.mod.all2, type = "3")
summary(contr.shade.mod.all2)

# Compare the models with and without sample size.
AICc(contr.shade.mod.all, contr.shade.mod.all2)

## get data for plotting
# Prediction grid: shade tolerance in steps of 0.1 from its minimum to
# (maximum + 0.07), for every disturbance level. The grid also carries n.adult
# at its mean, but predictions come from contr.shade.mod.all, which has no
# n.adult term, so that column does not affect them.
shadecontr_preddat_all <- expand.grid(
  shade.tolerance = seq(
    min(niche.shift.trait.table.all$shade.tolerance),
    max(niche.shift.trait.table.all$shade.tolerance) + 0.07,
    by = .1
  ),
  n.adult = mean(niche.shift.trait.table.all$n.adult),
  disturbance = unique(niche.shift.trait.table.all$disturbance)
)

shadecontr_pred <- predict(
  contr.shade.mod.all,
  newdata = shadecontr_preddat_all,
  interval = "confidence"
)

shadecontr_preddat_all <- cbind(shadecontr_preddat_all, shadecontr_pred)


# Plot with fitted lines and confidence bands predicting contraction from
# shade tolerance and disturbance category.
shade.contr.plot_new_all <- ggplot(data = niche.shift.trait.table.all) +
  geom_point(
    aes(
      x = shade.tolerance,
      y = log(mean.contr.prop.adult),
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 2.5
  ) +
  geom_ribbon(
    data = shadecontr_preddat_all,
    aes(
      x = shade.tolerance,
      ymin = lwr,
      ymax = upr,
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    alpha = .15
  ) +
  geom_line(
    data = shadecontr_preddat_all,
    aes(
      x = shade.tolerance,
      y = fit,
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 1
  ) +
  theme_bw() +
  xlab("Shade Tolerance") +
  ylab("Log(Contraction)") +
  scale_color_manual(values = disturbance.colors, name = "Disturbance") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  #   annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  #   annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )


# Dispersal mode ----
# log(contraction) as a function of dispersal mode, disturbance and their
# interaction.
contr.disp.mod.all <- lm(
  log(mean.contr.prop.adult) ~ dispersal * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(contr.disp.mod.all)
Anova(contr.disp.mod.all, type = "3")
summary(contr.disp.mod.all)

# Same model with sample size (n.adult) and its interaction with disturbance.
contr.disp.mod.all2 <- lm(
  log(mean.contr.prop.adult) ~
    dispersal * disturbance + n.adult * disturbance,
  data = niche.shift.trait.table.all
)
par(mfrow = c(2, 2))
plot(contr.disp.mod.all2)
Anova(contr.disp.mod.all2, type = "3")
summary(contr.disp.mod.all2)

# Compare the models with and without sample size.
AICc(contr.disp.mod.all, contr.disp.mod.all2)


# Boxplots of the observed log(contraction) per dispersal mode, filled by
# disturbance category (raw data only, no model predictions).
disp.contr.plot_new_all <- ggplot(data = niche.shift.trait.table.all) +
  geom_boxplot(
    aes(
      x = dispersal,
      y = log(mean.contr.prop.adult),
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    )
  ) +
  theme_bw() +
  xlab("Dispersal Mode") +
  ylab("Log(Contraction)") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # Disabled axis limit and p-value annotations:
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )

# PC1 mean score ----
# log(contraction) as a function of the species' mean PC1 score, disturbance
# and their interaction.
pc1.contr.mod.all <- lm(
  log(mean.contr.prop.adult) ~ pc1speciesmean * disturbance,
  data = niche.shift.trait.table.all
)
plot(pc1.contr.mod.all)
Anova(pc1.contr.mod.all, type = "3")
summary(pc1.contr.mod.all)

# Same model with sample size (n.adult) and its interaction with disturbance.
pc1.contr.mod.all2 <- lm(
  log(mean.contr.prop.adult) ~
    pc1speciesmean * disturbance + n.adult * disturbance,
  data = niche.shift.trait.table.all
)
plot(pc1.contr.mod.all2)
Anova(pc1.contr.mod.all2, type = "3")
summary(pc1.contr.mod.all2)

# Compare the models with and without sample size.
AICc(pc1.contr.mod.all, pc1.contr.mod.all2)

# Prediction grid: PC1 mean score from its observed minimum to just past its
# observed maximum (step 0.1), crossed with every disturbance category;
# n.adult is fixed at its mean.
pc1contr_preddat_all <- expand.grid(
  pc1speciesmean = seq(
    min(niche.shift.trait.table.all$pc1speciesmean),
    max(niche.shift.trait.table.all$pc1speciesmean) + 0.05,
    by = .1
  ),
  n.adult = mean(niche.shift.trait.table.all$n.adult),
  disturbance = unique(niche.shift.trait.table.all$disturbance)
)

# Predictions are taken from the model WITHOUT sample size, so the n.adult
# column of the grid does not enter the fitted values.
pc1contr_pred_all <- predict(
  pc1.contr.mod.all,
  newdata = pc1contr_preddat_all,
  interval = "confidence"
)

pc1contr_preddat_all <- cbind(pc1contr_preddat_all, pc1contr_pred_all)


# Observed log(contraction) against PC1 mean score, with the fitted line and
# confidence ribbon for each disturbance category.
pc1.contr.plot_new_all <- ggplot(data = niche.shift.trait.table.all) +
  geom_point(
    size = 2.5,
    aes(
      x = pc1speciesmean,
      y = log(mean.contr.prop.adult),
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    )
  ) +
  geom_ribbon(
    data = pc1contr_preddat_all,
    aes(
      ymin = lwr,
      ymax = upr,
      x = pc1speciesmean,
      fill = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    alpha = .15
  ) +
  geom_line(
    data = pc1contr_preddat_all,
    aes(
      x = pc1speciesmean,
      y = fit,
      color = factor(disturbance, levels = c("fire", "insect.disease", "none"))
    ),
    size = 1
  ) +
  theme_bw() +
  xlab("PC1 Mean Score") +
  ylab("Log(Contraction)") +
  scale_color_manual(values = disturbance.colors, name = "Disturbance") +
  scale_fill_manual(values = disturbance.colors, name = "Disturbance") +
  # Disabled axis limit and p-value annotations:
  # ylim(c(-10,0))+
  # annotate("text", label=("shadetol~~italic(p) == 0.6"),parse=TRUE, x=1, y=-7,size=8,hjust=0)+
  # annotate("text", label=("disturbance ~~italic(p) == 1.0"), x=1, y=-7.6,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("samplesize ~~italic(p) == 0.1"), x=1, y=-8.2,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("shadetol~x~disturbance ~~italic(p) == 0.8"), x=1, y=-8.8,parse=TRUE,  size=8,hjust=0)+
  # annotate("text", label=("shadetol~x~samplesize ~~italic(p) == 0.8"), x=1, y=-9.4,parse=TRUE,  size=8,hjust=0)+
  theme(
    axis.text = element_text(size = 24),
    axis.title = element_text(size = 24),
    legend.text = element_text(size = 24),
    legend.title = element_text(size = 24),
    plot.margin = unit(c(0.5, 0.5, 0, 0.5), "cm")
  )

```


#### Percentile shifts ~ Traits

```{r}
# Combine the per-species trait/sample-size table with the wide table of
# quantile shifts. No `by` is given, so the join uses the columns the two
# tables have in common.
comb_table <- sample.append %>%
  mutate(species = as.character(species)) %>%
  left_join(quants_wide)

colnames(comb_table)

# Every model below has the same structure:
#   shift ~ trait * disturbance + n.adult * disturbance
# where the shift is the difference on PC1 or PC2 at the 2.5th or 97.5th
# percentile. Trailing notes record the author's reading of each Anova table.

# Shade tolerance ----
shade.pc1.2.5mod <- lm(
  pc1diff_2.5 ~ shade.tolerance * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(shade.pc1.2.5mod)
Anova(shade.pc1.2.5mod, type = "3") # sample size significant
summary(shade.pc1.2.5mod)

ggplot(comb_table, aes(x = n.adult, y = pc1diff_2.5, color = disturbance)) +
  geom_point() +
  geom_smooth(method = "lm")

shade.pc1.97.5mod <- lm(
  pc1diff_97.5 ~ shade.tolerance * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(shade.pc1.97.5mod)
Anova(shade.pc1.97.5mod, type = "3") # nothing significant

shade.pc2.2.5mod <- lm(
  pc2diff_2.5 ~ shade.tolerance * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(shade.pc2.2.5mod)
Anova(shade.pc2.2.5mod, type = "3") # nothing significant

shade.pc2.97.5mod <- lm(
  pc2diff_97.5 ~ shade.tolerance * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(shade.pc2.97.5mod)
Anova(shade.pc2.97.5mod, type = "3") # nothing significant

# Dispersal ----
disp.pc1.2.5mod <- lm(
  pc1diff_2.5 ~ dispersal * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(disp.pc1.2.5mod)
Anova(disp.pc1.2.5mod, type = "3") # sample size significant
summary(disp.pc1.2.5mod)

disp.pc1.97.5mod <- lm(
  pc1diff_97.5 ~ dispersal * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(disp.pc1.97.5mod)
Anova(disp.pc1.97.5mod, type = "3") # nothing significant

disp.pc2.2.5mod <- lm(
  pc2diff_2.5 ~ dispersal * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(disp.pc2.2.5mod)
Anova(disp.pc2.2.5mod, type = "3") # nothing significant

disp.pc2.97.5mod <- lm(
  pc2diff_97.5 ~ dispersal * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(disp.pc2.97.5mod)
Anova(disp.pc2.97.5mod, type = "3") # nothing significant

# PC1 mean score ----

score.pc1.2.5mod <- lm(
  pc1diff_2.5 ~ pc1speciesmean * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(score.pc1.2.5mod)
Anova(score.pc1.2.5mod, type = "3") # sample size & disturbance significant
summary(score.pc1.2.5mod)

ggplot(
  comb_table,
  aes(x = pc1speciesmean, y = pc1diff_2.5, color = disturbance)
) +
  geom_point() +
  geom_smooth(method = "lm")

ggplot(comb_table, aes(x = disturbance, y = pc1diff_2.5)) +
  geom_boxplot()

score.pc1.97.5mod <- lm(
  pc1diff_97.5 ~ pc1speciesmean * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(score.pc1.97.5mod)
Anova(score.pc1.97.5mod, type = "3") # disturbance significant
summary(score.pc1.97.5mod)

ggplot(comb_table, aes(x = disturbance, y = pc1diff_97.5)) +
  geom_boxplot()

score.pc2.2.5mod <- lm(
  pc2diff_2.5 ~ pc1speciesmean * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(score.pc2.2.5mod)
Anova(score.pc2.2.5mod, type = "3") # nothing significant

score.pc2.97.5mod <- lm(
  pc2diff_97.5 ~ pc1speciesmean * disturbance + n.adult * disturbance,
  data = comb_table
)
par(mfrow = c(2, 2))
plot(score.pc2.97.5mod)
Anova(score.pc2.97.5mod, type = "3") # nothing significant

ggplot(
  comb_table,
  aes(x = pc1speciesmean, y = pc2diff_97.5, color = disturbance)
) +
  geom_point() +
  geom_smooth(method = "lm")
```


# Other

## Effects of disturbance on range shifts across species

### PC1
First I am going to look at whether changes in 5th, 50th and 95th percentiles of positions on both PC1 and PC2 were significantly different from zero across all species, for each disturbance type.

Let's look at changes in PC1, which corresponds most strongly with climatic moisture deficit (CMD) and temperature variables. Shifts in the negative direction on PC1 indicate shifts in the seedling climatic niche to hotter climates, whereas positive shifts would represent shifts in seedling niche to cooler climates.

```{r, warning = FALSE, echo = FALSE, results='hide'}
##### make function for calculating differences in quantiles of adults vs. seedlings
#' get_diff
#'
#' For every element of `adult_data`, computes the difference between the
#' seedling and the adult `quant` quantile of `var`, divided by the adult
#' tolerance (adult 95th percentile minus adult 5th percentile of `var`).
#'
#' @param adult_data named list of dataframes that are separated by species and agent. Dataframes contain info on climate variables of plots with adults of each species/agent.
#' @param seedling_data list of dataframes that are separated by species and agent. Dataframes contain info on climate variables of plots with seedlings of each species/agent. Elements are looked up by the names of `adult_data`.
#' @param var the desired climate variable, passed as a quosure (e.g. `quo(PC1)`). This can also be a principal component, or any column that exists in every dataframe in the list.
#' @param quant The quantile (a single probability between 0 and 1) for which you want to calculate the difference between adults and seedlings.
#'
#' @return a data.frame with one row per element of `adult_data` and two
#'   columns: `uid` (the element name) and `quant_<quant * 100>` (the scaled
#'   difference in the chosen quantile between seedlings and adults). An empty
#'   `adult_data` gives a zero-row data.frame with the same columns.
#' @export
#'
#' @examples
#' # get_diff(adult_data_pcapts, seedling_data_pcapts, quo(PC1), 0.95)
get_diff <- function(adult_data, seedling_data, var, quant) {
  stopifnot(is.numeric(quant), length(quant) == 1)
  uids <- names(adult_data)
  if (length(adult_data) > 0 && is.null(uids)) {
    stop("`adult_data` must be a named list", call. = FALSE)
  }

  # Extract the requested column as a plain vector, ignoring any grouping.
  pull_var <- function(df) {
    dplyr::pull(dplyr::ungroup(df), !!var)
  }

  # One scaled shift per species/agent; vapply() also copes with zero names.
  shifts <- vapply(
    uids,
    function(uid) {
      adult_vals <- pull_var(adult_data[[uid]])
      seed_vals <- pull_var(seedling_data[[uid]])
      # Adult quantiles in one call: target quantile, 5th and 95th percentile.
      adult_q <- quantile(adult_vals, probs = c(quant, 0.05, 0.95), names = FALSE)
      seed_q <- quantile(seed_vals, probs = quant, names = FALSE)
      adult_tolerance <- adult_q[3] - adult_q[2]
      (seed_q - adult_q[1]) / adult_tolerance
    },
    numeric(1),
    USE.NAMES = FALSE
  )

  q.table <- data.frame(
    uid = as.character(uids),
    shift = shifts,
    stringsAsFactors = FALSE
  )
  # Name the value column after the quantile, e.g. quant_5, quant_50, quant_95.
  names(q.table)[2] <- paste("quant", quant * 100, sep = "_")
  q.table
}
```

```{r, echo=FALSE, results='hide'}
# Scaled adult-vs-seedling differences in the 5th, 50th and 95th percentiles
# of PC1, one row per species/disturbance entry of adult_data_pcapts.
pc1.diffs.table <- data.frame(uid = names(adult_data_pcapts))
for (i in c(0.05, 0.5, 0.95)) {
  diffs <- get_diff(adult_data_pcapts, seedling_data_pcapts, quo(PC1), i)
  pc1.diffs.table <- left_join(pc1.diffs.table, diffs, by = "uid")
}

# Split the uid into species code and agent, attach species names, and drop
# the species/agent combinations that are excluded from the analysis.
pc1.diffs.sep <- pc1.diffs.table %>%
  separate(
    uid,
    c("species", "agent"),
    sep = "_",
    remove = FALSE,
    extra = "drop"
  ) %>%
  # mutate(species = as.numeric(species)) %>%
  left_join(species.names, by = c("species" = "species.code")) %>%
  filter(
    !(agent == "fire" & species.name %in% insectonlyspp),
    !(agent == "insect.disease" & species.name %in% fireonlyspp),
    species.name != "singleleaf pinyon"
  )
```


```{r}
# Spot check of get_diff(): recompute by hand the scaled shift in the 95th
# percentile of PC1 for one species/agent entry.
# The entry name is taken from the same list that is indexed
# (adult_data_pcapts) rather than from a separate `adult_data` object.
# q_diff can be compared with pc1.diffs.table$quant_95[i].
i <- 10
spot.uid <- names(adult_data_pcapts)[i]

adult.q <- adult_data_pcapts %>%
  pluck(spot.uid) %>%
  ungroup() %>%
  dplyr::select(PC1) %>%
  dplyr::summarise(quantile = quantile(PC1, probs = .95))
seed.q <- seedling_data_pcapts %>%
  pluck(spot.uid) %>%
  ungroup() %>%
  dplyr::select(PC1) %>%
  dplyr::summarise(quantile = quantile(PC1, probs = .95))
# The quantile being checked is the 95th, so the adult 95th percentile used
# for the tolerance is the value already computed above.
adult.95 <- adult.q
adult.05 <- adult_data_pcapts %>%
  pluck(spot.uid) %>%
  ungroup() %>%
  dplyr::select(PC1) %>%
  dplyr::summarise(quantile = quantile(PC1, probs = 0.05))
adult.tolerance <- adult.95 - adult.05
q_diff <- (seed.q - adult.q) / adult.tolerance


```

First I will assess whether the shifts in each quantile and disturbance are normally distributed, in which case a t-test will be used, otherwise a Wilcoxon signed rank test will be used. 

```{r, echo=FALSE, eval=FALSE, results="hide"}
## Checking for normality of the PC1 shifts.
## The shifts are split by disturbance agent; for each agent and each quantile
## column (quant_5, quant_50, quant_95) a QQ plot is drawn and a Shapiro-Wilk
## test is run. This chunk is not evaluated when knitting (eval=FALSE).
fire.diffs.pc1<- pc1.diffs.sep %>% 
  filter(agent == "fire") 
id.diffs.pc1<- pc1.diffs.sep %>% 
  filter(agent == "insect.disease") 
none.diffs.pc1<- pc1.diffs.sep %>% 
  filter(agent == "none")

#fire 5th quantile 
ggqqplot(fire.diffs.pc1$quant_5)
shapiro.test(fire.diffs.pc1$quant_5)

#fire 50th quantile
ggqqplot(fire.diffs.pc1$quant_50)
shapiro.test(fire.diffs.pc1$quant_50)

#fire 95th quantile 
ggqqplot(fire.diffs.pc1$quant_95)
shapiro.test(fire.diffs.pc1$quant_95)

#id (insect/disease) 5th quantile
ggqqplot(id.diffs.pc1$quant_5)
shapiro.test(id.diffs.pc1$quant_5)

#id 50th quantile
ggqqplot(id.diffs.pc1$quant_50)
shapiro.test(id.diffs.pc1$quant_50)

#id 95th quantile
ggqqplot(id.diffs.pc1$quant_95)
shapiro.test(id.diffs.pc1$quant_95)

#none (undisturbed) 5th quantile -- NOT NORMAL (author's reading of the test)
ggqqplot(none.diffs.pc1$quant_5)
shapiro.test(none.diffs.pc1$quant_5)

#none 50th quantile
ggqqplot(none.diffs.pc1$quant_50)
shapiro.test(none.diffs.pc1$quant_50)

#none 95th quantile
ggqqplot(none.diffs.pc1$quant_95)
shapiro.test(none.diffs.pc1$quant_95)
```

All of the difference data are normally distributed except the 5th quantile of undisturbed plots, so we will use a Wilcoxon signed rank test for that set and t-tests for the rest.


```{r,echo=FALSE, results="hide"}
#test for significant shifts from zero

#' get_pvalues
#'
#' Runs a one-sample, two-sided t-test (mu = 0, 95% confidence level) on every
#' combination of disturbance agent and quantile column.
#'
#' @param pc1.diffs.sep dataframe containing the calculated differences in quantiles of one climate variable or principal component for each species/agent. Must have an `agent` column and one `quant_<q>` column per entry of `quant.list`.
#' @param quant.list numeric vector of quantile labels; each value `q` selects the column `quant_<q>`. Defaults to the 5th, 50th and 95th quantiles.
#'
#' @return a dataframe with one row per agent/quantile combination holding the
#'   results of the t-test: `agent`, `quant`, `p.value`, `estimate`, `lowerci`,
#'   `upperci`, `t` and `df`. Zero rows if `pc1.diffs.sep` has no agents.
#' @export
#'
#' @examples
#' # get_pvalues(pc1.diffs.sep)
get_pvalues <- function(pc1.diffs.sep, quant.list = c(5, 50, 95)) {
  agent.list <- unique(pc1.diffs.sep$agent)
  # Collect one single-row data frame per test and bind them once at the end.
  results <- vector("list", length(agent.list) * length(quant.list))
  k <- 0L
  for (i in seq_along(agent.list)) {
    # Progress output: with options(warn = 1) it shows which agent/quantile
    # combination a warning belongs to.
    cat(i, "\n")
    for (j in seq_along(quant.list)) {
      cat(j, "\n")
      quant.name <- paste("quant", quant.list[j], sep = "_")
      test.temp <- pc1.diffs.sep %>%
        dplyr::filter(agent == agent.list[i]) %>%
        dplyr::pull(!!quant.name) %>%
        t.test(
          y = NULL,
          alternative = "two.sided",
          mu = 0,
          paired = FALSE,
          var.equal = FALSE,
          conf.level = 0.95
        )

      k <- k + 1L
      results[[k]] <- data.frame(
        agent = paste(agent.list[i]),
        quant = quant.name,
        p.value = test.temp$p.value,
        estimate = test.temp$estimate,
        lowerci = test.temp$conf.int[1],
        upperci = test.temp$conf.int[2],
        t = test.temp$statistic,
        df = test.temp$parameter
      )
    }
  }
  # Leading data.frame() keeps the result a plain data.frame even when there
  # are no rows to bind.
  dplyr::bind_rows(data.frame(), results)
}

# options(warn = 1) prints warnings as they occur, so together with the
# progress output of get_pvalues() it shows which test raised them.
# Author's note: in some comparisons the exact p-value cannot be calculated
# because there are values that are exactly equal to 0 in the data; this was
# judged to be ok.
options(warn = 1)
pvalues.pc1 <- get_pvalues(pc1.diffs.sep)
pvalues.pc1
# Flag every row as a t-test result; the flag is overwritten below for rows
# that are replaced by a Wilcoxon test.
pvalues.pc1 <- pvalues.pc1 %>%
  mutate(wilcoxon = "n")

# Wilcoxon signed rank test for the 5th percentile of undisturbed ("none")
# plots, because those data are not normal.
none5.test <- pc1.diffs.sep %>%
  filter(agent == "none") %>%
  pull("quant_5") %>%
  wilcox.test(
    y = NULL,
    alternative = "two.sided",
    mu = 0,
    conf.int = TRUE,
    conf.level = 0.95
  )

# Same columns as the get_pvalues() output; the `t` column holds the Wilcoxon
# statistic here and `df` is not applicable.
none5.p.df <- data.frame(
  agent = "none",
  quant = "quant_5",
  p.value = none5.test$p.value,
  estimate = none5.test$estimate,
  lowerci = none5.test$conf.int[1],
  upperci = none5.test$conf.int[2],
  t = none5.test$statistic,
  df = NA,
  wilcoxon = "y"
)

# Replace the t-test row for none / quant_5 with the Wilcoxon values.
pvalues.pc1 <- pvalues.pc1 %>%
  rows_update(none5.p.df, by = c("agent", "quant"))
pvalues.pc1
```

The first plot here shows shifts on the 5th, 50th and 95th percentiles for all species combined, with individual species shifts plotted as dots. The second plot is the result of T-tests and Wilcoxon Signed Rank tests (non-parametric t-test), showing the estimate as a diamond and the 95% confidence interval as lines. Asterisks indicate whether the shift was significantly different from zero (p\<0.05).

```{r, echo=FALSE}
# Long format: one row per species/agent/quantile, for the boxplot below.
pc1.diffs.long <- pc1.diffs.sep %>%
  pivot_longer(
    cols = c(quant_5, quant_50, quant_95),
    names_to = "quant",
    values_to = "diff"
  )

# Distribution of shifts per agent, one panel per quantile, with the
# individual species drawn as jittered points.
ggplot(pc1.diffs.long, aes(x = agent, y = diff)) +
  geom_boxplot(fill = "lightgrey") +
  geom_jitter(height = 0, width = 0.2, aes(color = species.name)) +
  facet_wrap(~quant) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Position of the significance asterisk: 0.14 where p < 0.05, otherwise NA.
pvalues.pc1 <- pvalues.pc1 %>%
  mutate(sym.pos = ifelse(p.value < 0.05, 0.14, NA))

# Facet labels keyed by quantile column name.
quant.labs <- c(
  quant_5 = "5th quantile",
  quant_50 = "50th quantile",
  quant_95 = "95th quantile"
)

# Test estimates (diamonds) with confidence intervals per agent and quantile;
# an asterisk marks p < 0.05 and the dashed line marks no shift.
pc1.shift.plot <- ggplot(
  pvalues.pc1,
  aes(x = agent, y = estimate, color = agent, fill = agent)
) +
  geom_point(shape = 23, size = 3.5) +
  geom_errorbar(aes(ymin = lowerci, ymax = upperci), width = 0.3, size = 1) +
  # scale_color_manual(values = c("#d7191c", "#fdae61","#2b83ba"))+
  # scale_fill_manual(values = c("#d7191c", "#fdae61","#2b83ba"))+
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  geom_text(
    data = subset(pvalues.pc1, !is.na(sym.pos)),
    aes(x = agent, y = sym.pos),
    label = "*",
    color = "black",
    size = 8
  ) +
  geom_hline(yintercept = 0, color = "black", lty = 2, size = 1) +
  ylim(c(-0.08, 0.16)) +
  coord_flip() +
  facet_wrap(~quant, labeller = labeller(quant = quant.labs)) +
  ggtitle("Shifts on PC1") +
  ylab("Difference on PC1") +
  xlab("") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text = element_text(size = 12, color = "black"),
    text = element_text(size = 12),
    legend.text = element_text(size = 12),
    strip.text = element_text(color = "black", size = 12),
    legend.position = "none"
  )

```

Here I test for significant differences among disturbance agents in adult vs. seedling differences in the three percentiles of PC1. I only include species in this analysis that had data for all three disturbance types. There are no significant differences between disturbances in shifts on PC1.

```{r, echo=FALSE}
# Compare PC1 shifts among disturbance agents.
# Species listed in insectonlyspp or fireonlyspp are removed first. For each
# quantile column a linear model with agent as a factor is fitted, its
# diagnostic plots are drawn in a 2x2 layout, Levene's test is run, and the
# ANOVA table is printed.
pc1.diffs.fullset<- pc1.diffs.sep %>% 
  filter(!species.name %in% c(insectonlyspp,fireonlyspp))

#check for significant differences in 5th percentile by disturbance type
pc1.mod5 <- lm(quant_5 ~ factor(agent), data=pc1.diffs.fullset)
par(mfrow=c(2,2))
plot(pc1.mod5)
leveneTest(quant_5 ~ factor(agent), data=pc1.diffs.fullset)
anova(pc1.mod5)
#check for significant differences in 50th percentile by disturbance type
pc1.mod50 <- lm(quant_50 ~ factor(agent), data=pc1.diffs.fullset)
par(mfrow=c(2,2))
plot(pc1.mod50)
leveneTest(quant_50 ~ factor(agent), data=pc1.diffs.fullset)
anova(pc1.mod50)
#check for significant differences in 95th percentile by disturbance type
pc1.mod95 <- lm(quant_95 ~ factor(agent), data=pc1.diffs.fullset)
par(mfrow=c(2,2))
plot(pc1.mod95)
leveneTest(quant_95 ~ factor(agent), data=pc1.diffs.fullset)
anova(pc1.mod95)

# restore the single-panel plotting layout
par(mfrow=c(1,1))
```


### PC2

And now for PC2, which corresponds most strongly to precipitation variables, with positive shifts representing seedling shifts to wetter climates and negative shifts representing seedling shifts to drier climates.

```{r, echo=FALSE, results='hide'}
# Scaled adult-vs-seedling differences in the 5th, 50th and 95th percentiles
# of PC2, one row per species/disturbance entry of adult_data_pcapts.
pc2.diffs.table <- data.frame(uid = names(adult_data_pcapts))
for (i in c(0.05, 0.5, 0.95)) {
  diffs <- get_diff(adult_data_pcapts, seedling_data_pcapts, quo(PC2), i)
  pc2.diffs.table <- left_join(pc2.diffs.table, diffs, by = "uid")
}
pc2.diffs.table

# Split the uid into species code and agent, attach species names, and drop
# the species/agent combinations that are excluded from the analysis.
pc2.diffs.sep <- pc2.diffs.table %>%
  separate(
    uid,
    c("species", "agent"),
    sep = "_",
    remove = FALSE,
    extra = "drop"
  ) %>%
  # mutate(species = as.numeric(species)) %>%
  left_join(species.names, by = c("species" = "species.code")) %>%
  filter(
    !(agent == "fire" & species.name %in% insectonlyspp),
    !(agent == "insect.disease" & species.name %in% fireonlyspp),
    species.name != "singleleaf pinyon"
  )

```

Check for normality 

```{r,echo=FALSE,eval=FALSE, results='hide'}
## Checking for normality of the PC2 shifts.
## The shifts are split by disturbance agent; for each agent and each quantile
## column (quant_5, quant_50, quant_95) a QQ plot is drawn and a Shapiro-Wilk
## test is run. This chunk is not evaluated when knitting (eval=FALSE).
fire.diffs.pc2<- pc2.diffs.sep %>% 
  filter(agent == "fire") 
id.diffs.pc2<- pc2.diffs.sep %>% 
  filter(agent == "insect.disease") 
none.diffs.pc2<- pc2.diffs.sep %>% 
  filter(agent == "none")

#fire 5th quantile
ggqqplot(fire.diffs.pc2$quant_5)
shapiro.test(fire.diffs.pc2$quant_5)

#fire 50th quantile 
ggqqplot(fire.diffs.pc2$quant_50)
shapiro.test(fire.diffs.pc2$quant_50)

#fire 95th quantile
ggqqplot(fire.diffs.pc2$quant_95)
shapiro.test(fire.diffs.pc2$quant_95)

#id (insect/disease) 5th quantile
ggqqplot(id.diffs.pc2$quant_5)
shapiro.test(id.diffs.pc2$quant_5)

#id 50th quantile
ggqqplot(id.diffs.pc2$quant_50)
shapiro.test(id.diffs.pc2$quant_50)

#id 95th quantile 
ggqqplot(id.diffs.pc2$quant_95)
shapiro.test(id.diffs.pc2$quant_95)

#none (undisturbed) 5th quantile -- NOT NORMAL (author's reading of the test)
ggqqplot(none.diffs.pc2$quant_5)
shapiro.test(none.diffs.pc2$quant_5)

#none 50th quantile -- NOT NORMAL (author's reading of the test)
ggqqplot(none.diffs.pc2$quant_50)
shapiro.test(none.diffs.pc2$quant_50)

#none 95th quantile 
ggqqplot(none.diffs.pc2$quant_95)
shapiro.test(none.diffs.pc2$quant_95)
```

5th & 50th quantile shifts in undisturbed plots are non-normal

```{r,echo=FALSE, results='hide'}
# T-tests for every agent/quantile combination of the PC2 shifts.
pvalues.pc2 <- get_pvalues(pc2.diffs.sep)
pvalues.pc2
# Flag every row as a t-test result; the flag is overwritten below for rows
# that are replaced by a Wilcoxon test.
pvalues.pc2 <- pvalues.pc2 %>%
  mutate(wilcoxon = "n")

# Wilcoxon signed rank tests for the non-normal sets.
# 5th quantile, undisturbed ("none") plots
pc2.none5.test <- pc2.diffs.sep %>%
  filter(agent == "none") %>%
  pull("quant_5") %>%
  wilcox.test(
    y = NULL,
    alternative = "two.sided",
    mu = 0,
    conf.int = TRUE,
    conf.level = 0.95
  )

# Same columns as the get_pvalues() output; the `t` column holds the Wilcoxon
# statistic here and `df` is not applicable.
pc2.none5.p.df <- data.frame(
  agent = "none",
  quant = "quant_5",
  p.value = pc2.none5.test$p.value,
  estimate = pc2.none5.test$estimate,
  lowerci = pc2.none5.test$conf.int[1],
  upperci = pc2.none5.test$conf.int[2],
  t = pc2.none5.test$statistic,
  df = NA,
  wilcoxon = "y"
)

# 50th quantile, undisturbed ("none") plots
pc2.none50.test <- pc2.diffs.sep %>%
  filter(agent == "none") %>%
  pull("quant_50") %>%
  wilcox.test(
    y = NULL,
    alternative = "two.sided",
    mu = 0,
    conf.int = TRUE,
    conf.level = 0.95
  )

pc2.none50.p.df <- data.frame(
  agent = "none",
  quant = "quant_50",
  p.value = pc2.none50.test$p.value,
  estimate = pc2.none50.test$estimate,
  lowerci = pc2.none50.test$conf.int[1],
  upperci = pc2.none50.test$conf.int[2],
  t = pc2.none50.test$statistic,
  df = NA,
  wilcoxon = "y"
)

# Replace the two t-test rows with the Wilcoxon values.
pvalues.pc2 <- pvalues.pc2 %>%
  rows_update(pc2.none5.p.df, by = c("agent", "quant")) %>%
  rows_update(pc2.none50.p.df, by = c("agent", "quant"))

pvalues.pc2
```

Now plot the differences

```{r, echo=FALSE}
# Long format: one row per species/agent/quantile, for the boxplot below.
pc2.diffs.long <- pc2.diffs.sep %>%
  pivot_longer(
    cols = c(quant_5, quant_50, quant_95),
    names_to = "quant",
    values_to = "diff"
  )

# Distribution of shifts per agent, one panel per quantile, with the
# individual species drawn as jittered points.
ggplot(pc2.diffs.long, aes(x = agent, y = diff)) +
  geom_boxplot(fill = "lightgrey") +
  geom_jitter(height = 0, width = 0.2, aes(color = species.name)) +
  facet_wrap(~quant) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Position of the significance asterisk: 0.14 where p < 0.05, otherwise NA.
pvalues.pc2 <- pvalues.pc2 %>%
  dplyr::mutate(sym.pos = ifelse(p.value < 0.05, 0.14, NA))

# Facet labels keyed by quantile column name; the order (95th, 50th, 5th) is
# also the intended left-to-right panel order.
quant.labs <- c("95th quantile", "50th quantile", "5th quantile")
names(quant.labs) <- c("quant_95", "quant_50", "quant_5")

# Plotting copy with `quant` as a factor in the intended panel order.
# plyr is attached after dplyr in the setup chunk, so a bare mutate() is
# plyr::mutate(), which ignores unnamed arguments such as across(...); the
# reordering therefore has to be a named column in an explicit
# dplyr::mutate(). The same factor is used by every layer so that all layers
# agree on the facet order.
pvalues.pc2.plot <- pvalues.pc2 %>%
  dplyr::mutate(quant = factor(quant, levels = names(quant.labs)))

# Test estimates (diamonds) with confidence intervals per agent and quantile;
# an asterisk marks p < 0.05 and the dashed line marks no shift.
pc2.shift.plot <- ggplot(
  pvalues.pc2.plot,
  aes(x = agent, y = estimate, color = agent, fill = agent)
) +
  geom_point(shape = 23, size = 3.5) +
  geom_errorbar(aes(ymin = lowerci, ymax = upperci), width = 0.3, size = 1) +
  # scale_color_manual(values = c("tan4", "#fdae61","turquoise"))+
  # scale_fill_manual(values = c("tan4", "#fdae61","turquoise"))+
  scale_color_manual(values = disturbance.colors) +
  scale_fill_manual(values = disturbance.colors) +
  geom_text(
    data = subset(pvalues.pc2.plot, !is.na(sym.pos)),
    aes(x = agent, y = sym.pos),
    label = "*",
    color = "black",
    size = 8
  ) +
  geom_hline(yintercept = 0, color = "black", size = 1, lty = 2) +
  ylim(c(-0.15, 0.16)) +
  facet_grid(~quant, labeller = labeller(quant = quant.labs)) +
  ggtitle("Shifts on PC2") +
  xlab("") +
  ylab("Difference on PC2") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text = element_text(color = "black", size = 12),
    text = element_text(color = "black", size = 12),
    strip.text = element_text(color = "black", size = 12),
    legend.position = "none"
  )

```

```{r}
#combine pc1 and pc2 plots
# Stacks the PC1 shift plot above the PC2 shift plot in a single column.

ggarrange(pc1.shift.plot, pc2.shift.plot, ncol=1)

```

There is a significant positive shift on the 5th quantile in fire plots, representing a contraction away from dry plots in areas that were burned. Conversely, insect/disease and undisturbed plots showed significant contractions on the 95th quantile (wettest plots) away from wet areas, with a significant negative shift in the median of insect/disease plots as well. ANOVAs indicate significant differences between disturbances in shifts on the 5th and 50th quantiles, with the biggest differences between fire and insect/disease plots, with undisturbed plots showing intermediate shifts. 

```{r, echo=FALSE}
#test for significant differences in the quantiles
# Species listed in insectonlyspp or fireonlyspp are removed first. For each
# quantile column a linear model with agent as a factor is fitted, its
# diagnostic plots are drawn in a 2x2 layout, and Levene's test is run.
pc2.diffs.fullset<- pc2.diffs.sep %>% 
  filter(!species.name %in% c(insectonlyspp,fireonlyspp))


#check for significant differences in 5th percentile by disturbance type
# ANOVA followed by Tukey HSD pairwise comparisons
pc2.mod5 <- lm(quant_5 ~ factor(agent), data=pc2.diffs.fullset)
par(mfrow=c(2,2))
plot(pc2.mod5)
leveneTest(quant_5 ~ factor(agent), data=pc2.diffs.fullset)
anova(pc2.mod5)
TukeyHSD(aov(quant_5 ~ factor(agent), data=pc2.diffs.fullset))

# disabled non-parametric alternative for the 5th percentile:
# kruskal.test(quant_5 ~ factor(agent), data=pc2.diffs.fullset)
# library(FSA)
# dunnTest(quant_5 ~ factor(agent), data=pc2.diffs.fullset, method = "bonferroni")

#check for significant differences in 50th percentile by disturbance type
pc2.mod50 <- lm(quant_50 ~ factor(agent), data=pc2.diffs.fullset)
par(mfrow=c(2,2))
plot(pc2.mod50)
leveneTest(quant_50 ~ factor(agent), data=pc2.diffs.fullset) 
anova(pc2.mod50)
#TukeyHSD(aov(quant_50 ~ factor(agent), data=pc2.diffs.fullset))


#check for significant differences in 95th percentile by disturbance type
# Here a Kruskal-Wallis test with Bonferroni-adjusted Dunn comparisons is used
# instead of the ANOVA table.
pc2.mod95 <- lm(quant_95 ~ factor(agent), data=pc2.diffs.fullset)
par(mfrow=c(2,2))
plot(pc2.mod95) #this doesn't look great, but I think it's ok.
leveneTest(quant_95 ~ factor(agent), data=pc2.diffs.fullset)
kruskal.test(quant_95 ~ factor(agent), data=pc2.diffs.fullset)
# FSA is attached here, mid-chunk, because dunnTest() comes from it
library(FSA)
dunnTest(quant_95 ~ factor(agent), data=pc2.diffs.fullset, method = "bonferroni")

# restore the single-panel plotting layout
par(mfrow=c(1,1))
```


## Old percentile calculations


```{r}
#shifts in range margins
# Quick look at the first posterior row of the first element: columns 5:6 are
# used below as the mean vector and columns 1:4 (filled column-wise into a
# 2x2 matrix) as the covariance matrix of a bivariate normal.
c(ellipses.posterior.list[[1]]$`1.1`[1,5:6])
matrix(c(ellipses.posterior.list[[1]]$`1.1`[1,1:4]),nrow=2,ncol=2, byrow = FALSE)

# One row per element of ellipses.posterior.list; columns hold the 2.5% and
# 97.5% quantiles on each axis, computed from the `1.1` component (stored in
# the adult* columns) and the `1.2` component (stored in the seedling* columns).
quantile.table<- matrix(ncol=8,nrow=length(ellipses.posterior.list))
colnames(quantile.table)<- c("adultpc1_2.5","adultpc1_97.5","adultpc2_2.5","adultpc2_97.5","seedlingpc1_2.5","seedlingpc1_97.5","seedlingpc2_2.5","seedlingpc2_97.5")
# scratch matrix: one row per posterior row (the loop below hard-codes 4000)
temp.table<-matrix(nrow=4000,ncol=8)

# For each element i and posterior row j: simulate 1000 points from the
# bivariate normal defined by that row and take the 2.5% / 97.5% quantiles of
# one column of the simulated points; then average the quantiles over all j.
for(i in 1:length(ellipses.posterior.list)){
  for(j in 1:4000){
# NOTE(review): the seed is reset on every iteration, so every posterior row
# starts from the same random-number state -- confirm this is intended.
set.seed(100)

# NOTE(review): each of the four statements below draws its own set of 1000
# points, so the PC1 and PC2 quantiles of one group come from different
# simulated samples.
adult_pc1_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.1`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.1`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[,1],c(0.025,0.975))

adult_pc2_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.1`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.1`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[,2],c(0.025,0.975))

seedling_pc1_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.2`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.2`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[,1],c(0.025,0.975))

seedling_pc2_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.2`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.2`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[,2],c(0.025,0.975))

# same column order as colnames(quantile.table)
temp.table[j,]<-c(adult_pc1_quantiles,adult_pc2_quantiles,seedling_pc1_quantiles,seedling_pc2_quantiles)
}
# average each quantile over the 4000 posterior rows
quantile.table[i,]<- colMeans(temp.table)
}

##try taking means first
# quantile.table2 <- matrix(ncol=8,nrow=length(ellipses.posterior.list))
# colnames(quantile.table)<- c("adultpc1_2.5","adultpc1_97.5","adultpc2_2.5","adultpc2_97.5","seedlingpc1_2.5","seedlingpc1_97.5","seedlingpc2_2.5","seedlingpc2_97.5")
# 
# for(i in 1:length(ellipses.posterior.list)){
# set.seed(100)
# 
# adult_pc1_quantiles<- quantile(mvrnorm(n=1000,mu=c(mean(ellipses.posterior.list[[i]]$`1.1`[,5]),mean(ellipses.posterior.list[[i]]$`1.1`[,6])), Sigma = matrix(c(mean(ellipses.posterior.list[[i]]$`1.1`[,1]),mean(ellipses.posterior.list[[i]]$`1.1`[,2]),mean(ellipses.posterior.list[[i]]$`1.1`[,3]),mean(ellipses.posterior.list[[i]]$`1.1`[,4])),nrow=2,ncol=2, byrow = FALSE))[,1],c(0.025,0.975))
# 
# adult_pc2_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.1`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.1`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[2,],c(0.025,0.975))
# 
# seedling_pc1_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.2`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.2`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[1,],c(0.025,0.975))
# 
# seedling_pc2_quantiles<- quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.2`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.2`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[2,],c(0.025,0.975))
# 
# quantile.table[i,]<- colMeans(temp.table)
#   }


# Attach the list element names to the averaged quantiles.
quantile.df <- data.frame(quantile.table) %>%
  mutate(names = names(ellipses.posterior.list))

# Seedling-minus-adult difference at each range margin, scaled by the adult
# 2.5%-97.5% range on the same axis. The eight raw quantile columns are then
# dropped, the element name is split into species and disturbance, and species
# names are attached.
quantile.diffs.df <- quantile.df %>%
  mutate(
    pc1_2.5_diff = (seedlingpc1_2.5 - adultpc1_2.5) /
      (adultpc1_97.5 - adultpc1_2.5),
    pc1_97.5_diff = (seedlingpc1_97.5 - adultpc1_97.5) /
      (adultpc1_97.5 - adultpc1_2.5),
    pc2_2.5_diff = (seedlingpc2_2.5 - adultpc2_2.5) /
      (adultpc2_97.5 - adultpc2_2.5),
    pc2_97.5_diff = (seedlingpc2_97.5 - adultpc2_97.5) /
      (adultpc2_97.5 - adultpc2_2.5)
  ) %>%
  dplyr::select(-c(1:8)) %>%
  separate(names, into = c("species", "disturbance", "extra"), sep = "_") %>%
  dplyr::select(-extra) %>%
  left_join(species.names, by = c("species" = "species.code")) %>%
  filter(species.name != "singleleaf pinyon")


### Example of generated points vs. actual points for the first MCMC iteration
# Black: 1000 points simulated from the first posterior row of the first
# element; red: the mean vector of that row; blue: the observed PC1/PC2
# positions of the first element of adult_data_pcapts.
par(mfrow = c(1, 1))
plot(
  mvrnorm(
    n = 1000,
    mu = c(ellipses.posterior.list[[1]]$`1.1`[1, 5:6]),
    Sigma = matrix(
      c(ellipses.posterior.list[[1]]$`1.1`[1, 1:4]),
      nrow = 2, ncol = 2, byrow = FALSE
    )
  )
)
points(
  ellipses.posterior.list[[1]]$`1.1`[1, 5],
  ellipses.posterior.list[[1]]$`1.1`[1, 6],
  col = "red"
)
# The stray trailing comma (an empty extra argument) has been removed here.
points(adult_data_pcapts[[1]]$PC1, adult_data_pcapts[[1]]$PC2, col = "blue")

```

```{r}
#test
names(ellipses.posterior.list)

# 2.5% / 97.5% quantiles of simulated PC1 values for a single posterior row.
# NOTE(review): `i` and `j` are not set in this chunk; this line uses whatever
# values earlier loops left behind -- confirm that is the intended element/row.
# mvrnorm() returns one row per simulated point and one column per axis, so the
# PC1 values are column 1. The previous `[1,]` selected the first simulated
# point (one PC1 and one PC2 value) and took quantiles of those two numbers.
quantile(mvrnorm(n=1000,mu=c(ellipses.posterior.list[[i]]$`1.1`[j,5:6]), Sigma = matrix(c(ellipses.posterior.list[[i]]$`1.1`[j,1:4]),nrow=2,ncol=2, byrow = FALSE))[,1],c(0.025,0.975))

# Empirical 5% / 95% quantiles of the observed adult and seedling PCA points,
# one row per element of adult_data_pcapts.
test.table<-matrix(nrow=length(adult_data_pcapts),ncol=8)
colnames(test.table)<- c("adultpc1_5","adultpc1_95","adultpc2_5","adultpc2_95","seedlingpc1_5","seedlingpc1_95","seedlingpc2_5","seedlingpc2_95")

# seq_along() keeps the loop from running over c(1, 0) if the list is empty
for(i in seq_along(adult_data_pcapts)){
  adult_pc1<- quantile(adult_data_pcapts[[i]]$PC1,c(0.05,0.95))
  adult_pc2<- quantile(adult_data_pcapts[[i]]$PC2,c(0.05,0.95))
  seedling_pc1<- quantile(seedling_data_pcapts[[i]]$PC1,c(0.05,0.95))
  seedling_pc2<- quantile(seedling_data_pcapts[[i]]$PC2,c(0.05,0.95))

  test.table[i,]<- c(adult_pc1,adult_pc2,seedling_pc1,seedling_pc2)
}
test.table<- as.data.frame(test.table) %>% 
  mutate(name=names(adult_data_pcapts))

# Seedling-minus-adult difference at each tail, scaled by the adult 5%-95% width
test.per.table<- test.table %>%   
  mutate(pc1_5_diff = (seedlingpc1_5 - adultpc1_5)/(adultpc1_95 - adultpc1_5),
         pc1_95_diff = (seedlingpc1_95 - adultpc1_95)/(adultpc1_95 - adultpc1_5),
         pc2_5_diff = (seedlingpc2_5 - adultpc2_5)/(adultpc2_95 - adultpc2_5),
         pc2_95_diff = (seedlingpc2_95 - adultpc2_95)/(adultpc2_95 - adultpc2_5)) %>% 
  dplyr::select(-c(1:8)) %>% 
  separate(name,into=c("species","disturbance","extra"), sep="_") %>% 
  dplyr::select(-extra) %>% 
  left_join(species.names,by=c("species" = "species.code")) %>% 
  filter(!(disturbance == "fire" & species.name %in% insectonlyspp)) %>% 
  filter(!species.name=="singleleaf pinyon")

ggplot(test.per.table,aes(x=disturbance,y=pc1_5_diff))+
  geom_boxplot()+
  coord_flip()
ggplot(test.per.table,aes(x=disturbance,y=pc1_95_diff))+
  geom_boxplot()+
  coord_flip()


```


```{r}

#check for normality

# For every quantile-shift column (outer loop) and disturbance class (inner
# loop), draw a normal QQ plot and run a Shapiro-Wilk test on the values of
# that column within that disturbance class. The order matches the original
# hand-written sequence: pc1 2.5, pc1 97.5, pc2 2.5, pc2 97.5, each for fire,
# insect.disease, none.
# Recorded result from the original notes: pc1 97.5 fire -- NOT NORMAL.
normality.quants <- c("pc1_2.5_diff", "pc1_97.5_diff", "pc2_2.5_diff", "pc2_97.5_diff")
normality.dists <- c("fire", "insect.disease", "none")

for (normality.quant in normality.quants) {
  for (normality.dist in normality.dists) {
    normality.label <- paste(normality.quant, normality.dist)
    normality.rows <- quantile.diffs.df %>%
      filter(disturbance == normality.dist)
    normality.values <- normality.rows[[normality.quant]]

    # Nothing is auto-printed inside a for loop, so print explicitly; the
    # label identifies which combination each plot / test belongs to.
    print(ggqqplot(normality.values, title = normality.label))
    cat("\n", normality.label, "\n", sep = "")
    print(shapiro.test(normality.values))
  }
}
```

```{r}
#t-tests/Wilcoxon test of quantile differences
# Tests, for each disturbance class and each quantile-shift column, whether the
# shifts differ from 0, then assembles one results table (percentile.results)
# that also includes the centroid results.

# Results table seeded with one all-NA placeholder row; that row is removed
# further down by filter(!is.na(agent)).
ttest.results <- data.frame(agent = NA, quant = NA, p.value = NA, estimate = NA, lowerci = NA, upperci = NA, t=NA, df=NA, wilcoxon=NA)
disturbance.list = c("fire","insect.disease","none")
# NOTE(review): assumes columns 3:6 of quantile.diffs.df are the four *_diff columns -- confirm
quant.list = colnames(quantile.diffs.df)[3:6]
  
for(i in 1:length(disturbance.list)){
  for(j in 1:length(quant.list)){
# one-sample, two-sided t-test of the shifts against mu = 0
test.temp<- quantile.diffs.df %>% filter(disturbance == disturbance.list[i]) %>% 
  pull(quant.list[j]) %>% 
  t.test(y = NULL, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95)
  
# c() mixes character and numeric values, so every field of the appended row
# (including p.value, estimate and the CI bounds) is stored as character.
# The row is matched to the columns by position, not by name.
ttest.results<- rbind(ttest.results,c(agent =  disturbance.list[i], quant = quant.list[j], p.value = test.temp$p.value, estimate = test.temp$estimate, lowerci = test.temp$conf.int[1], upperci = test.temp$conf.int[2], t=test.temp$statistic, df=test.temp$parameter, wilcoxon="n"))
  }}

#pc1 97.5 fire
# Combination re-tested with a Wilcoxon test instead of a t-test; the i-th
# entries of the two vectors below describe the i-th element of nonnormal.results.
nonnormal.dist.list<- c("fire")
nonnormal.quant.list<- c("pc1_97.5_diff")

nonnormal.results <- list(
# quantile.diffs.df %>% filter(disturbance == "none") %>%
#   pull("pc2_2.5_diff") %>%
#   t.test(y = NULL, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95),
# quantile.diffs.df %>% filter(disturbance == "insect.disease") %>%
#   pull("pc1_2.5_diff") %>%
#   t.test(y = NULL, alternative = "two.sided", mu = 0, paired = FALSE, var.equal = FALSE, conf.level = 0.95),
 quantile.diffs.df %>% filter(disturbance == "fire") %>%
  pull("pc1_97.5_diff") %>%
  wilcox.test(y = NULL, alternative = "two.sided", mu = 0, conf.int = TRUE, conf.level = 0.95)
)

# Same layout as ttest.results (placeholder NA row first, all fields character).
# For the Wilcoxon row the `t` column holds wilcox.test()'s statistic and `df` is NA.
nonnormal.results.table<-data.frame(agent = NA, quant = NA, p.value = NA, estimate = NA, lowerci = NA, upperci = NA, t=NA, df=NA, wilcoxon=NA)
for(i in 1){
nonnormal.results.table<- rbind(nonnormal.results.table,c(agent =  nonnormal.dist.list[i], quant = nonnormal.quant.list[i], p.value = nonnormal.results[[i]]$p.value, estimate = nonnormal.results[[i]]$estimate, lowerci = nonnormal.results[[i]]$conf.int[1], upperci = nonnormal.results[[i]]$conf.int[2], t=nonnormal.results[[i]]$statistic, df=NA, wilcoxon="y"))
}


# Overwrite the t-test row for each (agent, quant) pair present in
# nonnormal.results.table, split e.g. "pc1_97.5_diff" into pc = "pc1" and
# quant = "97.5", convert the estimate and CI bounds back to numeric, and
# append the centroid results.
# NOTE: p.value, t and df are NOT converted here and remain character in the
# rows that come from ttest.results.
percentile.results<- ttest.results %>% 
  rows_update(nonnormal.results.table,by=c("agent","quant")) %>%  #put in wilcoxon results
  separate(quant,into=c("pc","quant","extra"),sep="_") %>% 
  filter(!is.na(agent)) %>% 
  mutate(estimate=as.numeric(estimate),
         upperci = as.numeric(upperci),
         lowerci = as.numeric(lowerci)) %>% 
  dplyr::select(-extra) %>% 
  bind_rows(centroid.results.pc1,centroid.results.pc2) %>%  #add in centroids
  mutate(quant = factor(quant, levels=c("2.5","centroid","97.5")))
```

```{r}
#plot new figure 2

##dataframe for stars
# sym.pos is the position (0.25) at which a "*" is drawn for results with
# p < 0.05, NA otherwise.
# p.value is stored as text in the rows built from ttest.results (rbind() with
# a mixed c() vector coerces every field to character). Comparing text with
# 0.05 is a string comparison, under which e.g. "1e-04" < 0.05 is FALSE, so
# very small p-values lost their star. Convert to numeric before comparing.
pvalues.percentileshifts<- percentile.results %>% 
  mutate(sym.pos = ifelse(as.numeric(p.value)<0.05,0.25,NA))
##

# PC1: estimate with confidence interval per disturbance agent, one panel per
# quantile (2.5, centroid, 97.5); axes are flipped so agents run down the side.
ggplot(percentile.results %>% filter(pc=="pc1"), aes(x=agent, y=estimate, color = agent, fill=agent))+
  geom_point(shape = 23, size = 3.5)+
  geom_errorbar(aes(ymin=lowerci, ymax=upperci),width=0.3, size=1)+
  #scale_color_manual(values = c("#d7191c", "#fdae61","#2b83ba"))+
  #scale_fill_manual(values = c("#d7191c", "#fdae61","#2b83ba"))+
  scale_color_manual(values = disturbance.colors)+
  scale_fill_manual(values = disturbance.colors)+
  geom_text(data = subset(pvalues.percentileshifts %>% filter(pc=="pc1"), !is.na(sym.pos)), aes(x=agent, y=sym.pos), label="*", color="black",size=8)+
  geom_hline(yintercept=0, color="black", lty=2, size=1)+
  ylim(c(-.3,.3))+
  coord_flip()+
  facet_wrap(~ factor(quant,levels=c("2.5","centroid","97.5")))+
  ggtitle("Shifts on PC1")+
  ylab("Difference on PC1")+
  xlab("")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust = 1), axis.text = element_text(size=12, color="black"), text = element_text(size=12),legend.text = element_text(size=12),strip.text = element_text(color="black",size=12), legend.position = "none")

# PC2: same layout, but without coord_flip(), so the PC2 difference stays on
# the vertical axis.
ggplot(percentile.results %>% filter(pc=="pc2"), aes(x=agent, y=estimate, color = agent, fill=agent))+
  geom_point(shape = 23, size = 3.5)+
  geom_errorbar(aes(ymin=lowerci, ymax=upperci),width=0.3, size=1)+
  #scale_color_manual(values = c("#d7191c", "#fdae61","#2b83ba"))+
  #scale_fill_manual(values = c("#d7191c", "#fdae61","#2b83ba"))+
  scale_color_manual(values = disturbance.colors)+
  scale_fill_manual(values = disturbance.colors)+
  geom_text(data = subset(pvalues.percentileshifts %>% filter(pc=="pc2"), !is.na(sym.pos)), aes(x=agent, y=sym.pos), label="*", color="black",size=8)+
  geom_hline(yintercept=0, color="black", lty=2, size=1)+
  ylim(c(-.3,.3))+
  facet_wrap(~ factor(quant,levels=c("2.5","centroid","97.5")))+
  ggtitle("Shifts on PC2")+
  ylab("Difference on PC2")+
  xlab("")+
  theme_bw()+
  theme(axis.text.x = element_text(angle=45, hjust = 1), axis.text = element_text(size=12, color="black"), text = element_text(size=12),legend.text = element_text(size=12),strip.text = element_text(color="black",size=12), legend.position = "none")

```


## Checking inventory type

```{r}
plot<- read_csv("annual_plots2020.csv")
cond<- read_csv("compiled_data_annual2020/COND.csv")

plot.data <- dist %>% 
  left_join(plot, by=c("PLT_CN" = "CN"))

#forest vs. non-forest
plot.data %>% 
  group_by(PLOT_STATUS_CD) %>% 
  dplyr::summarise(n=n()) ##so i do have non-forest plots in here, but they were all sampled...is this a problem??? 1 = sampled - at least one accessible forest land condition on plot, 2 = sampled - no accessible forest land condition on plot
plot.data %>% 
  group_by(KINDCD) %>% 
  dplyr::summarise(n=n()) #all either initial installation, re-measurement or replacement of National design plot
plot.data %>% 
  group_by(PLOT_NONSAMPLE_REASN_CD) %>% 
  dplyr::summarise(n=n()) #none of them have a nonsample reason, which makes sense because they were all sampled
plot.data %>% 
  group_by(NF_SAMPLING_STATUS_CD) %>% 
  dplyr::summarise(n=n()) #lots of 0's here - which means "Nonforest plots / conditions are not inventoried" and some 1's which mean "Nonforest plots / conditions are inventoried". I'm wondering if this is a mistake because according to the PLOT_STATUS_CD, they were all sampled, but maybe this just means the condition data is not complete??" 
plot.data %>% 
  group_by(NF_PLOT_STATUS_CD) %>% 
  dplyr::summarise(n=n()) # 1 = sampled, one accessible nonforest land condition on plot, 2 = sampled, no nonforest land condition present on plot, 3 = nonsampled nonforest
plot.data %>% 
  group_by(INTENSITY) %>% 
  dplyr::summarise(n=n()) #15 of the plots had intensified sampling (more plots per acre than federal base grid)

plot.data %>% 
  filter(PLOT_STATUS_CD==2) %>% #sampled - no accessible forest land on plot
  group_by(NF_SAMPLING_STATUS_CD) %>% 
  dplyr::summarise(n=n()) # 0 = nonforest plots / conditions are not inventoried, #1 = nonforest plots / conditions are inventoried

################
plot.codes.summary<- plot.data %>% 
  group_by(PLOT_STATUS_CD,NF_PLOT_STATUS_CD, NF_SAMPLING_STATUS_CD) %>% 
  dplyr::summarise(n=n()) 

write.csv(plot.codes.summary, "FIAplots_plotcode_summary.csv")

#look at some of the unique combinations
# Condition rows for plots with PLOT_STATUS_CD == 1 and both NF_* codes missing.
# pull(unique(PLT_CN)) only worked because pull() evaluates its argument as a
# column position, and it never deduplicated anything; pull the column and
# call unique() on the result instead (the %in% filter selects the same rows).
View(cond %>% 
  filter(PLT_CN %in% (plot.data %>% 
    filter(PLOT_STATUS_CD==1,is.na(NF_PLOT_STATUS_CD),is.na(NF_SAMPLING_STATUS_CD)) %>% 
    pull(PLT_CN) %>% 
    unique())))

#look at which combinations have trees recorded
# Tree rows for plots with PLOT_STATUS_CD == 2, NF_PLOT_STATUS_CD == 2 and
# NF_SAMPLING_STATUS_CD == 1 that also appear in cond.
View(tree %>% 
  filter(PLT_CN %in% (cond %>% 
    filter(PLT_CN %in% (plot.data %>% 
      filter(PLOT_STATUS_CD==2,NF_PLOT_STATUS_CD==2,NF_SAMPLING_STATUS_CD==1) %>% 
      pull(PLT_CN) %>% 
      unique())) %>% 
    pull(PLT_CN))))

# Helper for the counts below (replaces six copies of the same nested pipeline).
# Given a subset of plot.data rows, returns the number of distinct PLT_CN
# values that (1) also appear in `cond` and (2) have at least one row in `tree`.
count_plots_with_trees <- function(plot.rows) {
  plot.cns <- plot.rows %>% 
    pull(PLT_CN) %>% 
    unique()
  cond.cns <- cond %>% 
    filter(PLT_CN %in% plot.cns) %>% 
    pull(PLT_CN) %>% 
    unique()
  tree %>% 
    filter(PLT_CN %in% cond.cns) %>% 
    pull(PLT_CN) %>% 
    unique() %>% 
    length()
}

plot.data %>% 
  filter(PLOT_STATUS_CD==2,is.na(NF_PLOT_STATUS_CD),NF_SAMPLING_STATUS_CD==0) %>% 
  count_plots_with_trees() #128 plots with trees, but no disturbance data recorded by FIA

plot.data %>% 
  filter(PLOT_STATUS_CD==2,NF_PLOT_STATUS_CD==2,NF_SAMPLING_STATUS_CD==1) %>% 
  count_plots_with_trees() #28 plots with trees, but no disturbance data recorded by FIA

plot.data %>% 
  filter(PLOT_STATUS_CD==2,NF_PLOT_STATUS_CD==1,NF_SAMPLING_STATUS_CD==1) %>% 
  count_plots_with_trees() #242 plots with trees, but no disturbance data recorded by FIA

plot.data %>% 
  filter(PLOT_STATUS_CD==1,is.na(NF_PLOT_STATUS_CD),NF_SAMPLING_STATUS_CD==0) %>% 
  count_plots_with_trees() #9781 plots with trees, but no disturbance data recorded by FIA

# NOTE(review): the original comment on the next two counts repeated
# "9781 plots with trees, but no disturbance data recorded by FIA" verbatim,
# although the filters differ -- re-run and record the actual counts.
plot.data %>% 
  filter(PLOT_STATUS_CD==1,is.na(NF_PLOT_STATUS_CD),NF_SAMPLING_STATUS_CD==1) %>% 
  count_plots_with_trees()

plot.data %>% 
  filter(PLOT_STATUS_CD==1,is.na(NF_PLOT_STATUS_CD),is.na(NF_SAMPLING_STATUS_CD)) %>% 
  count_plots_with_trees()

######################

View(plot.data %>% 
       filter(PLOT_STATUS_CD == 1 & NF_SAMPLING_STATUS_CD == 1))

View(plot.data %>% 
  filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 0))

##looking at how many of the plots that weren't sampled (supposedly) have tree data. 
View(tree %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 0) %>% 
                pull(PLT_CN))))
tree %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 0) %>% 
                pull(PLT_CN))) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length() #128 out of 28,442 plots that had "conditions not inventoried" have tree data

plot.data %>% 
  filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 0) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length()

tree %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 1) %>% #conditions inventoried
                pull(PLT_CN))) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length() #270 out of 1,101

tree %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & is.na(NF_SAMPLING_STATUS_CD)) %>% #NA for conditions inventoried or not
                pull(PLT_CN))) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length() #no data on trees in these plots

tree %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 1) %>% #sampled with at least one accessible forest land condition on plot
                pull(PLT_CN))) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length() #19,334 of the 19,587 plots in this category have tree data. 
###

#########
##look at subplot data
subplot<- read_csv("compiled_data_annual2020/SUBPLOT.csv")
subplotcond<- read_csv("compiled_data_annual2020/SUBP_COND.csv")


subplot %>% 
  filter(PLT_CN %in% c(plot.data %>% pull(PLT_CN))) %>% 
  group_by(SUBPCOND) %>% 
  dplyr::summarise(n=n(),plots=length(unique(PLT_CN)))

subplotcond %>% 
  filter(PLT_CN %in% c(plot.data %>% pull(PLT_CN))) %>%
  group_by(CONDID) %>% 
  dplyr::summarise(n=n(),plots=length(unique(PLT_CN)))
##########


#look at how many "conditions are not inventoried" plots have condition data 
View(cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 0) %>% 
                pull(PLT_CN)))) # all of the 28,442 plots have condition data with COND_STATUS_CD = 2 (nonforest land), COND_NONSAMPLE_REASON_CD = NA. None of them have a FORTYPCD (forest type) or FLDTYPCD (field forest type) code or any other data related to slope, aspect, disturbance and treatment types. This must be why they are labelled as "conditions not inventoried". The only issue I see here is that disturbances in these plots cannot be categorized by FIA condition data, so we may be missing small fires or insect outbreaks that occurred in these plots but were not picked up by the spatial data layers.   
View(cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & NF_SAMPLING_STATUS_CD == 1) %>% 
                pull(PLT_CN)))) #most of these have data on disturbance
View(cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & is.na(NF_SAMPLING_STATUS_CD)) %>% 
                pull(PLT_CN)))) #about 1,060 out of 10,323 of these plots have a disturbance code

cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2 & is.na(NF_SAMPLING_STATUS_CD)) %>% 
                pull(PLT_CN))) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length()

#all of the nonforest sampled plots together (n = 39,866)

View(cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2) %>% #sampled - no accessible forest land condition on plot
                pull(PLT_CN))) %>% 
  filter_at(vars(c(SLOPE:PRESNFCD,FLDAGE:MIXEDCONFCD,CARBON_DOWN_DEAD:CARBON_UNDERSTORY_BG,SOIL_ROOTING_DEPTH_PNW:LAND_COVER_CLASS_CD)), any_vars(!is.na(.))) %>% 
    dplyr::select(c(SLOPE:PRESNFCD,FLDAGE:MIXEDCONFCD,CARBON_DOWN_DEAD:CARBON_UNDERSTORY_BG,SOIL_ROOTING_DEPTH_PNW:LAND_COVER_CLASS_CD))) #this shows the condition rows that have at least one non-NA value among the selected condition fields (i.e. that had some level of condition data recorded)

cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2) %>% #sampled - no accessible forest land condition on plot
                pull(PLT_CN))) %>% 
  filter_at(vars(c(SLOPE:PRESNFCD,FLDAGE:MIXEDCONFCD,CARBON_DOWN_DEAD:CARBON_UNDERSTORY_BG,SOIL_ROOTING_DEPTH_PNW:LAND_COVER_CLASS_CD)), any_vars(!is.na(.))) %>%
  pull(PLT_CN) %>% 
  unique() %>% 
  length() #there are 30,604 plots out of the 39,866 plots with some condition data recorded. Most of the time when disturbance wasn't recorded, the field "PRESNFCD" was - which is a code indicating the current nonforest land use for conditions that were previously classified as forest but are now classified as nonforest 
    

#number of condition rows per primary disturbance code (DSTRBCD1) on the nonforest sampled plots
cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 2) %>% #sampled - no accessible forest land condition on plot
                pull(PLT_CN))) %>% 
  group_by(DSTRBCD1) %>% 
  dplyr::summarise(n=n())


View(cond %>% 
       filter(PLT_CN %in% c(plot.data %>% 
                filter(PLOT_STATUS_CD == 1) %>% #sampled with at least one forest land condition on plot
                pull(PLT_CN)))) #these plots have data on forest type, slope, aspect, disturbance, etc. - except for where CONDPROP_UNADJ = 0, which appear to all be the second entry for a plot. 
###


plot.data %>% 
  group_by(KINDCD) %>% 
  dplyr::summarise(n=n())
plot.data %>% 
  group_by(DESIGNCD) %>% 
  dplyr::summarise(n=n())
plot.data %>% 
  group_by(MEASYEAR.y) %>% 
  dplyr::summarise(n=n())
View(plot.data %>% 
  group_by(MANUAL) %>% 
  dplyr::summarise(n=n(), years = list(paste(unique(MEASYEAR.y)))))

seedling %>% 
  filter(PLT_CN %in% plot.data$PLT_CN) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length()

plot.data %>% 
  filter(PLT_CN %in% unique(seedling$PLT_CN)) %>% 
  group_by(PLOT_STATUS_CD) %>% 
  dplyr::summarise(n=n())
 
tree %>% 
  filter(PLT_CN %in% plot.data$PLT_CN) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  length()

plot.data %>% 
  left_join(cond, by="PLT_CN") %>% 
  group_by(PRESNFCD) %>% 
  dplyr::summarise(n())
plot.data %>% 
  left_join(cond, by="PLT_CN") %>% 
  group_by(FLDTYPCD) %>% 
  dplyr::summarise(n())
plot.data %>% 
  left_join(cond, by="PLT_CN") %>% 
  group_by(FLDSZCD) %>% 
  dplyr::summarise(n())
View(plot.data %>% 
  left_join(cond, by="PLT_CN") %>% 
  filter(is.na(FLDSZCD)) 
    )  
plot.data %>% 
  left_join(cond, by="PLT_CN") %>% 
  group_by(CONDPROP_UNADJ) %>% 
  dplyr::summarise(n()) 
View(plot.data %>% 
       left_join(cond, by="PLT_CN") %>% 
       filter(CONDPROP_UNADJ == 1) %>% 
       filter(COND_STATUS_CD == 1 & NF_SAMPLING_STATUS_CD == 1))  

View(plot.data %>% 
  filter(MANUAL < 1) %>% 
  group_by(STATECD) %>% 
  dplyr::summarise(list(paste(unique(MEASYEAR.y)))))

View(plot.data %>% 
  filter(MANUAL >= 1) %>% 
  group_by(MEASYEAR.y) %>% 
  dplyr::summarise(list(paste(unique(STATECD)))))
```


```{r}
#see how many plots have trees and seedlings recorded on them
# number of distinct analysis plots (plot.data) with at least one tree record
length(unique(tree$PLT_CN[tree$PLT_CN %in% plot.data$PLT_CN])) #19,732

# seedling records on analysis plots that have no tree records
seedling %>% 
  filter(
    PLT_CN %in% plot.data$PLT_CN,
    !PLT_CN %in% unique(tree$PLT_CN[tree$PLT_CN %in% plot.data$PLT_CN])
  ) #there are 343 plots with seedlings but no trees

# plot identifier of each of those seedling records (one entry per record,
# so a plot can appear more than once)
seedling %>% 
  filter(
    PLT_CN %in% plot.data$PLT_CN,
    !PLT_CN %in% unique(tree$PLT_CN[tree$PLT_CN %in% plot.data$PLT_CN])
  ) %>% 
  pull(PLT_CN)
```


## >1 Condition Plots 

I'm going to look at where the plots were that I excluded based on being in more than one condition. 

```{r}
#FIA data for WA, OR, CA, AZ, CO, NM, NV, ID, UT, WY, NE, SD, ND, MT, KS, OK, TX

## updated 1/23/2020 for new FIA data. Restricted to only Interior West plots since that's all we have coordinates for. 
##read in FIA tree & regen data 
cond<- read_csv("compiled_data_annual2020/COND.csv")
survey<- read_csv("compiled_data_annual2020/SURVEY.csv")

##subset to public plots in more than one condition, forested and unforested
cond %>% 
  filter(OWNCD < 40) %>% 
  filter(COND_STATUS_CD == 2) %>% 
  dplyr::select(CONDPROP_UNADJ) %>% 
  ggplot(aes(x=CONDPROP_UNADJ))+
  geom_histogram()

cond %>% 
  filter(OWNCD < 40) %>% 
  filter(COND_STATUS_CD == 2) %>% 
  left_join(plot,by=c("PLT_CN" = "CN")) %>% 
  dplyr::select(ELEV) %>% 
  ggplot(aes(x=ELEV))+
  geom_histogram()+
  ggtitle("Elevation of public non-forested plots")

nonforest.plots <- cond %>% 
  filter(OWNCD < 40) %>% 
  filter(COND_STATUS_CD == 2) %>% 
  left_join(plot,by=c("PLT_CN" = "CN"))

forest.plots.multconds <- cond %>% 
  filter(OWNCD < 40) %>% 
  filter(COND_STATUS_CD == 1) %>% 
  filter(CONDPROP_UNADJ < 1) %>% 
  left_join(plot,by=c("PLT_CN" = "CN"))

usa<- map_data("state")

ggplot()+
  geom_polygon(data=usa,aes(x=long, y=lat, group=group),
                color="black", fill="lightblue")+
  geom_point(data=forest.plots.multconds, aes(x=LON, y=LAT))+
  coord_quickmap()

#look at multiple conditions plots that were multiple forest types (ecotones)

multcond_summary<- cond %>% 
  filter(OWNCD < 40) %>% 
  filter(CONDPROP_UNADJ < 1) %>% 
  group_by(PLT_CN) %>% 
  dplyr::summarise(fldyps = n_distinct(FLDTYPCD,na.rm=TRUE),
                   fortyps = n_distinct(FORTYPCD,na.rm=TRUE),
                   owners = n_distinct(OWNGRPCD,na.rm=TRUE),
                   standsize = n_distinct(STDSZCD,na.rm=TRUE),
                   fldstandsize = n_distinct(FLDSZCD,na.rm=TRUE),
                   productivity = n_distinct(SITECLCD,na.rm=TRUE),
                   disturbance = n_distinct(DSTRBCD1,na.rm=TRUE),
                   treatment = n_distinct(TRTCD1,na.rm=TRUE),
                   nonforestcd = n_distinct(PRESNFCD,na.rm=TRUE),
                   habitat = n_distinct(HABTYPCD1,na.rm=TRUE),
                   coverclass = n_distinct(LAND_COVER_CLASS_CD, na.rm=TRUE),
                   coverclass_ret = n_distinct(LAND_COVER_CLASS_CD_RET, na.rm=TRUE)
                   )

View(multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)))

multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)) %>% 
  filter(disturbance <2)

multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)) %>% 
  filter(disturbance <2) %>% 
  filter(fldyps <2) %>% 
  filter(fortyps<2)

##all plots with more than one field forest type
View(cond %>% 
  filter(PLT_CN %in% c(multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)) %>% 
  filter(disturbance <2) %>% 
  filter(fldyps > 1) %>% 
  pull(PLT_CN) %>% 
  unique())) %>% 
  group_by(PLT_CN) %>% 
  dplyr::summarise(paste(list(unique(FLDTYPCD))))) #1201

##all plots with more than one algorithm derived forest type, that didn't have more than one field forest type 
cond %>% 
  filter(PLT_CN %in% c(multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)) %>% 
  filter(disturbance <2) %>% 
  filter(fortyps > 1 & fldyps < 2) %>% 
  pull(PLT_CN) %>% 
  unique())) %>% 
  group_by(PLT_CN) %>% 
  dplyr::summarise(paste(list(unique(FORTYPCD)))) #314

#there are only 4 plots that are partially privately owned
View(cond %>% 
  filter(PLT_CN %in% c(multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)) %>% 
  filter(disturbance <2) %>% 
  filter(owners > 1) %>% 
  pull(PLT_CN) %>% 
  unique())) %>% 
  group_by(PLT_CN) %>% 
  dplyr::summarise(owners = paste(list(unique(OWNGRPCD)))))

#plots that have multiple stand sizes that don't have different forest types
cond %>% 
  filter(PLT_CN %in% c(multcond_summary %>% 
  filter(if_any(.cols = fldyps:coverclass, .fns = ~.> 1)) %>% 
  filter(disturbance <2) %>% 
  filter(fortyps < 2 & fldyps < 2 & standsize > 1) %>% 
  pull(PLT_CN) %>% 
  unique())) %>% 
  group_by(PLT_CN) %>% 
  dplyr::summarise(paste(list(unique(STDSZCD)))) #253

#number of plots I'd be excluding if I used plots with multiple conditions AS LONG AS they didn't have multiple disturbances or treatments or owners on them 
multcond_summary %>% 
  filter(disturbance < 2, treatment < 2, owners <2) #8,158 plots 

##why are the plots in different conditions?
condition.comparison<- cond %>% 
  filter(PLT_CN %in% 
            c(multcond_summary %>% 
            filter(disturbance < 2, treatment < 2, owners <2) %>% 
            filter(!if_any(.cols = fldyps:coverclass, .fns = ~ . >1)) %>% 
            pull(PLT_CN))
  ) %>% 
  group_by(PLT_CN) %>% 
  dplyr::summarise_all(n_distinct,na.rm=TRUE) 

View(condition.comparison %>% 
  filter(if_any(.cols = colnames(condition.comparison)[4:ncol(condition.comparison)], .fns = ~.>1)))

# Plots to exclude: among the plots whose conditions differ in some column
# (columns 4 onward of condition.comparison) and that contain more than one
# distinct COND_STATUS_CD, keep those with at least one condition row whose
# COND_STATUS_CD is greater than 2.
# The previous version produced one row per condition with summarise() and
# then compared the logical result with the string "TRUE"; filtering the
# condition rows directly selects the same plots. The result is sorted
# because the grouped version also returned the plots ordered by PLT_CN.
largecondstatusplots <- cond %>% 
  filter(PLT_CN %in% (condition.comparison %>% 
    filter(if_any(.cols = colnames(condition.comparison)[4:ncol(condition.comparison)], .fns = ~.>1)) %>% 
    filter(COND_STATUS_CD > 1) %>% 
    pull(PLT_CN) %>% 
    unique())) %>% 
  filter(COND_STATUS_CD > 2) %>% 
  pull(PLT_CN) %>% 
  unique() %>% 
  sort(na.last = TRUE)

#number of plots I'd be excluding if I used plots with multiple conditions AS LONG AS they didn't have multiple disturbances or treatments or owners on them 
multcond_summary %>% 
  filter(disturbance < 2, treatment < 2, owners <2) %>% 
  filter(! PLT_CN %in% largecondstatusplots)  #6,733 plots 


```

## Tree status exploration
This plot shows the number of trees with STATUS codes of 0-3 across all plots used in the analysis, separated by disturbance category. Status codes are as follows:

* 0 no status. Tree is not in sample because either it was incorrectly tallied in the last sample, not currently tallied due to procedural change, or natural causes. 
* 1 live
* 2 dead
* 3 removed by human activity 

Burned plots had the most dead trees (45%), followed by insect/disease (41%), harvest (19%) and none (18%). Harvested plots also had 5% of trees classified as “removed”.
```{r, echo=FALSE}
#look at tree status codes
# Number of tree records per status code within each disturbance agent.
# count() here must be plyr::count (it returns the `x` / `freq` columns used
# below); it was previously found only because plyr is attached after dplyr,
# so the namespace is now spelled out.
tree.status <- dist %>% 
  left_join(tree, by="PLT_CN") %>% 
  group_by(Agent) %>%
  filter(!is.na(STATUSCD)) %>% 
  dplyr::summarise(plyr::count(STATUSCD))

# Stacked bars scaled to 100% per agent: x = status code, freq = number of records
ggplot(tree.status, aes(x=Agent, y=freq, fill=factor(x)))+
  geom_bar(position = "fill", stat = "identity")+
  scale_y_continuous(labels = scales::percent_format())+
  scale_fill_brewer(palette = "Set1")
# 0 = no status (tree not presently in the sample), 1 = live, 2 = dead, 3 = removed
```

```{r, echo=FALSE, results='hide'}
# Percent of an agent's tree records (those counted in tree.status) that carry
# a given status code. Replaces five copies of the same ratio.
#   agent_name:  value of tree.status$Agent, e.g. "fire"
#   status_code: status code stored in tree.status$x
# Returns a numeric vector; it is empty if the agent has no records with that code.
pct_trees_with_status <- function(agent_name, status_code) {
  agent_rows <- tree.status %>% 
    filter(Agent == agent_name)
  agent_rows %>% 
    filter(x == status_code) %>% 
    pluck("freq") / sum(agent_rows$freq) * 100
}

#percent of trees on harvested plots with status = 3
pct_trees_with_status("harvest", 3)
#percent of trees on harvested plots with status = 2
pct_trees_with_status("harvest", 2)
#percent of trees on ID plots with status = 2
pct_trees_with_status("insect.disease", 2)
#percent of trees on none plots with status = 2
pct_trees_with_status("none", 2)
#percent of trees on fire plots with status = 2
pct_trees_with_status("fire", 2)
```


## Species Investigation
Ed asked the question of what the species composition is like in plots that are showing "contractions" AWAY from wet areas. So I am going to look at the species that experienced significant shifts away from wet areas to see if I can tease this out. 

```{r}
pc2.diffs.sep %>% 
  filter(quant_95 < 0)

ggplot(adult_data_pcapts[["93_insect.disease_cvars"]],aes(x=PC1,y=PC2))+
  geom_point()+
  geom_point(data=seedling_data_pcapts[["93_insect.disease_cvars"]],aes(x=PC1,y=PC2),color="purple")

contr.plots.id93<- adult_data_pcapts[["93_insect.disease_cvars"]] %>% 
  filter(PC2 > 2) %>% 
  filter(!PLT_CN %in% seedling_data_pcapts[["93_insect.disease_cvars"]]$PLT_CN) %>% 
  pull(PLT_CN)

# Species recorded as adults on the selected plots (contr.plots.id93), one bar
# per plot, coloured by species code.
# NOTE(review): y is mapped to factor(SPCD), not to the per-species tree count
# `n` in `adults`, so with stat='identity' the bar heights do not represent
# tree numbers. If a species-composition bar chart was intended, y = n is
# probably what is wanted -- confirm. The group_by() has no effect on the plot.
adults %>% 
  filter(PLT_CN %in% contr.plots.id93) %>% 
  group_by(PLT_CN) %>% 
  ggplot(aes(y=factor(SPCD),x=factor(PLT_CN),fill=factor(SPCD)))+
  geom_bar(stat='identity')

locs93<- adults %>% 
  filter(SPCD == '93') %>% 
  left_join(plot, by=c("PLT_CN"="CN")) %>% 
  dplyr::select(LAT,LON)

contr.plots.id93.locs<- plot %>% 
  filter(CN %in% contr.plots.id93) %>% 
  dplyr::select(LAT, LON)

usa<- map_data("state")

ggplot()+
  geom_polygon(data=usa,aes(x=long, y=lat, group=group),
                color="black", fill="lightblue")+
  geom_point(data=locs93, aes(x=LON, y=LAT))+
  geom_point(data=contr.plots.id93.locs, aes(x=LON, y=LAT), color="purple")+
  coord_quickmap()

##lodgepole
ggplot(adult_data_pcapts[["108_insect.disease_cvars"]],aes(x=PC1,y=PC2))+
  geom_point()+
  geom_point(data=seedling_data_pcapts[["108_insect.disease_cvars"]],aes(x=PC1,y=PC2),color="purple")

contr.plots.id108<- adult_data_pcapts[["108_insect.disease_cvars"]] %>% 
  filter(PC2 > 1.5) %>% 
  filter(!PLT_CN %in% seedling_data_pcapts[["108_insect.disease_cvars"]]$PLT_CN) %>% 
  pull(PLT_CN)

# Species recorded as adults on the selected plots (contr.plots.id108), one bar
# per plot, coloured by species code.
# NOTE(review): y is mapped to factor(SPCD), not to the per-species tree count
# `n` in `adults`, so with stat='identity' the bar heights do not represent
# tree numbers. If a species-composition bar chart was intended, y = n is
# probably what is wanted -- confirm. The group_by() has no effect on the plot.
adults %>% 
  filter(PLT_CN %in% contr.plots.id108) %>% 
  group_by(PLT_CN) %>% 
  ggplot(aes(y=factor(SPCD),x=factor(PLT_CN),fill=factor(SPCD)))+
  geom_bar(stat='identity')

locs108<- adults %>% 
  filter(SPCD == '108') %>% 
  left_join(plot, by=c("PLT_CN"="CN")) %>% 
  dplyr::select(LAT,LON)

contr.plots.id108.locs<- plot %>% 
  filter(CN %in% contr.plots.id108) %>% 
  dplyr::select(LAT, LON)

usa<- map_data("state")

ggplot()+
  geom_polygon(data=usa,aes(x=long, y=lat, group=group),
                color="black", fill="lightblue")+
  geom_point(data=locs108, aes(x=LON, y=LAT))+
  geom_point(data=contr.plots.id108.locs, aes(x=LON, y=LAT), color="purple")+
  coord_quickmap()

```


## Original Trait analysis by PC

### PC1


```{r}
# PC1 trait models: log shift magnitude at the 5th, 50th and 95th quantiles
# as a function of agent interacting with shade tolerance and scaled seed
# weight.
#
# Pinyon and Gambel oak (species codes 814 and 106) are dropped to improve the
# residuals-vs-leverage diagnostics.

# 5th quantile
pc1_quant5_traitmod <- lm(
  formula = log(abs(quant_5)) ~ agent * (shade.tolerance + scale(mean.weight.mg)),
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
par(mfrow = c(2, 2))
plot(pc1_quant5_traitmod)
summary(pc1_quant5_traitmod)
anova(pc1_quant5_traitmod)

# 50th quantile
pc1_quant50_traitmod <- lm(
  formula = log(abs(quant_50)) ~ agent * (shade.tolerance + scale(mean.weight.mg)),
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
par(mfrow = c(2, 2))
plot(pc1_quant50_traitmod)
summary(pc1_quant50_traitmod)
anova(pc1_quant50_traitmod)

# 95th quantile
pc1_quant95_traitmod <- lm(
  formula = log(abs(quant_95)) ~ agent * (shade.tolerance + scale(mean.weight.mg)),
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
par(mfrow = c(2, 2))
plot(pc1_quant95_traitmod)
summary(pc1_quant95_traitmod)
anova(pc1_quant95_traitmod)
```

The below plot shows the relationship between absolute value of shifts on PC1 in 5th, 50th, and 95th quantiles vs. the shade tolerance of species, using values in Niinemets & Valladares (2006).

```{r, echo=FALSE}
#plot relationships
library(RColorBrewer)

####shade tolerance
# Components shared by the three PC1 shade-tolerance panels. The trend line is
# listed before the points so it is drawn underneath them.
pc1_shade_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Shade Tolerance Score"),
  scale_y_continuous(limits = c(-0.05, 0.2), breaks = c(0, 0.1, 0.2))
)
pc1_shade_text <- theme(
  axis.text = element_text(color = "black", size = 10),
  text = element_text(size = 10),
  legend.text = element_text(size = 10),
  strip.text = element_text(color = "black", size = 10)
)

shade5 <- ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_5), color = species.name)
) +
  pc1_shade_parts +
  facet_wrap(~agent) +
  ggtitle("PC1 5th quantile") +
  pc1_shade_text +
  theme(legend.position = "none")

# NOTE(review): facet_wrap(~agent) is disabled for this panel only, while the
# 5th and 95th quantile panels are faceted -- confirm this is intentional.
shade50 <- ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_50), color = species.name)
) +
  pc1_shade_parts +
  #facet_wrap(~agent)+
  ggtitle("PC1 50th quantile") +
  pc1_shade_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
shade95 <- ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_95), color = species.name)
) +
  pc1_shade_parts +
  facet_wrap(~agent) +
  ggtitle("PC1 95th quantile") +
  pc1_shade_text


ggarrange(shade5, shade50, shade95, nrow = 1, ncol = 3, common.legend = TRUE)
```

The next plot shows the relationship between absolute value of shifts on PC1 in 5th, 50th, and 95th quantiles vs. the drought tolerance of species, using values in Niinemets & Valladares (2006).

```{r}

# Shift magnitude at the PC1 5th quantile vs. drought tolerance, with one
# linear fit per agent. Written with ggplot() + geom_point() because qplot()
# is deprecated in current ggplot2; the mapping and layers are the same.
ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_5), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)

####drought tolerance
# Components shared by the three faceted PC1 drought-tolerance panels. The
# trend line is listed before the points so it is drawn underneath them.
pc1_drought_facet_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3),
  facet_wrap(~agent),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Drought Tolerance Score"),
  scale_y_continuous(limits = c(-0.05, 0.2), breaks = c(0, 0.1, 0.2))
)
pc1_drought_facet_text <- theme(
  axis.text = element_text(color = "black", size = 10),
  text = element_text(size = 10),
  legend.text = element_text(size = 10),
  strip.text = element_text(color = "black", size = 10)
)

drought5 <- ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_5), color = species.name)
) +
  pc1_drought_facet_parts +
  ggtitle("PC1 5th quantile") +
  pc1_drought_facet_text +
  theme(legend.position = "none")

drought50 <- ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_50), color = species.name)
) +
  pc1_drought_facet_parts +
  ggtitle("PC1 50th quantile") +
  pc1_drought_facet_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
drought95 <- ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_95), color = species.name)
) +
  pc1_drought_facet_parts +
  ggtitle("PC1 95th quantile") +
  pc1_drought_facet_text

ggarrange(drought5, drought50, drought95, nrow = 1, ncol = 3, common.legend = TRUE)

# Unfaceted variants of the PC1 drought-tolerance panels: agent is shown as
# point shape and a single trend line is fitted across all agents.
pc1_drought_shape_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3, aes(shape = agent)),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Drought Tolerance Score"),
  scale_y_continuous(limits = c(-0.05, 0.2), breaks = c(0, 0.1, 0.2))
)
pc1_drought_shape_text <- theme(
  axis.text = element_text(color = "black", size = 10),
  text = element_text(size = 10),
  legend.text = element_text(size = 10),
  strip.text = element_text(color = "black", size = 10)
)

drought5a <- ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_5), color = species.name)
) +
  pc1_drought_shape_parts +
  ggtitle("PC1 5th quantile") +
  pc1_drought_shape_text +
  theme(legend.position = "none")

drought50a <- ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_50), color = species.name)
) +
  pc1_drought_shape_parts +
  ggtitle("PC1 50th quantile") +
  pc1_drought_shape_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
drought95a <- ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_95), color = species.name)
) +
  pc1_drought_shape_parts +
  ggtitle("PC1 95th quantile") +
  pc1_drought_shape_text

ggarrange(drought5a, drought50a, drought95a, ncol = 3, nrow = 1, common.legend = TRUE)
```

The below plot shows the relationship between shifts on PC1 in 5th, 50th, and 95th quantiles vs. the seed weight of species, using average values of results from a TRY database search. Gambel oak and pinyon were excluded from this analysis, as they were large outliers in seed weight (very heavy). Perhaps there is a better proxy for seed dispersal? Or I can scale values so that these species can be included? Even with scaling, these are large outliers. 

```{r}

####seed weight
##excluded gambel oak and pinyon as they were large outliers
# Rows of pc1.diffs.traits without species codes 814 and 106.
pc1_weight_facet_data <- pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]

# Components shared by the three faceted PC1 seed-weight panels.
pc1_weight_facet_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3),
  facet_wrap(~agent),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Seed Weight (mg)"),
  scale_y_continuous(limits = c(-0.05, 0.2), breaks = c(0, 0.1, 0.2))
)
pc1_weight_facet_text <- theme(
  axis.text = element_text(color = "black", size = 10),
  text = element_text(size = 10),
  legend.text = element_text(size = 10),
  strip.text = element_text(color = "black", size = 10)
)

weight5 <- ggplot(
  pc1_weight_facet_data,
  aes(x = mean.weight.mg, y = abs(quant_5), color = species.name)
) +
  pc1_weight_facet_parts +
  ggtitle("PC1 5th quantile") +
  pc1_weight_facet_text +
  theme(legend.position = "none")

weight50 <- ggplot(
  pc1_weight_facet_data,
  aes(x = mean.weight.mg, y = abs(quant_50), color = species.name)
) +
  pc1_weight_facet_parts +
  ggtitle("PC1 50th quantile") +
  pc1_weight_facet_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
weight95 <- ggplot(
  pc1_weight_facet_data,
  aes(x = mean.weight.mg, y = abs(quant_95), color = species.name)
) +
  pc1_weight_facet_parts +
  ggtitle("PC1 95th quantile") +
  pc1_weight_facet_text

ggarrange(weight5, weight50, weight95, nrow = 1, ncol = 3, common.legend = TRUE)

####seed weight
##excluded gambel oak and pinyon as they were large outliers
# Unfaceted variants: agent is shown as point shape and a single trend line is
# fitted across all agents. Species codes 814 and 106 are dropped.
pc1_weight_shape_data <- pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]

pc1_weight_shape_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3, aes(shape = agent)),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Seed Weight (mg)"),
  scale_y_continuous(limits = c(-0.05, 0.2), breaks = c(0, 0.1, 0.2))
)
pc1_weight_shape_text <- theme(
  axis.text = element_text(color = "black", size = 10),
  text = element_text(size = 10),
  legend.text = element_text(size = 10),
  strip.text = element_text(color = "black", size = 10)
)

weight5a <- ggplot(
  pc1_weight_shape_data,
  aes(x = mean.weight.mg, y = abs(quant_5), color = species.name)
) +
  pc1_weight_shape_parts +
  ggtitle("PC1 5th quantile") +
  pc1_weight_shape_text +
  theme(legend.position = "none")

weight50a <- ggplot(
  pc1_weight_shape_data,
  aes(x = mean.weight.mg, y = abs(quant_50), color = species.name)
) +
  pc1_weight_shape_parts +
  ggtitle("PC1 50th quantile") +
  pc1_weight_shape_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
weight95a <- ggplot(
  pc1_weight_shape_data,
  aes(x = mean.weight.mg, y = abs(quant_95), color = species.name)
) +
  pc1_weight_shape_parts +
  ggtitle("PC1 95th quantile") +
  pc1_weight_shape_text

ggarrange(weight5a, weight50a, weight95a, nrow = 1, ncol = 3, common.legend = TRUE)

```

By running some linear models with species traits and disturbance type as predictor variables, we see that shade tolerance is a marginally significant predictor of the magnitude of 50th percentile shifts on PC1 and a significant predictor of the magnitude of 95th percentile shifts on PC1, with both being negative relationships (species with higher shade tolerance shifted less). There are no significant relationships between drought tolerance or seed weight and shifts on PC1 and no significant interactive effects between disturbance type and species traits (should this be a random effect?).

```{r}
library(emmeans)
##linear models
par(mfrow = c(2, 2))
####shade tolerance
# Shade tolerance alone as a predictor of shift magnitude at the PC1 5th
# quantile.
summary(lm(formula = abs(quant_5) ~ shade.tolerance, data = pc1.diffs.traits))

# Same response with the shade tolerance x agent interaction.
pc1mod5_shade <- lm(
  formula = abs(quant_5) ~ shade.tolerance * agent,
  data = pc1.diffs.traits
)
summary(pc1mod5_shade)
anova(pc1mod5_shade)

# Per-agent linear fits. Written with ggplot() + geom_point() because qplot()
# is deprecated in current ggplot2; the mapping and layers are the same.
ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_5), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod5_shade)
```

```{r}
# Shade tolerance x agent model for shift magnitude at the PC1 50th quantile,
# followed by the aov table, the lm ANOVA/summary, and the shade-only model.
pc1mod50_shade <- lm(
  formula = abs(quant_50) ~ shade.tolerance * agent,
  data = pc1.diffs.traits
)
summary(
  aov(formula = abs(quant_50) ~ shade.tolerance * agent, data = pc1.diffs.traits)
)
anova(pc1mod50_shade)
summary(pc1mod50_shade)
summary(lm(formula = abs(quant_50) ~ shade.tolerance, data = pc1.diffs.traits))
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod50_shade)
```

```{r}
# Shade tolerance x agent model for shift magnitude at the PC1 95th quantile,
# plus the shade-only model for comparison.
pc1mod95_shade <- lm(
  formula = abs(quant_95) ~ shade.tolerance * agent,
  data = pc1.diffs.traits
)
summary(pc1mod95_shade)
anova(pc1mod95_shade)
summary(lm(formula = abs(quant_95) ~ shade.tolerance, data = pc1.diffs.traits))
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod95_shade)
```

```{r,echo=FALSE}
# Unfaceted view of shift magnitude at the PC1 95th quantile vs. shade
# tolerance, with a single linear trend across all agents.
ggplot(
  data = pc1.diffs.traits,
  mapping = aes(x = shade.tolerance, y = abs(quant_95), color = species.name)
) +
  stat_smooth(method = "lm", col = "black", lty = 2) +
  geom_point(size = 3) +
  scale_color_brewer(palette = "Paired") +
  scale_y_continuous(limits = c(-0.05, 0.2), breaks = c(0, 0.1, 0.2)) +
  ggtitle("PC1 95th quantile") +
  xlab("Shade Tolerance Score") +
  ylab("Shift Magnitude") +
  theme(
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 10),
    legend.text = element_text(size = 10),
    strip.text = element_text(color = "black", size = 10)
  )

```


```{r}
####drought tolerance
# Drought tolerance x agent model for shift magnitude at the PC1 5th quantile,
# plus the drought-only model for comparison.
pc1mod5_drought <- lm(
  formula = abs(quant_5) ~ drought.tolerance * agent,
  data = pc1.diffs.traits
)
summary(pc1mod5_drought)
anova(pc1mod5_drought)
summary(lm(formula = abs(quant_5) ~ drought.tolerance, data = pc1.diffs.traits))

```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod5_drought)
```

```{r}
# Drought tolerance x agent model for shift magnitude at the PC1 50th
# quantile, plus the drought-only model for comparison.
pc1mod50_drought <- lm(
  formula = abs(quant_50) ~ drought.tolerance * agent,
  data = pc1.diffs.traits
)
summary(pc1mod50_drought)
anova(pc1mod50_drought)
summary(lm(formula = abs(quant_50) ~ drought.tolerance, data = pc1.diffs.traits))

```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod50_drought)
```

```{r}
# Drought tolerance x agent model for shift magnitude at the PC1 95th
# quantile, plus the drought-only model for comparison.
pc1mod95_drought <- lm(
  formula = abs(quant_95) ~ drought.tolerance * agent,
  data = pc1.diffs.traits
)
anova(pc1mod95_drought)
summary(pc1mod95_drought)
summary(lm(formula = abs(quant_95) ~ drought.tolerance, data = pc1.diffs.traits))


```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod95_drought)
```

```{r}
####seed weight
# Seed weight x agent model for shift magnitude at the PC1 5th quantile, plus
# the weight-only model. Species codes 814 and 106 are excluded from both.
pc1mod5_weight <- lm(
  formula = abs(quant_5) ~ mean.weight.mg * agent,
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
summary(pc1mod5_weight)
anova(pc1mod5_weight)
summary(
  lm(
    formula = abs(quant_5) ~ mean.weight.mg,
    data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
  )
)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod5_weight)
```

```{r}
# Seed weight x agent model for shift magnitude at the PC1 50th quantile, plus
# the weight-only model. Species codes 814 and 106 are excluded from both.
pc1mod50_weight <- lm(
  formula = abs(quant_50) ~ mean.weight.mg * agent,
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
anova(pc1mod50_weight)
summary(pc1mod50_weight)
summary(
  lm(
    formula = abs(quant_50) ~ mean.weight.mg,
    data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
  )
)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod50_weight)
```

```{r}
# Seed weight x agent model for shift magnitude at the PC1 95th quantile, plus
# the weight-only model. Species codes 814 and 106 are excluded from both.
pc1mod95_weight <- lm(
  formula = abs(quant_95) ~ mean.weight.mg * agent,
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
anova(pc1mod95_weight)
summary(pc1mod95_weight)
summary(
  lm(
    formula = abs(quant_95) ~ mean.weight.mg,
    data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
  )
)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(pc1mod95_weight)
```

```{r}
#combine all traits in one model
# Each model crosses agent with shade and drought tolerance. The scatter plots
# after each model show per-agent linear fits for one trait at a time; they
# use ggplot() + geom_point() because qplot() is deprecated in current
# ggplot2 (the mapping and layers are the same).

# 5th quantile
pc1mod5_all <- lm(
  formula = abs(quant_5) ~ agent * (shade.tolerance + drought.tolerance),
  data = pc1.diffs.traits
)
anova(pc1mod5_all)
summary(pc1mod5_all)

ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_5), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)
ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_5), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)

# 50th quantile
pc1mod50_all <- lm(
  formula = abs(quant_50) ~ agent * (shade.tolerance + drought.tolerance),
  data = pc1.diffs.traits
)
anova(pc1mod50_all)
summary(pc1mod50_all)
plot(pc1mod50_all)

ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_50), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)
ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_50), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)


# 95th quantile
# NOTE(review): this model uses log(abs(quant_95)) as the response while the
# 5th/50th models above use abs(...) untransformed -- confirm this is
# intentional. The scatter plots below show the untransformed magnitude.
pc1mod95_all <- lm(
  formula = log(abs(quant_95)) ~ agent * (shade.tolerance + drought.tolerance),
  data = pc1.diffs.traits
)
anova(pc1mod95_all)
summary(pc1mod95_all)
plot(pc1mod95_all)

ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_95), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)
ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_95), shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)
```

### PC2

Now we'll analyze the relationship between species traits, disturbance, and the magnitude (absolute value) of shifts on PC2.

```{r}
# PC2 trait models: log shift magnitude at the 5th, 50th and 95th quantiles
# as a function of agent interacting with shade tolerance and scaled seed
# weight. Species codes 814 and 106 are excluded from every fit.

# 5th quantile
pc2_quant5_traitmod <- lm(
  formula = log(abs(quant_5)) ~ agent * (shade.tolerance + scale(mean.weight.mg)),
  data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
)
par(mfrow = c(2, 2))
plot(pc2_quant5_traitmod)
summary(pc2_quant5_traitmod)
anova(pc2_quant5_traitmod)

# 50th quantile
pc2_quant50_traitmod <- lm(
  formula = log(abs(quant_50)) ~ agent * (shade.tolerance + scale(mean.weight.mg)),
  data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
)
par(mfrow = c(2, 2))
plot(pc2_quant50_traitmod)
summary(pc2_quant50_traitmod)
anova(pc2_quant50_traitmod)

# 95th quantile
pc2_quant95_traitmod <- lm(
  formula = log(abs(quant_95)) ~ agent * (shade.tolerance + scale(mean.weight.mg)),
  data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
)
par(mfrow = c(2, 2))
plot(pc2_quant95_traitmod)
summary(pc2_quant95_traitmod)
anova(pc2_quant95_traitmod)
```


```{r, echo=FALSE}
#plot relationships
####shade tolerance
# Components shared by the three faceted PC2 shade-tolerance panels. The trend
# line is listed before the points so it is drawn underneath them.
pc2_shade_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3),
  facet_wrap(~agent),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Shade Tolerance Score")
)
pc2_shade_text <- theme(
  axis.text = element_text(color = "black", size = 14),
  text = element_text(size = 14),
  legend.text = element_text(size = 14),
  strip.text = element_text(color = "black", size = 14)
)

shade5_2 <- ggplot(
  pc2.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_5), color = species.name)
) +
  pc2_shade_parts +
  ggtitle("PC2 5th quantile") +
  pc2_shade_text +
  theme(legend.position = "none")

shade50_2 <- ggplot(
  pc2.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_50), color = species.name)
) +
  pc2_shade_parts +
  ggtitle("PC2 50th quantile") +
  pc2_shade_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
shade95_2 <- ggplot(
  pc2.diffs.traits,
  aes(x = shade.tolerance, y = abs(quant_95), color = species.name)
) +
  pc2_shade_parts +
  ggtitle("PC2 95th quantile") +
  pc2_shade_text


ggarrange(shade5_2, shade50_2, shade95_2, nrow = 1, ncol = 3, common.legend = TRUE)
```

```{r, echo=FALSE}
####drought tolerance
# Components shared by the three faceted PC2 drought-tolerance panels. The
# trend line is listed before the points so it is drawn underneath them.
pc2_drought_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3),
  facet_wrap(~agent),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Drought Tolerance Score")
)
pc2_drought_text <- theme(
  axis.text = element_text(color = "black", size = 14),
  text = element_text(size = 14),
  legend.text = element_text(size = 14),
  strip.text = element_text(color = "black", size = 14)
)

drought5_2 <- ggplot(
  pc2.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_5), color = species.name)
) +
  pc2_drought_parts +
  ggtitle("PC2 5th quantile") +
  pc2_drought_text +
  theme(legend.position = "none")

drought50_2 <- ggplot(
  pc2.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_50), color = species.name)
) +
  pc2_drought_parts +
  ggtitle("PC2 50th quantile") +
  pc2_drought_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
drought95_2 <- ggplot(
  pc2.diffs.traits,
  aes(x = drought.tolerance, y = abs(quant_95), color = species.name)
) +
  pc2_drought_parts +
  ggtitle("PC2 95th quantile") +
  pc2_drought_text

ggarrange(drought5_2, drought50_2, drought95_2, nrow = 1, ncol = 3, common.legend = TRUE)
```

```{r, echo=FALSE}
####seed weight
##excluded gambel oak and pinyon as they were large outliers
# Rows of pc2.diffs.traits without species codes 814 and 106. The row mask is
# built from pc2.diffs.traits itself: masking with pc1.diffs.traits only
# selects the right rows if both tables happen to share length and row order.
pc2_weight_data <- pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]

# Components shared by the three faceted PC2 seed-weight panels. The trend
# line is listed before the points so it is drawn underneath them.
pc2_weight_parts <- list(
  stat_smooth(method = "lm", col = "black", lty = 2),
  geom_point(size = 3),
  facet_wrap(~agent),
  scale_color_brewer(palette = "Paired"),
  ylab("Shift Magnitude"),
  xlab("Seed Weight (mg)")
)
pc2_weight_text <- theme(
  axis.text = element_text(color = "black", size = 14),
  text = element_text(size = 14),
  legend.text = element_text(size = 14),
  strip.text = element_text(color = "black", size = 14)
)

weight5_2 <- ggplot(
  pc2_weight_data,
  aes(x = mean.weight.mg, y = abs(quant_5), color = species.name)
) +
  pc2_weight_parts +
  ggtitle("PC2 5th quantile") +
  pc2_weight_text +
  theme(legend.position = "none")

weight50_2 <- ggplot(
  pc2_weight_data,
  aes(x = mean.weight.mg, y = abs(quant_50), color = species.name)
) +
  pc2_weight_parts +
  ggtitle("PC2 50th quantile") +
  pc2_weight_text +
  theme(legend.position = "none")

# The last panel keeps its legend.
weight95_2 <- ggplot(
  pc2_weight_data,
  aes(x = mean.weight.mg, y = abs(quant_95), color = species.name)
) +
  pc2_weight_parts +
  ggtitle("PC2 95th quantile") +
  pc2_weight_text

ggarrange(weight5_2, weight50_2, weight95_2, nrow = 1, ncol = 3, common.legend = TRUE)
```

By running some linear models we see that seed weight, drought and shade tolerance do not seem to predict shifts on PC2 significantly for any of the disturbance categories.

```{r, echo=FALSE}
##linear models
par(mfrow = c(2, 2))
####shade tolerance
# For each PC2 quantile: shade tolerance x agent model (ANOVA table and
# coefficient summary), then the shade-only model for comparison.

# 5th quantile
pc2mod5_shade <- lm(
  formula = abs(quant_5) ~ shade.tolerance * agent,
  data = pc2.diffs.traits
)
anova(pc2mod5_shade)
summary(pc2mod5_shade)
summary(lm(formula = abs(quant_5) ~ shade.tolerance, data = pc2.diffs.traits))
#plot(pc2mod5_shade)

# 50th quantile
pc2mod50_shade <- lm(
  formula = abs(quant_50) ~ shade.tolerance * agent,
  data = pc2.diffs.traits
)
anova(pc2mod50_shade)
summary(pc2mod50_shade)
summary(lm(formula = abs(quant_50) ~ shade.tolerance, data = pc2.diffs.traits))
#plot(pc2mod50_shade)

# 95th quantile
pc2mod95_shade <- lm(
  formula = abs(quant_95) ~ shade.tolerance * agent,
  data = pc2.diffs.traits
)
anova(pc2mod95_shade)
summary(pc2mod95_shade)
summary(lm(formula = abs(quant_95) ~ shade.tolerance, data = pc2.diffs.traits))
#plot(pc2mod95_shade)

####drought tolerance
# For each PC2 quantile: drought tolerance x agent model (ANOVA table and
# coefficient summary), then the drought-only model for comparison.

# 5th quantile
pc2mod5_drought <- lm(
  formula = abs(quant_5) ~ drought.tolerance * agent,
  data = pc2.diffs.traits
)
anova(pc2mod5_drought)
summary(pc2mod5_drought)
summary(lm(formula = abs(quant_5) ~ drought.tolerance, data = pc2.diffs.traits))
#plot(pc2mod5_drought)

# 50th quantile
pc2mod50_drought <- lm(
  formula = abs(quant_50) ~ drought.tolerance * agent,
  data = pc2.diffs.traits
)
anova(pc2mod50_drought)
summary(pc2mod50_drought)
summary(lm(formula = abs(quant_50) ~ drought.tolerance, data = pc2.diffs.traits))
#plot(pc2mod50_drought)

# 95th quantile
pc2mod95_drought <- lm(
  formula = abs(quant_95) ~ drought.tolerance * agent,
  data = pc2.diffs.traits
)
anova(pc2mod95_drought)
summary(pc2mod95_drought)
summary(lm(formula = abs(quant_95) ~ drought.tolerance, data = pc2.diffs.traits))
#plot(pc2mod95_drought)

####seed weight
# For each PC2 quantile: seed weight x agent model, then the weight-only
# model. Species codes 814 and 106 are excluded from every fit.

# 5th quantile
pc2mod5_weight <- lm(
  formula = abs(quant_5) ~ mean.weight.mg * agent,
  data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
)
summary(pc2mod5_weight)
summary(
  lm(
    formula = abs(quant_5) ~ mean.weight.mg,
    data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
  )
)
#plot(pc2mod5_weight)

# 50th quantile
pc2mod50_weight <- lm(
  formula = abs(quant_50) ~ mean.weight.mg * agent,
  data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
)
summary(pc2mod50_weight)
summary(
  lm(
    formula = abs(quant_50) ~ mean.weight.mg,
    data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
  )
)
#plot(pc2mod50_weight)

# 95th quantile
pc2mod95_weight <- lm(
  formula = abs(quant_95) ~ mean.weight.mg * agent,
  data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
)
summary(pc2mod95_weight)
summary(
  lm(
    formula = abs(quant_95) ~ mean.weight.mg,
    data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
  )
)
#plot(pc2mod95_weight)
# Diagnostic plots for the weight-only model at the 5th quantile.
plot(
  lm(
    formula = abs(quant_5) ~ mean.weight.mg,
    data = pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814, 106), ]
  )
)
```

## Magnitude + Direction

I will now look at the effect of species traits on the combination of magnitude and direction of shifts (real value) on PC1 and PC2. We'll start with running some models for PC1. 

### PC1

```{r}
##linear models
par(mfrow = c(2, 2))
####shade tolerance
# Signed shift (magnitude and direction) at the PC1 5th quantile as a function
# of shade tolerance x agent.
dm_pc1mod5_shade <- lm(
  formula = quant_5 ~ shade.tolerance * agent,
  data = pc1.diffs.traits
)
anova(dm_pc1mod5_shade)
summary(dm_pc1mod5_shade)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod5_shade)
```

```{r}
# Signed shift at the PC1 50th quantile as a function of shade tolerance x
# agent, plus the shade-only model for comparison.
dm_pc1mod50_shade <- lm(
  formula = quant_50 ~ shade.tolerance * agent,
  data = pc1.diffs.traits
)
anova(dm_pc1mod50_shade)
summary(dm_pc1mod50_shade)
summary(lm(formula = quant_50 ~ shade.tolerance, data = pc1.diffs.traits))

# Per-agent linear fits. Written with ggplot() + geom_point() because qplot()
# is deprecated in current ggplot2; the mapping and layers are the same.
ggplot(
  pc1.diffs.traits,
  aes(x = shade.tolerance, y = quant_50, shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)

```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod50_shade)
```

```{r}
# Signed shift at the PC1 95th quantile as a function of shade tolerance x
# agent.
dm_pc1mod95_shade <- lm(
  formula = quant_95 ~ shade.tolerance * agent,
  data = pc1.diffs.traits
)
anova(dm_pc1mod95_shade)
summary(dm_pc1mod95_shade)
```

```{r}
# Signed shift at the PC1 95th quantile vs. shade tolerance, with a single
# linear trend across all agents.
ggplot(
  data = pc1.diffs.traits,
  mapping = aes(x = shade.tolerance, y = quant_95, color = species.name)
) +
  stat_smooth(method = "lm", col = "black", lty = 2) +
  geom_point(size = 3) +
  scale_color_brewer(palette = "Paired") +
  ggtitle("PC1 95th quantile") +
  xlab("Shade Tolerance Score") +
  ylab("Difference in 95th quantile on PC1") +
  theme(
    axis.text = element_text(color = "black", size = 11),
    text = element_text(size = 11),
    legend.text = element_text(size = 11),
    strip.text = element_text(color = "black", size = 11)
  )
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod95_shade)
```

```{r}
####drought tolerance
# Signed shift at the PC1 5th quantile as a function of drought tolerance x
# agent.
dm_pc1mod5_drought <- lm(
  formula = quant_5 ~ drought.tolerance * agent,
  data = pc1.diffs.traits
)
anova(dm_pc1mod5_drought)
summary(dm_pc1mod5_drought)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod5_drought)
```

```{r}
# The 5th-percentile PC1 model shows a significant drought tolerance x harvest
# interaction; facet by agent to see which direction it goes.
ggplot(
  data = pc1.diffs.traits,
  mapping = aes(x = drought.tolerance, y = quant_5, color = species.name)
) +
  stat_smooth(method = "lm", col = "black", lty = 2) +
  geom_point(size = 3) +
  facet_wrap(~agent) +
  scale_color_brewer(palette = "Paired") +
  ggtitle("PC1 5th quantile") +
  xlab("Drought Tolerance Score") +
  ylab("Shift") +
  theme(
    axis.text = element_text(color = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(color = "black", size = 14)
  )

```

```{r}
# Signed shift at the PC1 50th quantile as a function of drought tolerance x
# agent.
dm_pc1mod50_drought <- lm(
  formula = quant_50 ~ drought.tolerance * agent,
  data = pc1.diffs.traits
)
anova(dm_pc1mod50_drought)
summary(dm_pc1mod50_drought)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod50_drought)
```

```{r}
# Signed shift at the PC1 95th quantile as a function of drought tolerance x
# agent, plus the drought-only model for comparison.
dm_pc1mod95_drought <- lm(
  formula = quant_95 ~ drought.tolerance * agent,
  data = pc1.diffs.traits
)
anova(dm_pc1mod95_drought)
summary(dm_pc1mod95_drought)
summary(lm(formula = quant_95 ~ drought.tolerance, data = pc1.diffs.traits))
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod95_drought)
```

```{r}
# Distribution of the signed PC1 95th-quantile shift within each agent.
ggplot(pc1.diffs.traits, aes(x = agent, y = quant_95)) +
  geom_boxplot()


# Signed shift vs. drought tolerance with a single trend across all agents.
ggplot(
  data = pc1.diffs.traits,
  mapping = aes(x = drought.tolerance, y = quant_95, color = species.name)
) +
  stat_smooth(method = "lm", col = "black", lty = 2) +
  geom_point(size = 3) +
  scale_color_brewer(palette = "Paired") +
  ggtitle("PC1 95th quantile") +
  ylab("Difference in 95th quantile on PC1") +
  xlab("Drought Tolerance Score") +
  theme(
    axis.text = element_text(color = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(color = "black", size = 14)
  )


# Per-agent linear fits. Written with ggplot() + geom_point() because qplot()
# is deprecated in current ggplot2; the mapping and layers are the same.
ggplot(
  pc1.diffs.traits,
  aes(x = drought.tolerance, y = quant_95, shape = agent, color = agent)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE)
```

```{r}
####seed weight
# Signed shift at the PC1 5th quantile as a function of seed weight x agent.
# Species codes 814 and 106 are excluded.
dm_pc1mod5_weight <- lm(
  formula = quant_5 ~ mean.weight.mg * agent,
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
summary(dm_pc1mod5_weight)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod5_weight)
```

```{r}
# Signed shift at the PC1 50th quantile as a function of seed weight x agent.
# Species codes 814 and 106 are excluded.
dm_pc1mod50_weight <- lm(
  formula = quant_50 ~ mean.weight.mg * agent,
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
summary(dm_pc1mod50_weight)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod50_weight)
```

```{r}
# Signed shift at the PC1 95th quantile as a function of seed weight x agent.
# Species codes 814 and 106 are excluded.
dm_pc1mod95_weight <- lm(
  formula = quant_95 ~ mean.weight.mg * agent,
  data = pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(814, 106), ]
)
summary(dm_pc1mod95_weight)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc1mod95_weight)
```


### PC2

And now for PC2:

```{r}
##linear models
par(mfrow = c(2, 2))
####shade tolerance
# Signed shift at the PC2 5th quantile as a function of shade tolerance x
# agent.
dm_pc2mod5_shade <- lm(
  formula = quant_5 ~ shade.tolerance * agent,
  data = pc2.diffs.traits
)
anova(dm_pc2mod5_shade)
summary(dm_pc2mod5_shade)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc2mod5_shade)
```

```{r}
# Signed shift at the PC2 50th quantile as a function of shade tolerance x
# agent.
dm_pc2mod50_shade <- lm(
  formula = quant_50 ~ shade.tolerance * agent,
  data = pc2.diffs.traits
)
anova(dm_pc2mod50_shade)
summary(dm_pc2mod50_shade)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc2mod50_shade)
```

```{r}
# Signed shift at the PC2 95th quantile as a function of shade tolerance x
# agent.
dm_pc2mod95_shade <- lm(
  formula = quant_95 ~ shade.tolerance * agent,
  data = pc2.diffs.traits
)
anova(dm_pc2mod95_shade)
summary(dm_pc2mod95_shade)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc2mod95_shade)
```

```{r}
####drought tolerance
# Signed shift at the PC2 5th quantile as a function of drought tolerance x
# agent.
dm_pc2mod5_drought <- lm(
  formula = quant_5 ~ drought.tolerance * agent,
  data = pc2.diffs.traits
)
anova(dm_pc2mod5_drought)
summary(dm_pc2mod5_drought)
```

```{r, echo=FALSE, eval=FALSE}
# Diagnostic plots for the fitted lm; the chunk header sets eval=FALSE, so this is not run on knit.
plot(dm_pc2mod5_drought)
```

```{r}
# The drought tolerance x agent interaction (harvest vs. none) was marginally
# significant for shifts in the 5th percentile of PC2. Plot the shift against
# drought tolerance, one panel per agent, to see the direction of that
# interaction. Points are coloured by species; the dashed black line is a
# single lm fit per panel.
ggplot(
  data = pc2.diffs.traits,
  mapping = aes(x = drought.tolerance, y = quant_5, colour = species.name)
) +
  stat_smooth(method = "lm", colour = "black", linetype = 2) +
  geom_point(size = 3) +
  facet_wrap(~agent) +
  scale_colour_brewer(palette = "Paired") +
  labs(
    title = "pc2 5th quantile",
    x = "Drought Tolerance Score",
    y = "Shift"
  ) +
  theme(
    axis.text = element_text(colour = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(colour = "black", size = 14)
  )

```

```{r}
# PC2 50th-quantile shift (quant_50) modelled on drought tolerance, agent and
# their interaction.
dm_pc2mod50_drought <- lm(quant_50~drought.tolerance*agent,pc2.diffs.traits)
# ANOVA table for the model terms, then coefficient-level summary.
anova(dm_pc2mod50_drought)
summary(dm_pc2mod50_drought)
```

```{r, echo=FALSE, eval=FALSE}
# lm diagnostic plots for dm_pc2mod50_drought (chunk has eval=FALSE, so it is not run on knit).
plot(dm_pc2mod50_drought)
```

```{r}
# PC2 95th-quantile shift (quant_95) modelled on drought tolerance, agent and
# their interaction.
dm_pc2mod95_drought <- lm(quant_95~drought.tolerance*agent,pc2.diffs.traits)
# ANOVA table for the model terms, then coefficient-level summary.
anova(dm_pc2mod95_drought)
summary(dm_pc2mod95_drought)
```

```{r, echo=FALSE, eval=FALSE}
# lm diagnostic plots for dm_pc2mod95_drought (chunk has eval=FALSE, so it is not run on knit).
plot(dm_pc2mod95_drought)
```

```{r}
#### Seed weight
# PC2 5th-quantile shift (quant_5) modelled on mean seed weight
# (mean.weight.mg), agent and their interaction. Rows whose species.code is
# 814 or 106 are dropped before fitting.
# NOTE(review): the reason for excluding these two codes is not shown here.
dm_pc2mod5_weight <- lm(quant_5~mean.weight.mg*agent,pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814,106),])
# Coefficient estimates and overall fit statistics.
summary(dm_pc2mod5_weight)
```

```{r, echo=FALSE, eval=FALSE}
# lm diagnostic plots for dm_pc2mod5_weight (chunk has eval=FALSE, so it is not run on knit).
plot(dm_pc2mod5_weight)
```

```{r}
# PC2 5th-quantile shift against seed weight, one panel per agent, using the
# same rows as the seed-weight models (species.code 814 and 106 removed).
# Points are coloured by species; the dashed black line is one lm fit per panel.
pc2_q5_weight_keep <- !(pc2.diffs.traits$species.code %in% c(814, 106))

ggplot(
  data = pc2.diffs.traits[pc2_q5_weight_keep, ],
  mapping = aes(x = mean.weight.mg, y = quant_5, colour = species.name)
) +
  stat_smooth(method = "lm", colour = "black", linetype = 2) +
  geom_point(size = 3) +
  facet_wrap(~agent) +
  scale_colour_brewer(palette = "Paired") +
  labs(
    title = "PC2 5th quantile",
    x = "Seed Weight(mg)",
    y = "Shift"
  ) +
  theme(
    axis.text = element_text(colour = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(colour = "black", size = 14)
  )

```

```{r}
# Same seed-weight model for the PC2 50th-quantile shift (quant_50):
# mean.weight.mg, agent and their interaction, again without species.code
# 814 and 106.
dm_pc2mod50_weight <- lm(quant_50~mean.weight.mg*agent,pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814,106),])
# Coefficient estimates and overall fit statistics.
summary(dm_pc2mod50_weight)
```

```{r, echo=FALSE, eval=FALSE}
# lm diagnostic plots for dm_pc2mod50_weight (chunk has eval=FALSE, so it is not run on knit).
plot(dm_pc2mod50_weight)
```

```{r}
# PC2 50th-quantile shift against seed weight, one panel per agent, using the
# same rows as the seed-weight models (species.code 814 and 106 removed).
# Points are coloured by species; the dashed black line is one lm fit per panel.
pc2_q50_weight_keep <- !(pc2.diffs.traits$species.code %in% c(814, 106))

ggplot(
  data = pc2.diffs.traits[pc2_q50_weight_keep, ],
  mapping = aes(x = mean.weight.mg, y = quant_50, colour = species.name)
) +
  stat_smooth(method = "lm", colour = "black", linetype = 2) +
  geom_point(size = 3) +
  facet_wrap(~agent) +
  scale_colour_brewer(palette = "Paired") +
  labs(
    title = "PC2 50th quantile",
    x = "Seed Weight(mg)",
    y = "Shift"
  ) +
  theme(
    axis.text = element_text(colour = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(colour = "black", size = 14)
  )
```

```{r}
# Same seed-weight model for the PC2 95th-quantile shift (quant_95):
# mean.weight.mg, agent and their interaction, again without species.code
# 814 and 106.
dm_pc2mod95_weight <- lm(quant_95~mean.weight.mg*agent,pc2.diffs.traits[!pc2.diffs.traits$species.code %in% c(814,106),])
# Coefficient estimates and overall fit statistics.
summary(dm_pc2mod95_weight)
```

```{r, echo=FALSE, eval=FALSE}
# lm diagnostic plots for dm_pc2mod95_weight (chunk has eval=FALSE, so it is not run on knit).
plot(dm_pc2mod95_weight)
```


## Plots of Trait effects for ESA presentation
```{r}
# ESA presentation plots of trait effects on PC1 shifts. Each trait/quantile
# pair is drawn twice: the shift magnitude (absolute value) and the signed
# shift. Shape and colour both encode the agent.

# Layers shared by every plot in this chunk (previously copy-pasted six times):
# points, one lm line per agent without a confidence band and extended over
# the full x range, and the black-and-white theme with 14 pt text.
# `fullrange = TRUE` replaces `T`, which is an ordinary reassignable variable.
esa_layers <- list(
  geom_point(size = 3),
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1),
  theme_bw(),
  theme(
    axis.text = element_text(color = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(color = "black", size = 14)
  )
)

# Shade tolerance effects on PC1 50th quantile shifts
ggplot(pc1.diffs.traits, aes(x = shade.tolerance, y = abs(quant_50), shape = agent, color = agent)) +
  esa_layers +
  xlab("Shade Tolerance") +
  ylab("50th quantile shift magnitude")

# ylim() sets scale limits, so observations outside the range are dropped
# before the lines are fitted.
ggplot(pc1.diffs.traits, aes(x = shade.tolerance, y = quant_50, shape = agent, color = agent)) +
  esa_layers +
  xlab("Shade Tolerance") +
  ylab("50th quantile shift") +
  ylim(c(-0.075, 0.125))


# Shade tolerance effects on PC1 95th quantile shifts
ggplot(pc1.diffs.traits, aes(x = shade.tolerance, y = abs(quant_95), shape = agent, color = agent)) +
  esa_layers +
  xlab("Shade Tolerance") +
  ylab("95th quantile shift magnitude")

ggplot(pc1.diffs.traits, aes(x = shade.tolerance, y = quant_95, shape = agent, color = agent)) +
  esa_layers +
  xlab("Shade Tolerance") +
  ylab("95th quantile shift") +
  ylim(c(-0.2, 0.2))


# Drought tolerance effects on PC1 95th quantile shifts
ggplot(pc1.diffs.traits, aes(x = drought.tolerance, y = abs(quant_95), shape = agent, color = agent)) +
  esa_layers +
  xlab("Drought Tolerance") +
  ylab("95th quantile shift magnitude")


ggplot(pc1.diffs.traits, aes(x = drought.tolerance, y = quant_95, shape = agent, color = agent)) +
  esa_layers +
  xlab("Drought Tolerance") +
  ylab("95th quantile shift") +
  ylim(c(-0.2, 0.25))


```

```{r}
# More ESA presentation style plots: magnitude (absolute value) of PC1
# quantile shifts against shade tolerance and seed weight, with shape and
# colour both encoding the agent.

# Layers shared by every plot in this chunk (previously copy-pasted four
# times): points, one lm line per agent without a confidence band and extended
# over the full x range, and the black-and-white theme with 14 pt text.
# `fullrange = TRUE` replaces `T`, which is an ordinary reassignable variable.
esa_layers <- list(
  geom_point(size = 3),
  geom_smooth(method = "lm", se = FALSE, fullrange = TRUE, size = 1),
  theme_bw(),
  theme(
    axis.text = element_text(color = "black", size = 14),
    text = element_text(size = 14),
    legend.text = element_text(size = 14),
    strip.text = element_text(color = "black", size = 14)
  )
)

# Rows used for the seed-weight plots: species.code 106 and 814 are removed,
# matching the seed-weight models.
pc1_seed_weight_rows <- pc1.diffs.traits[!pc1.diffs.traits$species.code %in% c(106, 814), ]

ggplot(pc1.diffs.traits, aes(x = shade.tolerance, y = abs(quant_5), shape = agent, color = agent)) +
  esa_layers +
  xlab("Shade Tolerance") +
  ylab("5th quantile shift magnitude")

ggplot(pc1_seed_weight_rows, aes(x = mean.weight.mg, y = abs(quant_5), shape = agent, color = agent)) +
  esa_layers +
  xlab("Seed Weight (mg)") +
  ylab("5th quantile shift magnitude")


ggplot(pc1_seed_weight_rows, aes(x = mean.weight.mg, y = abs(quant_50), shape = agent, color = agent)) +
  esa_layers +
  xlab("Seed Weight (mg)") +
  ylab("50th quantile shift magnitude")

ggplot(pc1_seed_weight_rows, aes(x = mean.weight.mg, y = abs(quant_95), shape = agent, color = agent)) +
  esa_layers +
  xlab("Seed Weight (mg)") +
  ylab("95th quantile shift magnitude")


```

## Time Since Disturbance Effects

Now I will explore how time since disturbance may be influencing shifts. I will narrow the plots in each disturbance category to only those that were disturbed 5 or more years before measurement. Because the FIA only measures conifer seedlings taller than 6 in., we would not expect them to show up in the first few years after a destructive disturbance. It looks like most plots were disturbed relatively recently, so it may not be possible to use this filter because of sample size, but it is definitely something to note in the discussion.

```{r}
# First explore time since disturbance (TSD = MEASYEAR - dist_year) of plots:
# mean, minimum and maximum TSD for each disturbance agent.
dist %>% 
  group_by(Agent) %>% 
  dplyr::summarise(mean.TSD = mean(MEASYEAR - dist_year),
            min.TSD = min(MEASYEAR - dist_year),
            max.TSD = max(MEASYEAR - dist_year))

# Distribution of TSD, one panel per agent.
ggplot(dist, aes(x=MEASYEAR - dist_year))+
  geom_histogram()+
  facet_wrap(~Agent)

# Number of plots per agent with a TSD greater than 4 years.
dist %>% 
  filter(MEASYEAR - dist_year > 4) %>% 
  group_by(Agent) %>% 
  dplyr::summarise(n=n())

# In how many plots did the disturbance occur in the same year as the
# measurement? View() opens a data viewer and needs an interactive session, so
# it breaks knitting of this evaluated chunk; print the table instead when the
# document is rendered non-interactively.
same_year_plots <- dist %>% 
  filter(dist_year == MEASYEAR)
if (interactive()) {
  View(same_year_plots)
} else {
  print(same_year_plots)
}
```


## Example PCA plot with highlighted species

```{r,echo=FALSE, eval=FALSE}
# Example plots for the presentation: the climate PCA (grey scores, black
# loading arrows and labels) with the plots occupied by one species overlaid.
# NOTE(review): list element 26 is hard-coded; confirm it is still the species
# you want to highlight if the lists are rebuilt.
pca_backdrop <- autoplot(
  clim_pca,
  x = 1, y = 2,
  colour = "grey", alpha = 0.5, scale = 0,
  loadings = TRUE, loadings.colour = "black",
  loadings.label = TRUE, loadings.label.colour = "black",
  loadings.label.vjust = -1.5, loadings.label.size = 3
)

# Black-and-white theme with 11 pt text, shared by both example plots.
pca_example_theme <- list(
  theme_bw(),
  theme(
    axis.text = element_text(color = "black", size = 11),
    text = element_text(size = 11),
    legend.text = element_text(size = 11)
  )
)

# Adults (dark red) with seedlings (red) drawn on top.
pca_backdrop +
  geom_point(data = adult_data_pcapts[[26]], aes(x = PC1, y = PC2), color = "red4") +
  geom_point(data = seedling_data_pcapts[[26]], aes(x = PC1, y = PC2), color = "red") +
  pca_example_theme

# Seedlings only (pink).
pca_backdrop +
  geom_point(data = seedling_data_pcapts[[26]], aes(x = PC1, y = PC2), color = "pink") +
  pca_example_theme

```

## Climate Niches

Now I will plot the climate "niche" of each species for each disturbance type in the two dimensional PCA space to visualize the niche shifts.

```{r, echo=FALSE, results='hide'}
# Unlist the pcapts lists: stack each list of PCA-score tables into a single
# data frame, keeping the list names in a `uid` column.
adult_pca_unlist <- ldply(adult_data_pcapts, .id = "uid")
seedling_pca_unlist <- ldply(seedling_data_pcapts, .id = "uid")

# Sanity check before stacking. bind_rows() matches columns by name and fills
# anything missing with NA, so a column present on only one side would go
# unnoticed. The previous `colnames(a) == colnames(b)` printed an element-wise
# vector (hidden by results='hide') and recycled on unequal lengths; warn
# instead so a mismatch is visible even when chunk output is suppressed.
if (!setequal(colnames(adult_pca_unlist), colnames(seedling_pca_unlist))) {
  warning("adult and seedling PCA tables do not have the same columns",
          call. = FALSE)
}

# One table for both age classes (`age` is "adult" or "seedling"), with SPCD
# converted to character so it can be joined to species.names$species.code.
comb_pcapts <- bind_rows(list(adult = adult_pca_unlist, seedling = seedling_pca_unlist), .id = "age") %>% 
  mutate(SPCD = as.character(SPCD)) %>% 
  left_join(species.names, by = c("SPCD" = "species.code"))
```

```{r}
# Thirteen-colour palette used for the species ellipses in the first plot.
species.colors <- c(
  "#be4e6b", "#45c097", "#ce4a8e", "#5aa453", "#6971d7", "#acb342", "#5b3788",
  "#c87d35", "#6d8dd7", "#95893d", "#c876cb", "#b94c3f", "#943c71"
)

# Adult PCA scores, with and without singleleaf pinyon.
adult_pcapts <- comb_pcapts %>% filter(age == "adult")
adult_pcapts_no_pimo <- adult_pcapts %>% filter(species.name != "singleleaf pinyon")

# 16 pt text for axes and legend, shared by both plots.
ellipse_text_theme <- theme(
  axis.text = element_text(size = 16),
  axis.title = element_text(size = 16),
  legend.title = element_text(size = 16),
  legend.text = element_text(size = 16)
)

# Plot all species together: 50% normal ellipses of the adult scores, with
# species in the default (alphabetical) factor order.
ggplot(adult_pcapts_no_pimo, aes(x = PC1, y = PC2, color = factor(species.name))) +
  #geom_point()+
  stat_ellipse(type = "norm", level = 0.5, cex = 1.3) +
  scale_color_manual(values = species.colors, name = "Species") +
  theme_bw() +
  ellipse_text_theme

# Same ellipses with an explicit species order for the legend and colours.
# Alpha is mapped to species as well but every level is set to 1, so all
# ellipses stay fully opaque.
species_order <- c(
  "two needle pinyon", "Gambel oak", "ponderosa pine", "white fir",
  "Douglas-fir", "limber pine", "trembling aspen", "western redcedar",
  "grand fir", "lodgepole pine", "Engelmann spruce", "subalpine fir",
  "whitebark pine"
)
ordered_species_colors <- c(
  "#59378a", "#a3b240", "#677fd8", "#5dbb68", "#bf73cb", "#45bc8d", "#ba5494",
  "#36dee6", "#bb4542", "#748a3c", "#b64468", "#c78731", "#b66b3d"
)

ggplot(adult_pcapts, aes(x = PC1, y = PC2, color = factor(species.name))) +
  #geom_point()+
  stat_ellipse(
    data = adult_pcapts_no_pimo,
    aes(
      x = PC1, y = PC2,
      color = factor(species.name, levels = species_order),
      alpha = factor(species.name, levels = species_order)
    ),
    type = "norm", level = 0.5, cex = 1.3
  ) +
  scale_color_manual(values = ordered_species_colors, name = "Species") +
  scale_alpha_manual(values = rep(1, 13), guide = 'none') +
  theme_bw() +
  ellipse_text_theme

```

```{r, echo=FALSE}
#plot climate niches of adults vs. seedlings of species in PCA space
# Plot the climate niche of adults vs. seedlings of one species under one
# disturbance agent in PCA space.
#
# Arguments:
#   agent   - value of comb_pcapts$Agent to keep (e.g. "fire", "none").
#   species - value of comb_pcapts$SPCD to keep, as a character code.
#   name    - species label used in the plot title.
#   color   - colour for the seedling points and ellipse; adults are black.
#
# Returns a ggplot object: points plus a 95% normal ellipse per age class,
# with axes fixed to the full range of the climate PCA scores so plots for
# different species/agents are directly comparable.
plot_niche <- function(agent, species, name, color) {
  # Filter once and share the result between the point and ellipse layers.
  niche_pts <- comb_pcapts %>% 
    filter(Agent == agent, SPCD == species)

  ggplot(niche_pts, aes(x = PC1, y = PC2, color = age)) +
    xlim(range(clim_pca$x[, 1])) +
    ylim(range(clim_pca$x[, 2])) +
    geom_point(size = 1) +
    stat_ellipse(type = "norm", level = 0.95, cex = 1.3) +
    # Name the colours by age class. Unnamed values are assigned in level
    # order, so a subset containing only seedlings used to be drawn in black.
    scale_color_manual(values = c(adult = "#000000", seedling = color)) +
    ggtitle(paste(name, agent, sep = "-")) +
    theme_bw() +
    theme(
      axis.text = element_text(color = "black", size = 11),
      text = element_text(size = 11),
      legend.text = element_text(size = 11)
    )
}
```

```{r}
# Niche plots for selected species/agent pairs. The colour argument follows
# the agent: fire "#F8766D", none "#C77CFF", harvest "#7CAE00",
# insect.disease "#00BFC4".
plot_niche("fire","15","white fir","#F8766D")

plot_niche("fire","106","Pinyon", "#F8766D")
# gglocator() waits for mouse clicks on the current plot, so it can only run
# in an interactive session; skip it when the document is knitted.
if (interactive()) {
  gglocator()
}
plot_niche("none", "106", "Pinyon", "#C77CFF")

plot_niche("none","108","Lodgepole pine","#C77CFF")
plot_niche("harvest", "108", "Lodgepole pine", "#7CAE00")
plot_niche("insect.disease","108","Lodgepole pine","#00BFC4")

plot_niche("insect.disease","122","ponderosa pine","#00BFC4")

## Check for an appropriate distribution for the ellipses
# For each age class x species x agent combination below: pull the PC1 and PC2
# scores, draw the descdist() skewness-kurtosis plots, then fit and plot
# candidate distributions (normal, Student t, and for one case logistic).
# The fit objects (fit.norm.pc1, fit.norm.pc2, fit.t.pc2, ...) are overwritten
# by each combination in turn.
# NOTE(review): every t fit passes `lower = c(-1,0.001,1)` (three bounds)
# while only `df` is given a start value -- confirm the intended bounds.

# PC scores of one age class / species code / agent as a plain vector.
# `axis` is the column name to return ("PC1" or "PC2").
niche_scores <- function(age_class, spcd, agent_name, axis) {
  comb_pcapts %>% 
    filter(age == !!age_class, SPCD == !!spcd, Agent == !!agent_name) %>% 
    pull(!!axis)
}

# lodgepole adults undisturbed
adult.108.none.pc1 <- niche_scores("adult", "108", "none", "PC1")
adult.108.none.pc2 <- niche_scores("adult", "108", "none", "PC2")
descdist(adult.108.none.pc1, discrete = FALSE)
descdist(adult.108.none.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(adult.108.none.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(adult.108.none.pc2, distr = "norm")
fit.t.pc2 <- fitdist(adult.108.none.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.norm.pc2)
plot(fit.t.pc2)

# lodgepole seedlings undisturbed
seed.108.none.pc1 <- niche_scores("seedling", "108", "none", "PC1")
seed.108.none.pc2 <- niche_scores("seedling", "108", "none", "PC2")
descdist(seed.108.none.pc1, discrete = FALSE)
descdist(seed.108.none.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(seed.108.none.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(seed.108.none.pc2, distr = "norm")
fit.t.pc2 <- fitdist(seed.108.none.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.t.pc2)
plot(fit.norm.pc2)

# lodgepole adults harvest
adult.108.harvest.pc1 <- niche_scores("adult", "108", "harvest", "PC1")
adult.108.harvest.pc2 <- niche_scores("adult", "108", "harvest", "PC2")
descdist(adult.108.harvest.pc1, discrete = FALSE)
descdist(adult.108.harvest.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(adult.108.harvest.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(adult.108.harvest.pc2, distr = "norm")
fit.t.pc2 <- fitdist(adult.108.harvest.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.norm.pc2)
plot(fit.t.pc2)

# lodgepole seedlings harvest (logistic fits are tried here as well)
seed.108.harvest.pc1 <- niche_scores("seedling", "108", "harvest", "PC1")
seed.108.harvest.pc2 <- niche_scores("seedling", "108", "harvest", "PC2")
descdist(seed.108.harvest.pc1, discrete = FALSE)
descdist(seed.108.harvest.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(seed.108.harvest.pc1, distr = "norm")
fit.log.pc1 <- fitdist(seed.108.harvest.pc1, distr = "logis")
plot(fit.norm.pc1)
plot(fit.log.pc1)
fit.norm.pc2 <- fitdist(seed.108.harvest.pc2, distr = "norm")
fit.t.pc2 <- fitdist(seed.108.harvest.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
fit.log.pc2 <- fitdist(seed.108.harvest.pc2, distr = "logis")
plot(fit.t.pc2)
plot(fit.norm.pc2)
plot(fit.log.pc2)

# lodgepole adults ID (insect.disease)
adult.108.insect.disease.pc1 <- niche_scores("adult", "108", "insect.disease", "PC1")
adult.108.insect.disease.pc2 <- niche_scores("adult", "108", "insect.disease", "PC2")
descdist(adult.108.insect.disease.pc1, discrete = FALSE)
descdist(adult.108.insect.disease.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(adult.108.insect.disease.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(adult.108.insect.disease.pc2, distr = "norm")
fit.t.pc2 <- fitdist(adult.108.insect.disease.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.norm.pc2)
plot(fit.t.pc2)

# lodgepole seedlings ID (insect.disease)
seed.108.insect.disease.pc1 <- niche_scores("seedling", "108", "insect.disease", "PC1")
seed.108.insect.disease.pc2 <- niche_scores("seedling", "108", "insect.disease", "PC2")
descdist(seed.108.insect.disease.pc1, discrete = FALSE)
descdist(seed.108.insect.disease.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(seed.108.insect.disease.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(seed.108.insect.disease.pc2, distr = "norm")
fit.t.pc2 <- fitdist(seed.108.insect.disease.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.t.pc2)
plot(fit.norm.pc2)

## aspen
# aspen adults none
adult.746.none.pc1 <- niche_scores("adult", "746", "none", "PC1")
adult.746.none.pc2 <- niche_scores("adult", "746", "none", "PC2")
descdist(adult.746.none.pc1, discrete = FALSE)
descdist(adult.746.none.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(adult.746.none.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(adult.746.none.pc2, distr = "norm")
fit.t.pc2 <- fitdist(adult.746.none.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.norm.pc2)
plot(fit.t.pc2)

# aspen seedlings none
seed.746.none.pc1 <- niche_scores("seedling", "746", "none", "PC1")
seed.746.none.pc2 <- niche_scores("seedling", "746", "none", "PC2")
descdist(seed.746.none.pc1, discrete = FALSE)
descdist(seed.746.none.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(seed.746.none.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(seed.746.none.pc2, distr = "norm")
fit.t.pc2 <- fitdist(seed.746.none.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.t.pc2)
plot(fit.norm.pc2)

# aspen adults fire
adult.746.fire.pc1 <- niche_scores("adult", "746", "fire", "PC1")
adult.746.fire.pc2 <- niche_scores("adult", "746", "fire", "PC2")
descdist(adult.746.fire.pc1, discrete = FALSE)
descdist(adult.746.fire.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(adult.746.fire.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(adult.746.fire.pc2, distr = "norm")
fit.t.pc2 <- fitdist(adult.746.fire.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.norm.pc2)
plot(fit.t.pc2)

# aspen seedlings fire
seed.746.fire.pc1 <- niche_scores("seedling", "746", "fire", "PC1")
seed.746.fire.pc2 <- niche_scores("seedling", "746", "fire", "PC2")
descdist(seed.746.fire.pc1, discrete = FALSE)
descdist(seed.746.fire.pc2, discrete = FALSE)
fit.norm.pc1 <- fitdist(seed.746.fire.pc1, distr = "norm")
plot(fit.norm.pc1)
fit.norm.pc2 <- fitdist(seed.746.fire.pc2, distr = "norm")
fit.t.pc2 <- fitdist(seed.746.fire.pc2, distr = "t", start = list(df = 3), lower = c(-1, 0.001, 1))
plot(fit.t.pc2)
plot(fit.norm.pc2)
### Conclusion recorded by the author: CAN USE NORMAL ELLIPSES

# Niche plots (adults vs. seedlings in PCA space) for further species/agent
# pairs. Arguments are agent, species code, title label and seedling colour;
# the colour follows the agent: fire "#F8766D", none "#C77CFF",
# insect.disease "#00BFC4".
plot_niche("none","63","Alligator juniper", "#C77CFF")

plot_niche("fire","202","Douglas fir","#F8766D")
plot_niche("insect.disease","202","Douglas fir","#00BFC4")

plot_niche("insect.disease", "814", "Gambel oak", "#00BFC4")

plot_niche("fire", "19", "subalpine fir", "#F8766D")
plot_niche("none", "19", "subalpine fir", "#C77CFF")

plot_niche("fire", "746", "aspen", "#F8766D")
plot_niche("none", "746", "aspen", "#C77CFF")

plot_niche("insect.disease","15","white fir","#00BFC4")

plot_niche("insect.disease", "93", "Engelmann spruce","#00BFC4")
plot_niche("none", "93", "Engelmann spruce","#C77CFF")

## Example plots for ESA talk: SPCD "202" on fire plots, adults and seedlings
## drawn separately on the full PCA extent.

# Scores for each age class, filtered once and reused below.
esa_adult_pts <- comb_pcapts %>% 
  filter(Agent == "fire", SPCD == "202", age == "adult")
esa_seed_pts <- comb_pcapts %>% 
  filter(Agent == "fire", SPCD == "202", age == "seedling")

# Centroids (mean PC1 / PC2) of each age class. All four are computed up
# front: the adult plot below uses the seedling means, which were previously
# not defined until after that plot, so the chunk failed in a fresh session.
pc1.mean.adult <- mean(esa_adult_pts[["PC1"]])
pc2.mean.adult <- mean(esa_adult_pts[["PC2"]])
pc1.mean.seed <- mean(esa_seed_pts[["PC1"]])
pc2.mean.seed <- mean(esa_seed_pts[["PC2"]])

# Axis limits spanning all PCA scores, and the shared 11 pt theme.
pc1_limits <- range(clim_pca$x[, 1])
pc2_limits <- range(clim_pca$x[, 2])
esa_example_theme <- list(
  theme_bw(),
  theme(
    axis.text = element_text(color = "black", size = 11),
    text = element_text(size = 11),
    legend.text = element_text(size = 11)
  )
)

# Adult plot: adult scores in black, with the seedling centroid as a diamond
# filled in the seedling colour.
ggplot() +
  xlim(pc1_limits) +
  ylim(pc2_limits) +
  geom_point(data = esa_adult_pts, aes(x = PC1, y = PC2, color = age), size = 1) +
  # Optional 5th/50th/95th quantile guides (disabled):
  # geom_vline(xintercept = quantile(esa_adult_pts[["PC1"]], c(0.05, 0.5, 0.95)), linetype = 2, color = "blue") +
  # geom_hline(yintercept = quantile(esa_adult_pts[["PC2"]], c(0.05, 0.5, 0.95)), linetype = 2, color = "red") +
  scale_color_manual(values = c("#000000")) +
  geom_point(aes(x = pc1.mean.seed, y = pc2.mean.seed), shape = 23, size = 5, color = "black", fill = "#F8766D") +
  esa_example_theme

# Seedling plot: seedling scores in colour, with a black diamond.
# NOTE(review): the black diamond is placed at the *seedling* centroid, and
# pc1.mean.adult / pc2.mean.adult are never used in this chunk. If the diamond
# is meant to mark the adult centroid, swap in the adult means -- confirm.
ggplot() +
  xlim(pc1_limits) +
  ylim(pc2_limits) +
  geom_point(data = esa_seed_pts, aes(x = PC1, y = PC2, color = age), size = 1) +
  # Optional 5th/50th/95th quantile guides (disabled):
  # geom_vline(xintercept = quantile(esa_seed_pts[["PC1"]], c(0.05, 0.5, 0.95)), linetype = 2, color = "blue") +
  # geom_hline(yintercept = quantile(esa_seed_pts[["PC2"]], c(0.05, 0.5, 0.95)), linetype = 2, color = "red") +
  scale_color_manual(values = c("#F8766D")) +
  geom_point(aes(x = pc1.mean.seed, y = pc2.mean.seed), shape = 23, size = 5, color = "black", fill = "black") +
  esa_example_theme

```

```{r}
# Plot niches in "parabola" (density curve) form for the ESA presentation.

# Observed example: density of CMD by age class for uid "108_none_cvars".
cmd_108_none <- comb_pcapts %>% 
  filter(uid == "108_none_cvars") %>% 
  dplyr::select(age, CMD)
ggdensity(cmd_108_none, x = "CMD", color = "age")

# Illustrative fake data: two beta-distributed samples standing in for adult
# and seedling temperature niches (draws are random; no seed is set).
adult.fake <- rbeta(n = 100000, shape1 = 5, shape2 = 5)
seedling.fake <- rbeta(n = 100000, shape1 = 4, shape2 = 5)
fake.data <- data.frame(
  age = rep(c("adult", "seedling"),
            times = c(length(adult.fake), length(seedling.fake))),
  temperature = c(adult.fake, seedling.fake)
)

ggdensity(fake.data, x = "temperature", color = "age")

# Exact beta density curves on [0, 1], skewed in opposite directions.
curve(dbeta(x, 8, 4), xlim = c(0, 1))
curve(dbeta(x, 4, 8), xlim = c(0, 1))
```

An aside: a closer look at where the pinyon "hot plots" are located.
```{r, echo=FALSE, eval=FALSE}
# Investigating pinyon "hot plots": fire plots with SPCD 106 seedlings whose
# PCA scores fall in the corner PC1 < -0.9, PC2 > -1, mapped against all fire
# plots with SPCD 106 seedlings.

# State outlines for the base map.
usa <- map_data("state")

# All SPCD 106 seedling records on fire plots.
pinyon_fire_seedlings <- seedling_pca_unlist %>% 
  filter(Agent == "fire", SPCD == 106)

# The subset in the PC1/PC2 corner of interest, with plot attributes attached.
pinyon_hotplots <- pinyon_fire_seedlings %>% 
  filter(PC1 < -0.9, PC2 > -1) %>% 
  left_join(plot, by = c("PLT_CN" = "CN"), keep = FALSE)

# Every fire plot with SPCD 106 seedlings, with plot attributes attached.
pinyon_fireplots <- pinyon_fire_seedlings %>% 
  left_join(plot, by = c("PLT_CN" = "CN"), keep = FALSE)

# Map: all such fire plots in black, the hot plots overplotted in red.
ggplot() +
  geom_polygon(
    data = usa,
    aes(x = long, y = lat, group = group),
    color = "black", fill = "lightblue"
  ) +
  geom_point(data = pinyon_fireplots, aes(x = LON, y = LAT)) +
  geom_point(data = pinyon_hotplots, aes(x = LON, y = LAT), color = "red") +
  coord_quickmap()
```

Here I calculate the centroids in PC space by taking the mean of PC1 coordinates and PC2 coordinates for each species, age, and disturbance agent.

```{r, eval=FALSE}
# Calculate the centroid in PCA space (mean PC1, mean PC2) for every element
# of the adult and seedling lists, i.e. for each species in each disturbance
# type.
#
# The tables are built with lapply() + bind_rows() instead of growing a data
# frame inside `for (i in 1:length(...))`: that loop re-copied the result on
# every iteration, re-ran unite() on the already-united rows each time, and
# `1:length(x)` iterates over c(1, 0) when the list is empty. lapply() is used
# rather than map() because mapdata attaches maps, whose map() masks purrr's.

# Centroid of one table of PCA scores, labelled with its SPCD and Agent.
pca_centroid <- function(pca_points) {
  pca_points %>% 
    dplyr::summarise(SPCD = unique(SPCD), Agent = unique(Agent), mean.pc1 = mean(PC1), mean.pc2 = mean(PC2))
}

# Stack the per-element centroids and add a "SPCD_Agent" identifier column.
adult_centroids <- adult_data_pcapts %>% 
  lapply(pca_centroid) %>% 
  bind_rows() %>% 
  unite(uid, c(SPCD, Agent), sep = "_", remove = FALSE)
adult_centroids

# Do the same for seedlings.
seedling_centroids <- seedling_data_pcapts %>% 
  lapply(pca_centroid) %>% 
  bind_rows() %>% 
  unite(uid, c(SPCD, Agent), sep = "_", remove = FALSE)
seedling_centroids
```

```{r, eval=FALSE}
## Calculating some numbers for the McIntire Stennis report

# Print the inputs for reference.
adults
dist
species.names

# Avoid scientific notation when printing (e.g. long plot identifiers).
options(scipen = 9999)

# Plots that contain an adult or a seedling of any study species.
unique.plots <- adults %>% 
  filter(SPCD %in% species.names$species.code) %>% 
  bind_rows(seeds %>% 
              filter(SPCD %in% species.names$species.code)) %>% 
  ungroup() %>% 
  dplyr::select(PLT_CN) %>%
  unique()

# Disturbance records restricted to those plots.
unique.plotsdist <- dist %>% 
  semi_join(unique.plots, by="PLT_CN")

# Number of such plots per agent.
unique.plotsdist %>% 
  group_by(Agent) %>% 
  dplyr::summarise(n = n())

# Number of such plots per agent, with the latest and earliest disturbance year.
unique.plotsdist %>% 
  group_by(Agent) %>% 
  dplyr::summarise(n = n(), max = max(dist_year), min = min(dist_year))

### Look at condition data
# Same file that is read during set-up; re-read so this chunk can run alone.
cond <- read_csv("compiled_data_annual2020/COND.csv")

# Condition records for every plot in `dist`.
plot.conds<- cond %>% 
  filter(PLT_CN %in% dist$PLT_CN)

nrow(plot.conds)
summary(plot.conds$COND_STATUS_CD)
# dplyr::summarise() is spelled out here and below: plyr is attached after
# dplyr, so a bare summarize() resolves to plyr::summarize(), which cannot
# evaluate n().
plot.conds %>%
  group_by(COND_STATUS_CD) %>% 
  dplyr::summarise(n=n())

# Condition records for the plots that contain a study species.
unique.plots.cond <- cond %>% 
  filter(PLT_CN %in% unique.plotsdist$PLT_CN)
unique.plots.cond %>% 
  group_by(COND_STATUS_CD) %>% 
  dplyr::summarise(n=n())

```

```{r, eval=FALSE}
### Make maps for the ECOL693 presentation

# Plot table restricted to (and expanded by) the disturbance records.
plot.cut <- plot %>% 
  right_join(dist,by=c("CN" = "PLT_CN"))
head(plot.cut)
nrow(plot.cut)

# Coordinates of the plots belonging to one agent. dplyr::select() is written
# out because MASS is attached after dplyr and masks select(); the previous
# bare select() used for the insect.disease locations therefore hit
# MASS::select().
agent_locations <- function(agent_name) {
  plot.cut %>%
    filter(Agent == !!agent_name) %>% 
    dplyr::select(LAT,LON)
}

fire.locations <- agent_locations("fire")
none.locations <- agent_locations("none")
id.locations <- agent_locations("insect.disease")
harvest.locations <- agent_locations("harvest")

# Outline used as the base map.
usa <- map_data("usa")

# Undisturbed ("none") plots only.
ggplot()+
  geom_polygon(data=usa,aes(x=long,y=lat,group=group),fill="white",color="black")+
  geom_point(data=none.locations,aes(x=LON, y= LAT))+
  coord_fixed(1.3)

# All four agents, coloured like the niche plots; later layers are drawn on
# top of earlier ones.
ggplot()+
  geom_polygon(data=usa,aes(x=long,y=lat,group=group),fill="white",color="black")+
  geom_point(data=none.locations,aes(x=LON, y= LAT), color="#C77CFF")+
  geom_point(data=fire.locations,aes(x=LON, y= LAT),color="#F8766D")+
  geom_point(data=id.locations,aes(x=LON, y= LAT),color="#00BFC4")+
  geom_point(data=harvest.locations,aes(x=LON, y= LAT),color="#7CAE00")+
  coord_fixed(1.3)

# Interactive leaflet maps, one per agent.
leaflet(none.locations) %>%
  addTiles() %>%
  addCircles(lng = ~LON, lat = ~LAT, color="purple")
leaflet(fire.locations) %>%
  addTiles() %>%
  addCircles(lng = ~LON, lat = ~LAT, color="red")
leaflet(id.locations) %>%
  addTiles() %>%
  addCircles(lng = ~LON, lat = ~LAT, color="blue")
leaflet(harvest.locations) %>%
  addTiles() %>%
  addCircles(lng = ~LON, lat = ~LAT, color="green")

```

## Plotting shifts in a different way

Look at whether each species is experiencing contraction, expansion, or shift on PC1. 

```{r}
# Position of the 5th and 95th PC1 quantiles for adults and seedlings of every
# species x agent combination, to show contraction, expansion or shift on PC1.
#
# Changes from the previous loop:
# - adults and seedlings are paired by list *name* rather than by position, so
#   a differently ordered seedling list cannot silently mismatch species;
# - rows are collected with lapply() + bind_rows() instead of rbind() inside
#   `for (i in 1:length(...))`;
# - the species code is converted to character, as is done for comb_pcapts,
#   before it is joined to species.names$species.code.

# 5th and 95th quantile of PC1 for one table of PCA scores (unnamed numerics).
pc1_tail_positions <- function(pca_points) {
  quantile(pca_points$PC1, probs = c(0.05, 0.95), names = FALSE)
}

shifts.raw <- lapply(names(adult_data_pcapts), function(uid) {
  adult_pts <- adult_data_pcapts[[uid]]
  seed_pts <- seedling_data_pcapts[[uid]]
  if (is.null(seed_pts)) {
    stop("no seedling PCA scores found for '", uid, "'", call. = FALSE)
  }

  adult_q <- pc1_tail_positions(adult_pts)
  seed_q <- pc1_tail_positions(seed_pts)

  # Two rows per combination: one for adults, one for seedlings.
  data.frame(
    species = rep(as.character(unique(adult_pts$SPCD)), 2),
    agent = rep(unique(adult_pts$Agent), 2),
    age = c("adult", "seed"),
    pos.05 = c(adult_q[1], seed_q[1]),
    pos.95 = c(adult_q[2], seed_q[2])
  )
}) %>% 
  bind_rows()

# Attach species names for the facet labels.
shifts.raw <- shifts.raw %>% 
  left_join(species.names, by = c(species = "species.code"))

# One bar per age class and agent spanning the 5th to 95th PC1 quantile,
# faceted by species (adults black, seedlings green).
ggplot(shifts.raw)+
  geom_errorbar(aes(x= agent, ymin=pos.05, ymax=pos.95, color=age, width=0.3), size=0.5, position = "dodge")+
  facet_wrap(~species.name)+
  scale_color_manual(values=c("black","green3"))+
  ggtitle("PC1")+
  theme(axis.text.x = element_text(angle=45,vjust=.7))


```