# df with proportions
dat_prop <- dat1 %>%
mutate(Diatoms_prop = Diatoms/TotalCells,
Dinoflagellates_prop = Dinoflagellates/TotalCells,
Cyanobacteria_prop = Cyanobacteria/TotalCells,
Chlorophyta_prop = Chlorophyta/TotalCells,
Chrysophyta_prop = Chrysophyta/TotalCells,
uniqueID = paste(Date, Site),
Date = as.character(Date),
Month = str_split_fixed(Date, "/", 2)[ ,1],
Day = str_split_fixed(Date, "/", 2)[ ,2],
Date = dmy(paste(Day, Month, 2020)))
# df for pie chart
dat_pie_prop <- dat_prop %>%
select(-Diatoms, -Dinoflagellates, -Cyanobacteria, -Chlorophyta, -Chrysophyta, -TotalCells) %>%
pivot_longer(cols = 4:8, names_to = "phyto_type") %>%
group_by(ID)
# pie chart of proportions for each sample
ggplot(dat_pie_prop, aes(x="", y=value, fill=phyto_type)) +
facet_wrap(~ ID, ncol = 10) +
geom_bar(stat="identity", width=1) +
coord_polar("y") +
scale_fill_manual(values=c("#24AB55", "#FF7A32", "#FFC433",
"#308DF5","#D83535"),
labels=c("Chlorophytes", "Chrysophytes", "Cyanobacteria",
"Diatoms", "Dinoflagellates"),
name="Phytoplankton taxonmic group") +
ylab("Site") + xlab("Date") +
theme(axis.text.x=element_blank()) +
ggtitle("Proportion of phytoplankton taxonomic groups in each sample")
# line plot proportions for each site over time
dat_line_prop <- dat_prop %>%
select(-Diatoms, -Dinoflagellates, -Cyanobacteria, -Chlorophyta, -Chrysophyta, -TotalCells) %>%
pivot_longer(cols = 4:8, names_to = "phyto_type") %>%
group_by(Site)
ggplot(dat_line_prop, aes(x=Date, y=value, color=phyto_type)) +
facet_wrap(~ Site, nrow =2) + geom_line() +
scale_color_manual(values=c("#24AB55", "#FF7A32", "#FFC433",
"#308DF5","#D83535", "black"),
labels=c("Chlorophytes", "Chrysophytes", "Cyanobacteria",
"Diatoms", "Dinoflagellates"),
name="Phytoplankton taxonomic group") +
theme_classic() + ylab("Proportion") +
theme(legend.position="bottom") +
guides(col = guide_legend(nrow = 1, byrow = TRUE)) +
theme(legend.text=element_text(size=12))
# 5 taxonomic groups averaged by date over time
dateavg_datp <- dat_prop %>%
group_by(Date) %>%
summarise(Diatoms_mean = mean(Diatoms_prop),
Dinoflagellates_mean = mean(Dinoflagellates_prop),
Cyanobacteria_mean = mean(Cyanobacteria_prop),
Chlorophyta_mean = mean(Chlorophyta_prop),
Chrysophyta_mean = mean(Chrysophyta_prop)) %>%
pivot_longer(cols = -1, names_to = "phyto_type")
# Create date vector (IMPORTANT FOR ALL PLOTS FOLLOWING)
dates <- unique(dateavg_datp$Date)
# most important capstone plot: phyto community structure over time
ggplot(dateavg_datp, aes(x=Date, y=value, color=phyto_type)) + geom_line() + geom_point() +
scale_color_manual(values=c("#24AB55", "#FF7A32", "#FFC433",
"#308DF5","#D83535", "black"),
labels=c("Chlorophytes", "Chrysophytes", "Cyanobacteria",
"Diatoms", "Dinoflagellates"),
name="Phytoplankton taxonomic group") +
theme_classic() + ylab("Proportion") +
scale_x_date(breaks = dates, date_labels = "%b %d") +
theme(legend.position="bottom") +
guides(col = guide_legend(nrow = 3, byrow = TRUE))
# Simpson and Shannon Diversity indices plots
phytoData2 <- read.csv("sites_data.csv") #load phytoplankton data
phytoData3 <- phytoData2 %>%
mutate(Date = as.character(Date),
Month = str_split_fixed(Date, "/", 2)[ ,1],
Day = str_split_fixed(Date, "/", 2)[ ,2],
Date = dmy(paste(Day, Month, 2020)))
## Warning: 930 failed to parse.
phytoData3$simpDiversity <- diversity(phytoData2[,3:7], "simpson") #simpson index
phytoData3$shanDiversity <- diversity(phytoData2[,3:7], "shannon") #shannon index
# simpson boxplot
ggplot(phytoData3, aes(x = Date, y = simpDiversity, group = Date)) +
geom_boxplot() + ylab("Simpson Diversity Index") +
scale_x_date(breaks = dates, date_labels = "%b %d") + theme_classic()
## Warning: Removed 930 rows containing missing values (stat_boxplot).
# shannon boxplot
ggplot(phytoData3, aes(x = Date, y = shanDiversity, group = Date)) +
geom_boxplot(fill = "bisque") + ylab("Shannon Diversity Index")
## Warning: Removed 930 rows containing missing values (stat_boxplot).
# Boxplot and ANOVA tests
# boxplots
ggplot(dat_prop, aes(x = Date, y = Cyanobacteria_prop, group = Date)) +
geom_boxplot(fill = "bisque") + ylab("Proportion of Cyanobacteria") +
theme_classic() + scale_x_date(breaks = dates, date_labels = "%b %d")
# anova
anova.test <- anova(lm(Cyanobacteria_prop ~ Date, dat_prop))
# anova test for difference in date
as.factor(dat_prop$Date) -> dat_prop$Date
aov(Cyanobacteria_prop ~ Date, dat_prop) -> aov.test
summary(aov.test)
## Df Sum Sq Mean Sq F value Pr(>F)
## Date 5 1.678 0.3356 12.13 6.75e-08 ***
## Residuals 54 1.494 0.0277
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(aov.test)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Cyanobacteria_prop ~ Date, data = dat_prop)
##
## $Date
## diff lwr upr p adj
## 2020-09-15-2020-09-08 0.06438642 -0.15536964 0.28414247 0.9530198
## 2020-09-22-2020-09-08 0.39724737 0.17749131 0.61700342 0.0000272
## 2020-09-29-2020-09-08 0.41949284 0.19973678 0.63924889 0.0000093
## 2020-10-06-2020-09-08 0.36916506 0.14940901 0.58892112 0.0001030
## 2020-10-20-2020-09-08 0.16352612 -0.05622994 0.38328217 0.2554076
## 2020-09-22-2020-09-15 0.33286095 0.11310490 0.55261701 0.0005456
## 2020-09-29-2020-09-15 0.35510642 0.13535037 0.57486248 0.0001982
## 2020-10-06-2020-09-15 0.30477864 0.08502259 0.52453470 0.0018714
## 2020-10-20-2020-09-15 0.09913970 -0.12061635 0.31889576 0.7655872
## 2020-09-29-2020-09-22 0.02224547 -0.19751059 0.24200152 0.9996603
## 2020-10-06-2020-09-22 -0.02808231 -0.24783836 0.19167375 0.9989451
## 2020-10-20-2020-09-22 -0.23372125 -0.45347730 -0.01396519 0.0308790
## 2020-10-06-2020-09-29 -0.05032778 -0.27008383 0.16942828 0.9838049
## 2020-10-20-2020-09-29 -0.25596672 -0.47572277 -0.03621066 0.0136050
## 2020-10-20-2020-10-06 -0.20563894 -0.42539500 0.01411711 0.0791028
# anova test for difference in site
anova.test_cyano_site <- anova(lm(Cyanobacteria_prop ~ Site, dat_prop))
aov.test_cyano_site <- aov(Cyanobacteria_prop ~ Site, dat_prop)
summary(aov.test_cyano_site)
## Df Sum Sq Mean Sq F value Pr(>F)
## Site 9 0.3115 0.03461 0.605 0.787
## Residuals 50 2.8603 0.05721
TukeyHSD(aov.test_cyano_site)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Cyanobacteria_prop ~ Site, data = dat_prop)
##
## $Site
## diff lwr upr p adj
## B-A 0.099133696 -0.3579798 0.5562472 0.9992646
## C-A -0.019027993 -0.4761415 0.4380855 1.0000000
## D-A 0.169575191 -0.2875383 0.6266887 0.9640971
## E-A 0.185667187 -0.2714464 0.6427807 0.9377740
## F-A 0.101288079 -0.3558255 0.5584016 0.9991276
## G-A 0.045459192 -0.4116543 0.5025727 0.9999990
## H-A 0.199222276 -0.2578913 0.6563358 0.9073106
## I-A 0.103293776 -0.3538198 0.5604073 0.9989814
## J-A 0.153669321 -0.3034442 0.6107829 0.9810691
## C-B -0.118161688 -0.5752752 0.3389518 0.9971190
## D-B 0.070441495 -0.3866720 0.5275550 0.9999560
## E-B 0.086533492 -0.3705800 0.5436470 0.9997551
## F-B 0.002154383 -0.4549592 0.4592679 1.0000000
## G-B -0.053674504 -0.5107880 0.4034390 0.9999957
## H-B 0.100088581 -0.3570250 0.5572021 0.9992063
## I-B 0.004160080 -0.4529535 0.4612736 1.0000000
## J-B 0.054535626 -0.4025779 0.5116492 0.9999951
## D-C 0.188603184 -0.2685104 0.6457167 0.9318450
## E-C 0.204695180 -0.2524184 0.6618087 0.8927411
## F-C 0.120316071 -0.3367975 0.5774296 0.9966983
## G-C 0.064487185 -0.3926264 0.5216007 0.9999792
## H-C 0.218250269 -0.2388633 0.6753638 0.8510451
## I-C 0.122321769 -0.3347918 0.5794353 0.9962627
## J-C 0.172697314 -0.2844162 0.6298109 0.9597679
## E-D 0.016091996 -0.4410215 0.4732055 1.0000000
## F-D -0.068287113 -0.5254007 0.3888264 0.9999662
## G-D -0.124115999 -0.5812295 0.3329975 0.9958344
## H-D 0.029647085 -0.4274665 0.4867606 1.0000000
## I-D -0.066281415 -0.5233950 0.3908321 0.9999738
## J-D -0.015905870 -0.4730194 0.4412077 1.0000000
## F-E -0.084379109 -0.5414926 0.3727344 0.9998010
## G-E -0.140207995 -0.5973215 0.3169055 0.9899242
## H-E 0.013555089 -0.4435584 0.4706686 1.0000000
## I-E -0.082373411 -0.5394869 0.3747401 0.9998368
## J-E -0.031997866 -0.4891114 0.4251157 1.0000000
## G-F -0.055828886 -0.5129424 0.4012847 0.9999940
## H-F 0.097934198 -0.3591793 0.5550477 0.9993327
## I-F 0.002005698 -0.4551078 0.4591192 1.0000000
## J-F 0.052381243 -0.4047323 0.5094948 0.9999965
## H-G 0.153763084 -0.3033505 0.6108766 0.9809916
## I-G 0.057834584 -0.3992790 0.5149481 0.9999919
## J-G 0.108210129 -0.3489034 0.5653237 0.9985338
## I-H -0.095928500 -0.5530420 0.3611850 0.9994346
## J-H -0.045552955 -0.5026665 0.4115606 0.9999990
## J-I 0.050375545 -0.4067380 0.5074891 0.9999975
# Environmental Variables plots
# import the data
env_dat <- read.csv("environmental_variables_data.csv")
env_dat2 <- env_dat %>%
mutate(Date = as.character(Date),
Month = str_split_fixed(Date, "/", 2)[ ,1],
Day = str_split_fixed(Date, "/", 2)[ ,2],
Date = dmy(paste(Day, Month, 2020)))
# Chl-a and cyano final plot
coeff <- 100
# create plot
ggplot(env_dat2, aes(x=Date)) +
geom_line(aes(y=Prop_cyano, color = "Proportion of Cyanobacteria")) +
geom_point(aes(y=Prop_cyano, color = "Proportion of Cyanobacteria")) +
geom_line(aes(y=Chl_a_conc / coeff, color = "Chl-a concentration")) +
geom_point(aes(y=Chl_a_conc / coeff, color = "Chl-a concentration")) +
scale_y_continuous(name = "Proportion of Cyanobacteria",
sec.axis = sec_axis(~.*coeff, name="Chl-a concentration")) +
theme_classic() + scale_x_date(breaks = dates, date_labels = "%b %d") +
theme(legend.position="bottom") +
guides(col = guide_legend(nrow = 1, byrow = TRUE)) +
theme(legend.text=element_text(size=12),
axis.text=element_text(size=12),
axis.title =element_text(size =14))
# load the nutrient data
nut_datb <- read.csv("nutrientdat copy.csv")
# correlation: cyano vs nutrient ratio
ggplot(nut_datb, aes(x = din_srp_ratio, y=cyano_prop)) + geom_point() +
ylab("Proportion of Cyanobacteria") + xlab("DIN:SRP ratio") +
theme_classic() + stat_smooth(method ="lm")
regression1 <- lm(cyano_prop ~ din_srp_ratio, dat = nut_datb)
summary(regression1)
##
## Call:
## lm(formula = cyano_prop ~ din_srp_ratio, data = nut_datb)
##
## Residuals:
## 1 2 3 4 5
## -0.28231 -0.14265 0.17243 0.09463 0.15791
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.668e-01 1.686e-01 3.955 0.0289 *
## din_srp_ratio 2.189e-05 5.211e-05 0.420 0.7028
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2336 on 3 degrees of freedom
## Multiple R-squared: 0.05553, Adjusted R-squared: -0.2593
## F-statistic: 0.1764 on 1 and 3 DF, p-value: 0.7028
# Abiotic factors plots
se_samp_dat <- read.csv("sites_environmental_data.csv")
se_samp_dat2 <- se_samp_dat %>%
mutate(Date = as.character(Date),
Month = str_split_fixed(Date, "/", 2)[ ,1],
Day = str_split_fixed(Date, "/", 2)[ ,2],
Date = dmy(paste(Day, Month, 2020))) %>%
select(-Month, -Day)
# average by date
se_samp_dat_sum <- se_samp_dat2 %>%
group_by(Date) %>%
summarise(avg_s.m = mean(S.m),
avg_temp = mean(Temp),
avg_secchi = mean(Secchi))
# errors table
error_se_samp_dat_sum <- se_samp_dat2 %>%
group_by(Date) %>%
summarise(s.m_se = mean(S.m_se),
temp_se = mean(Temp_se),
secchi_se = mean(Secchi_se))
# flip errors table
error_flip <- error_se_samp_dat_sum %>%
rename("Mean Secchi Depth (m)" = secchi_se,
"Mean Temperature (ËšC)" = temp_se,
"Mean Conductivity (S/m)" = s.m_se) %>%
pivot_longer(cols = 2:4, names_to = "Variable") %>%
arrange(Date, Variable) %>%
mutate(unique_id = c(1:18)) %>%
rename("se" = value)
# plot df
se_samp_dat_flip <- se_samp_dat_sum %>%
rename("Mean Secchi Depth (m)" = avg_secchi,
"Mean Temperature (ËšC)" = avg_temp,
"Mean Conductivity (S/m)" = avg_s.m) %>%
pivot_longer(cols = 2:4, names_to = "Variable") %>%
arrange(Date, Variable) %>%
mutate(unique_id = c(1:18))
# join the dfs
se_samp_dat_plot <- left_join(se_samp_dat_flip, error_flip, by = "unique_id")
se_samp_dat_plot2 <- se_samp_dat_plot %>%
rename("Date" = Date.x,
"Variable" = Variable.x) %>%
select(Date, Variable, value, se)
# 3 tier env variable plot for our SAMPLE data:
ggplot(se_samp_dat_plot2, aes(x=Date, y=value, group = 1, col = Variable)) +
geom_line() + geom_point() +
geom_errorbar(aes(ymin=value-se, ymax=value+se), width=.2) +
facet_grid(Variable ~ ., scales = "free") + theme_classic() +
scale_x_date(breaks = dates, date_labels = "%b %d")