Analysis of Goose Point growth data Part 1

This post details analysis of growth data measured by Noah K. from the Goose Point outplants from June to September 2024.

Growth data from November 2024 is in progress and will be analyzed in a Part 2 post.

Overview

All data and scripts for this project are located on GitHub here.

tl;dr

There is no clear signal of treatment effects on growth from June to September 2024. We will add December data in the next round of analysis.

Set up

Set up workspace, set options, and load required packages.

knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

Load libraries.

if ("tidyverse" %in% rownames(installed.packages()) == 'FALSE') install.packages('tidyverse') 
if ("ggplot2" %in% rownames(installed.packages()) == 'FALSE') install.packages('ggplot2') 
if ("ggeffects" %in% rownames(installed.packages()) == 'FALSE') install.packages('ggeffects') 

library("ggplot2")
library("tidyverse")
library("lme4")
library("lmerTest")
library("emmeans")
library("car")
library("mgcv")
library("ggeffects")
library("cowplot")

Load data

Read in data files.

data<-read_csv(file="data/outplanting/GoosePoint/growth_GoosePoint.csv")%>%
  mutate(date=as.factor(date))

Set data attributes.

data$field_cattle_tag<-factor(data$field_cattle_tag)

Set value to 0 for dead or stuck if NA.

data$dead[is.na(data$dead)] <- 0
data$stuck[is.na(data$stuck)] <- 0

Add in treatment information

metadata<-read_csv(file="data/outplanting/GoosePoint/bag_list_GoosePoint.csv")
metadata$field_cattle_tag<-factor(metadata$field_cattle_tag)

data$treatment<-metadata$treatment[match(data$field_cattle_tag, metadata$field_cattle_tag)]

Plot data

Plot a histogram of each data column to identify any outliers.

hist(data$length.mm)

hist(data$width.mm)

hist(data$depth.mm)

Plot a box plot of length, width, and depth as a function of bag number, colored by treatment.

plot1<-data%>%
  ggplot(aes(x=field_cattle_tag, y=length.mm, fill = treatment)) +
  facet_wrap(~date)+
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.4)) +
  geom_point(pch = 21, position=position_jitterdodge(dodge.width=0.4)) +
  xlab("Bag") + 
  theme_classic(); plot1

Plot width.

plot2<-data%>%
  ggplot(aes(x=field_cattle_tag, y=width.mm, fill = treatment)) +
  facet_wrap(~date)+
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.4)) +
  geom_point(pch = 21, position=position_jitterdodge(dodge.width=0.4)) +
  xlab("Bag") + 
  theme_classic(); plot2

Plot depth

plot3<-data%>%
  ggplot(aes(x=field_cattle_tag, y=depth.mm, fill = treatment)) +
  facet_wrap(~date)+
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.4)) +
  geom_point(pch = 21, position=position_jitterdodge(dodge.width=0.4)) +
  xlab("Bag") + 
  theme_classic(); plot3

Calculate oyster volume

Generate an equation to predict depth from length and width using 20240909 data as a training set and then use this equation to calculate predicted volume from length and width for other dates.

Calculate oyster volume as an ellipsoid using known.

data$volume <- (4/3) * pi * (data$length.mm/2) * (data$width.mm/2) * (data$depth.mm/2)

View the relationship between length, width, and volume.

plot(data$volume ~ data$length.mm)

plot(data$volume ~ data$width.mm)

Relationships may be non linear.

Obtain training data with known length width and depth from 20240909 dataset.

training_data<-data%>%
  filter(date=="20240909")%>%
  filter(!is.na(volume))

hist(training_data$volume)

Fit a polynomial regression and a GAM model and compare the fits.

# Fit a polynomial regression model (2nd-degree polynomial)
poly_model <- lm(volume ~ poly(length.mm, 2) + poly(width.mm, 2) + length.mm:width.mm, data = training_data)

# Fit a Generalized Additive Model (GAM) for comparison
gam_model <- gam(volume ~ s(length.mm, bs = "cs") + s(width.mm, bs = "cs"), data = training_data)

# Compare model summaries
summary(poly_model)
## 
## Call:
## lm(formula = volume ~ poly(length.mm, 2) + poly(width.mm, 2) + 
##     length.mm:width.mm, data = training_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6344.4 -1118.9  -139.2   950.4  9785.6 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -914.986   1936.769  -0.472  0.63669    
## poly(length.mm, 2)1 11702.736  10669.495   1.097  0.27290    
## poly(length.mm, 2)2  5426.512   1931.416   2.810  0.00503 ** 
## poly(width.mm, 2)1    751.955  11580.531   0.065  0.94824    
## poly(width.mm, 2)2  -5143.753   1916.599  -2.684  0.00736 ** 
## length.mm:width.mm      8.191      1.357   6.034 2.04e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1778 on 1412 degrees of freedom
## Multiple R-squared:  0.7583, Adjusted R-squared:  0.7574 
## F-statistic: 885.9 on 5 and 1412 DF,  p-value: < 2.2e-16
summary(gam_model)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## volume ~ s(length.mm, bs = "cs") + s(width.mm, bs = "cs")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10768.05      47.46   226.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                edf Ref.df     F p-value    
## s(length.mm) 8.217      9 179.0  <2e-16 ***
## s(width.mm)  4.642      9 152.8  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.755   Deviance explained = 75.7%
## GCV = 3.2255e+06  Scale est. = 3.194e+06  n = 1418

Poly model adjusted R sq = 0.757 GAM model adjusted R sq = 0.755

Both models have similar fits. Plot the models.

plot(poly_model)

plot(gam_model)

Predict volume using both models and evaluate fits.

# Predict Volume using the trained model
data$Predicted_Volume_GAM <- predict(gam_model, data)
data$Predicted_Volume_Poly <- predict(poly_model, data)

Plot predicted vs actual volume for each model.

plot(data$Predicted_Volume_GAM ~ data$volume)

plot(data$Predicted_Volume_Poly ~ data$volume)

Relationships look similar for both models.

Plot GAM relationships

# Plot the relationship for visualization
ggplot(training_data, aes(x = length.mm, y = volume)) +
  geom_point() +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"), color = "blue") +
  ggtitle("GAM Fit for Length vs Volume")

ggplot(training_data, aes(x = width.mm, y = volume)) +
  geom_point() +
  geom_smooth(method = "gam", formula = y ~ s(x, bs = "cs"), color = "red") +
  ggtitle("GAM Fit for Width vs Volume")

Plot Poly model relationships

# Plot polynomial fit for Length vs. Volume
ggplot(training_data, aes(x = length.mm, y = volume)) +
  geom_point() +
  stat_smooth(method = "lm", formula = y ~ poly(x, 2), color = "blue", se = TRUE) +
  ggtitle("Polynomial Regression (2nd Degree) Fit for Length vs Volume")

# Plot polynomial fit for Width vs. Volume
ggplot(training_data, aes(x = width.mm, y = volume)) +
  geom_point() +
  stat_smooth(method = "lm", formula = y ~ poly(x, 2), color = "red", se = TRUE) +
  ggtitle("Polynomial Regression (2nd Degree) Fit for Width vs Volume")

I am going to select the polynomial model, because the GAM is more affected by higher observations in width.

View the relationship between known volume and predicted volume using Polynomial regression.

# Plot polynomial fit for Length vs. Volume
ggplot(data%>%filter(date=="20240909"), aes(x = volume, y = Predicted_Volume_Poly)) +
  geom_point() +
  stat_smooth(method = "loess", color = "blue", se = TRUE) +
  ggtitle("Polynomial Regression (2nd Degree) Fit for Known vs Predicted Volume")

Model fit looks good for our approximation.

Proceed with using predicted volume calculated by polynomial regression.

Look for outliers.

hist(data$Predicted_Volume_Poly)

min(data$Predicted_Volume_Poly)
## [1] -1316.614

Remove outliers and reconstruct polynomial model.

There are several outliers. Conduct outlier removal and analyze again.

# Fit a polynomial regression model (2nd-degree polynomial)
poly_model <- lm(volume ~ poly(length.mm, 2) + poly(width.mm, 2) + length.mm:width.mm, data = training_data)

# Extract raw residuals
training_data$raw_resid <- residuals(poly_model)

# Standardize residuals
training_data$std_resid <- training_data$raw_resid / sd(training_data$raw_resid)

# Flag potential outliers
outlier_threshold <- 3
training_data$outlier_flag <- abs(training_data$std_resid) > outlier_threshold

# Filter rows flagged as outliers
outliers <- training_data %>% filter(outlier_flag == TRUE)
print(outliers)
## # A tibble: 17 × 14
##    field_cattle_tag date     oyster length.mm width.mm depth.mm  dead stuck
##    <fct>            <fct>     <dbl>     <dbl>    <dbl>    <dbl> <dbl> <dbl>
##  1 83               20240909     63      58.0     38.6     10.5     0     0
##  2 91               20240909     68      42.4     35.2     22.6     0     0
##  3 86               20240909     79      44.2     30.7     22.6     0     0
##  4 95               20240909     20      43.8     38.3     24.1     0     0
##  5 95               20240909     32      46.2     29.9     22.1     0     0
##  6 88               20240909      4      43.8     41.3     20.6     0     0
##  7 88               20240909     32      49.5     41.7     20.9     0     0
##  8 88               20240909     92      41.8     35.5     23.0     0     0
##  9 96               20240909     53      53.5     31.4     22.1     0     0
## 10 97               20240909     20      40.6     37.4     26.7     0     0
## 11 97               20240909     27      53.0     42.9     21.9     0     0
## 12 97               20240909     31      40.5     32.1     22.6     0     0
## 13 97               20240909     85      43.3     41.3     24.4     0     0
## 14 80               20240909      9      45.8     37.8     23.0     0     0
## 15 80               20240909     23      40.5     39.2     21.0     0     0
## 16 80               20240909     26      62.4     37.2     24       0     0
## 17 80               20240909     37      35.9     44.6     22.2     0     0
## # ℹ 6 more variables: image.name <chr>, treatment <chr>, volume <dbl>,
## #   raw_resid <dbl>, std_resid <dbl>, outlier_flag <lgl>
# Plot standardized residuals
ggplot(training_data, aes(x = seq_along(std_resid), y = std_resid)) +
  geom_point(aes(color = outlier_flag), size = 2) +
  geom_hline(yintercept = c(-outlier_threshold, outlier_threshold), linetype = "dashed", color = "red") +
  labs(title = "Standardized Residuals with Outliers", x = "Index", y = "Standardized Residual") +
  theme_minimal()

Re run the model after removing outliers.

training_data<-training_data%>%
  filter(!outlier_flag==TRUE)
hist(training_data$volume)

Fit a polynomial regression.

# Fit a polynomial regression model (2nd-degree polynomial)
poly_model <- lm(volume ~ poly(length.mm, 2) + poly(width.mm, 2) + length.mm:width.mm, data = training_data)

# Compare model summaries
summary(poly_model)
## 
## Call:
## lm(formula = volume ~ poly(length.mm, 2) + poly(width.mm, 2) + 
##     length.mm:width.mm, data = training_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4833.7 -1081.6  -107.2   972.8  5346.1 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1193.278   1754.953  -0.680  0.49665    
## poly(length.mm, 2)1  9908.733   9580.290   1.034  0.30118    
## poly(length.mm, 2)2  4987.905   1740.529   2.866  0.00422 ** 
## poly(width.mm, 2)1  -2523.763  10441.295  -0.242  0.80904    
## poly(width.mm, 2)2  -5249.791   1731.549  -3.032  0.00248 ** 
## length.mm:width.mm      8.331      1.233   6.756 2.07e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1605 on 1395 degrees of freedom
## Multiple R-squared:  0.7865, Adjusted R-squared:  0.7857 
## F-statistic:  1028 on 5 and 1395 DF,  p-value: < 2.2e-16

R sq improved to 0.785

plot(poly_model)

Predict volume using poly model and evaluate fits.

# Predict Volume using the trained model
data$Predicted_Volume_Poly <- predict(poly_model, data)

Plot predicted vs actual volume for each model.

plot(data$Predicted_Volume_Poly ~ data$volume)

Plot Poly model relationships.

# Plot polynomial fit for Length vs. Volume
ggplot(training_data, aes(x = length.mm, y = volume)) +
  geom_point() +
  stat_smooth(method = "lm", formula = y ~ poly(x, 2), color = "blue", se = TRUE) +
  ggtitle("Polynomial Regression (2nd Degree) Fit for Length vs Volume")

# Plot polynomial fit for Width vs. Volume
ggplot(training_data, aes(x = width.mm, y = volume)) +
  geom_point() +
  stat_smooth(method = "lm", formula = y ~ poly(x, 2), color = "red", se = TRUE) +
  ggtitle("Polynomial Regression (2nd Degree) Fit for Width vs Volume")

View the relationship between known volume and predicted volume using Polynomial regression.

# Plot polynomial fit for Length vs. Volume
ggplot(data%>%filter(date=="20240909"), aes(x = volume, y = Predicted_Volume_Poly)) +
  geom_point() +
  stat_smooth(method = "loess", color = "blue", se = TRUE) +
  ggtitle("Polynomial Regression (2nd Degree) Fit for Known vs Predicted Volume")

Model fit looks good for our approximation. Proceed with using predicted volume calculated by polynomial regression.

Look for outliers.

hist(data$Predicted_Volume_Poly)

min(data$Predicted_Volume_Poly)
## [1] -1124.542

There are fewer negative values than the first iteration of the model.

Remove any observation that resulted in a negative value.

data<-data%>%filter(Predicted_Volume_Poly>0)
hist(data$Predicted_Volume_Poly)

Proceed with the calculated volume data.

Analyze growth (volume) over time

Plot data

plot4<-data%>%
  ggplot(aes(x=field_cattle_tag, y=Predicted_Volume_Poly, fill = treatment)) +
  facet_wrap(~date)+
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.4)) +
  geom_point(pch = 21, position=position_jitterdodge(dodge.width=0.4)) +
  xlab("Bag") + 
  theme_classic(); plot4

Plot summarized by treatment

plot5<-data%>%
  ggplot(aes(x=treatment, y=Predicted_Volume_Poly, fill = treatment)) +
  facet_wrap(~date)+
  geom_boxplot(width=.5, outlier.shape= NA, position = position_dodge(width = 0.4)) +
  geom_point(pch = 21, position=position_jitterdodge(dodge.width=0.4)) +
  xlab("Treatment") + 
  theme_classic() + 
  theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)); plot5

Plot summarized by mean and standard error for each bag.

plot6<-data%>%
  group_by(field_cattle_tag, treatment, date)%>%
  summarise(mean=mean(Predicted_Volume_Poly, na.rm=TRUE), se=sd(Predicted_Volume_Poly, na.rm=TRUE)/sqrt(length(Predicted_Volume_Poly)))%>%
  
  ggplot(aes(x=field_cattle_tag, y=mean, fill = treatment)) +
  facet_wrap(~date)+
  geom_point(pch = 21) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=0)+
  xlab("Bag") + 
  ylab("Volume")+
  #ylim(2000, 15000)+
  theme_classic() + 
  theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)); plot6

plot7<-data%>%
  group_by(treatment, date)%>%
  summarise(mean=mean(Predicted_Volume_Poly, na.rm=TRUE), se=sd(Predicted_Volume_Poly, na.rm=TRUE)/sqrt(length(Predicted_Volume_Poly)))%>%
  
  ggplot(aes(x=treatment, y=mean, fill = treatment)) +
  facet_wrap(~date)+
  geom_point(pch = 21, size=4) +
  geom_errorbar(aes(ymin=mean-se, ymax=mean+se), width=0)+
  xlab("Treatment") + 
  ylab("Volume")+
  #ylim(5000, 15000)+
  theme_classic() + 
  theme(axis.text.x=element_text(angle=45, vjust=1, hjust=1)); plot7

Sizes vary between treatments at the start of the outplant. We will examine growth using models to calculate slopes of change in volume over time.

Run linear mixed models models

model<-lmer(sqrt(Predicted_Volume_Poly) ~ treatment * date + (1|field_cattle_tag:treatment), data=data)

summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## sqrt(Predicted_Volume_Poly) ~ treatment * date + (1 | field_cattle_tag:treatment)
##    Data: data
## 
## REML criterion at convergence: 22902.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9553 -0.6477 -0.0375  0.6260  3.5300 
## 
## Random effects:
##  Groups                     Name        Variance Std.Dev.
##  field_cattle_tag:treatment (Intercept)  18.11    4.256  
##  Residual                               203.22   14.255  
## Number of obs: 2808, groups:  field_cattle_tag:treatment, 14
## 
## Fixed effects:
##                                                   Estimate Std. Error        df
## (Intercept)                                        40.2750     2.0088   11.1260
## treatmentWeekly Fresh Water Treated                -1.5695     3.2799   11.1201
## treatmentWeekly Temperature Control                26.8105     3.2788   11.1059
## treatmentWeekly Temperature Treated                13.8884     3.2788   11.1059
## date20240909                                       58.3058     0.9026 2790.0356
## treatmentWeekly Fresh Water Treated:date20240909    7.6232     1.4715 2790.0148
## treatmentWeekly Temperature Control:date20240909  -20.0032     1.4722 2790.0148
## treatmentWeekly Temperature Treated:date20240909   -9.5667     1.4677 2790.0206
##                                                  t value Pr(>|t|)    
## (Intercept)                                       20.050 4.38e-10 ***
## treatmentWeekly Fresh Water Treated               -0.479  0.64155    
## treatmentWeekly Temperature Control                8.177 4.98e-06 ***
## treatmentWeekly Temperature Treated                4.236  0.00137 ** 
## date20240909                                      64.597  < 2e-16 ***
## treatmentWeekly Fresh Water Treated:date20240909   5.181 2.37e-07 ***
## treatmentWeekly Temperature Control:date20240909 -13.588  < 2e-16 ***
## treatmentWeekly Temperature Treated:date20240909  -6.518 8.41e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trWFWT trtWTC trtWTT d20240 tWFWT: tWTC:2
## trtmntWkFWT -0.612                                          
## trtmntWklTC -0.613  0.375                                   
## trtmntWklTT -0.613  0.375  0.375                            
## dat20240909 -0.227  0.139  0.139  0.139                     
## tWFWT:20240  0.139 -0.227 -0.085 -0.085 -0.613              
## tWTC:202409  0.139 -0.085 -0.226 -0.085 -0.613  0.376       
## tWTT:202409  0.140 -0.086 -0.086 -0.226 -0.615  0.377  0.377
anova(model)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF DenDF  F value    Pr(>F)    
## treatment         6393    2131     3    10   10.486  0.001976 ** 
## date           1867588 1867588     1  2790 9190.108 < 2.2e-16 ***
## treatment:date   67141   22380     3  2790  110.130 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(residuals(model))

## [1] 1013  676
hist(residuals(model))

Different sizes by treatment and treatment by date.

There are several outliers. Conduct outlier removal and analyze again.

# Extract raw residuals
data$raw_resid <- residuals(model)

# Standardize residuals
data$std_resid <- data$raw_resid / sd(data$raw_resid)

# Flag potential outliers
outlier_threshold <- 3
data$outlier_flag <- abs(data$std_resid) > outlier_threshold

# Filter rows flagged as outliers
outliers <- data %>% filter(outlier_flag == TRUE)
print(outliers)
## # A tibble: 15 × 16
##    field_cattle_tag date     oyster length.mm width.mm depth.mm  dead stuck
##    <fct>            <fct>     <dbl>     <dbl>    <dbl>    <dbl> <dbl> <dbl>
##  1 85               20240909     78     11.7      45.9    13.8      0     0
##  2 84               20240909     49     24.6      22.5     9.38     0     0
##  3 83               20240909    104     25.3      22.5     5.68     1     0
##  4 82               20240909     98     29.0      19.1    11        1     1
##  5 95               20240909      3     59.6      42.3    18.5      0     0
##  6 95               20240909     68     17.0      16.3     8.32     1     0
##  7 95               20240909     70     63.6      43.1    13.1      0     0
##  8 94               20240909    102     20.3      15.4     3.11     1     0
##  9 97               20240909    101     25.5      22.1     8.69     1     0
## 10 80               20240909     76      8.01     26.5     9.28     0     0
## 11 80               20240624     49     42.8      35.1    NA        0     0
## 12 83               20240624     55     41.3      36.7    NA        0     0
## 13 83               20240624     66     19.9      14.2    NA        0     0
## 14 83               20240624     67     19.6      14.1    NA        0     0
## 15 94               20240624     71     39.2      25.8    NA        0     0
## # ℹ 8 more variables: image.name <chr>, treatment <chr>, volume <dbl>,
## #   Predicted_Volume_GAM <dbl[1d]>, Predicted_Volume_Poly <dbl>,
## #   raw_resid <dbl>, std_resid <dbl>, outlier_flag <lgl>
# Plot standardized residuals
ggplot(data, aes(x = seq_along(std_resid), y = std_resid)) +
  geom_point(aes(color = outlier_flag), size = 2) +
  geom_hline(yintercept = c(-outlier_threshold, outlier_threshold), linetype = "dashed", color = "red") +
  labs(title = "Standardized Residuals with Outliers", x = "Index", y = "Standardized Residual") +
  theme_minimal()

Remove identified outliers.

data<-data%>%
  filter(!outlier_flag==TRUE)

Analyze again.

model<-lmer(sqrt(Predicted_Volume_Poly) ~ treatment * date + (1|field_cattle_tag:treatment), data=data)

summary(model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## sqrt(Predicted_Volume_Poly) ~ treatment * date + (1 | field_cattle_tag:treatment)
##    Data: data
## 
## REML criterion at convergence: 22557.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.12708 -0.67600 -0.04688  0.64731  3.07344 
## 
## Random effects:
##  Groups                     Name        Variance Std.Dev.
##  field_cattle_tag:treatment (Intercept)  18.7     4.324  
##  Residual                               187.6    13.696  
## Number of obs: 2793, groups:  field_cattle_tag:treatment, 14
## 
## Fixed effects:
##                                                   Estimate Std. Error        df
## (Intercept)                                        40.2744     2.0299   11.0044
## treatmentWeekly Fresh Water Treated                -1.7141     3.3148   11.0035
## treatmentWeekly Temperature Control                26.6656     3.3138   10.9907
## treatmentWeekly Temperature Treated                14.0772     3.3145   10.9993
## date20240909                                       58.3689     0.8689 2775.0377
## treatmentWeekly Fresh Water Treated:date20240909    8.1513     1.4170 2775.0156
## treatmentWeekly Temperature Control:date20240909  -19.5194     1.4177 2775.0147
## treatmentWeekly Temperature Treated:date20240909   -9.4890     1.4149 2775.0403
##                                                  t value Pr(>|t|)    
## (Intercept)                                       19.840 5.79e-10 ***
## treatmentWeekly Fresh Water Treated               -0.517  0.61532    
## treatmentWeekly Temperature Control                8.047 6.21e-06 ***
## treatmentWeekly Temperature Treated                4.247  0.00137 ** 
## date20240909                                      67.174  < 2e-16 ***
## treatmentWeekly Fresh Water Treated:date20240909   5.753 9.75e-09 ***
## treatmentWeekly Temperature Control:date20240909 -13.769  < 2e-16 ***
## treatmentWeekly Temperature Treated:date20240909  -6.707 2.40e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trWFWT trtWTC trtWTT d20240 tWFWT: tWTC:2
## trtmntWkFWT -0.612                                          
## trtmntWklTC -0.613  0.375                                   
## trtmntWklTT -0.612  0.375  0.375                            
## dat20240909 -0.216  0.132  0.132  0.132                     
## tWFWT:20240  0.132 -0.216 -0.081 -0.081 -0.613              
## tWTC:202409  0.132 -0.081 -0.215 -0.081 -0.613  0.376       
## tWTT:202409  0.132 -0.081 -0.081 -0.216 -0.614  0.377  0.376
anova(model)
## Type III Analysis of Variance Table with Satterthwaite's method
##                 Sum Sq Mean Sq NumDF DenDF   F value    Pr(>F)    
## treatment         5822    1941     3    10    10.346  0.002076 ** 
## date           1880530 1880530     1  2775 10024.953 < 2.2e-16 ***
## treatment:date   66465   22155     3  2775   118.106 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(residuals(model))

## [1] 1330  819
hist(residuals(model))

Residuals are much better.

Analyze growth rates over time

# Fit a mixed-effects model to estimate growth rate
growth_model <- lmer(Predicted_Volume_Poly ~ date * treatment + (1|treatment:field_cattle_tag), data = data)

# Summarize the model
summary(growth_model)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Predicted_Volume_Poly ~ date * treatment + (1 | treatment:field_cattle_tag)
##    Data: data
## 
## REML criterion at convergence: 51052.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1956 -0.5970 -0.1151  0.4921  4.5788 
## 
## Random effects:
##  Groups                     Name        Variance Std.Dev.
##  treatment:field_cattle_tag (Intercept)  473472   688.1  
##  Residual                               5209925  2282.5  
## Number of obs: 2793, groups:  treatment:field_cattle_tag, 14
## 
## Fixed effects:
##                                                  Estimate Std. Error       df
## (Intercept)                                       1792.91     324.44    11.10
## date20240909                                      8141.35     144.81  2775.04
## treatmentWeekly Fresh Water Treated               -157.21     529.80    11.10
## treatmentWeekly Temperature Control               2918.52     529.63    11.08
## treatmentWeekly Temperature Treated               1435.59     529.75    11.09
## date20240909:treatmentWeekly Fresh Water Treated  1468.19     236.15  2775.02
## date20240909:treatmentWeekly Temperature Control -1463.34     236.26  2775.01
## date20240909:treatmentWeekly Temperature Treated  -518.73     235.79  2775.04
##                                                  t value Pr(>|t|)    
## (Intercept)                                        5.526 0.000173 ***
## date20240909                                      56.221  < 2e-16 ***
## treatmentWeekly Fresh Water Treated               -0.297 0.772143    
## treatmentWeekly Temperature Control                5.510 0.000178 ***
## treatmentWeekly Temperature Treated                2.710 0.020158 *  
## date20240909:treatmentWeekly Fresh Water Treated   6.217 5.82e-10 ***
## date20240909:treatmentWeekly Temperature Control  -6.194 6.74e-10 ***
## date20240909:treatmentWeekly Temperature Treated  -2.200 0.027892 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d20240 trWFWT trtWTC trtWTT d20FWT d202TC
## dat20240909 -0.225                                          
## trtmntWkFWT -0.612  0.138                                   
## trtmntWklTC -0.613  0.138  0.375                            
## trtmntWklTT -0.612  0.138  0.375  0.375                     
## d2024090FWT  0.138 -0.613 -0.225 -0.085 -0.084              
## d20240909TC  0.138 -0.613 -0.084 -0.224 -0.084  0.376       
## d20240909TT  0.138 -0.614 -0.085 -0.085 -0.225  0.377  0.376
anova(growth_model)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq    Mean Sq NumDF DenDF   F value  Pr(>F)    
## date           4.2734e+10 4.2734e+10     1  2775 8202.4558 < 2e-16 ***
## treatment      9.9235e+07 3.3078e+07     3    10    6.3491 0.01105 *  
## date:treatment 6.7488e+08 2.2496e+08     3  2775   43.1790 < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract fixed effects (overall growth rate) and random effects (bag-specific deviations)
fixed_effects <- fixef(growth_model)
random_effects <- ranef(growth_model)$`treatment:field_cattle_tag`

# Plot growth trajectories
ggplot(data, aes(x = date, y = Predicted_Volume_Poly, color = factor(treatment), group=treatment)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  labs(title = "Oyster Growth Over Time",
       x = "Date",
       y = "Volume",
       color = "Treatment")

#plot model trajectories 
ggpred <- ggpredict(growth_model, terms = c("date", "treatment"))
ggplot(ggpred, aes(x = x, y = predicted, color = group, fill=group)) +
  geom_line(aes(group=group)) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high, group=group), alpha = 0.2) +
  theme_classic() +
  labs(title = "Modeled Oyster Growth Over Time",
       x = "Date",
       y = "Volume",
       color = "Treatment", 
       fill = "Treatment")

No difference between treated and control within each group.

Analyze each effort separately.

Weekly fresh water group:

fresh_data<-data[grepl("Fresh Water", data$treatment), ]

# Fit a mixed-effects model to estimate growth rate
growth_model_FW <- lmer(Predicted_Volume_Poly ~ date * treatment + (1|treatment:field_cattle_tag), data = fresh_data)

# Summarize the model
summary(growth_model_FW)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Predicted_Volume_Poly ~ date * treatment + (1 | treatment:field_cattle_tag)
##    Data: fresh_data
## 
## REML criterion at convergence: 28899.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3460 -0.5536 -0.1049  0.4554  4.9178 
## 
## Random effects:
##  Groups                     Name        Variance Std.Dev.
##  treatment:field_cattle_tag (Intercept)  486653   697.6  
##  Residual                               4513515  2124.5  
## Number of obs: 1593, groups:  treatment:field_cattle_tag, 8
## 
## Fixed effects:
##                                                  Estimate Std. Error       df
## (Intercept)                                      1792.932    326.323    6.556
## date20240909                                     8141.322    134.784 1583.019
## treatmentWeekly Fresh Water Treated              -157.225    532.872    6.555
## date20240909:treatmentWeekly Fresh Water Treated 1468.205    219.798 1583.007
##                                                  t value Pr(>|t|)    
## (Intercept)                                        5.494  0.00114 ** 
## date20240909                                      60.403  < 2e-16 ***
## treatmentWeekly Fresh Water Treated               -0.295  0.77708    
## date20240909:treatmentWeekly Fresh Water Treated   6.680  3.3e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d20240 trWFWT
## dat20240909 -0.208              
## trtmntWkFWT -0.612  0.127       
## d2024090FWT  0.128 -0.613 -0.208
anova(growth_model_FW)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq    Mean Sq NumDF DenDF   F value    Pr(>F)    
## date           2.9438e+10 2.9438e+10     1  1583 6522.1421 < 2.2e-16 ***
## treatment      5.5298e+06 5.5298e+06     1     6    1.2252    0.3108    
## date:treatment 2.0139e+08 2.0139e+08     1  1583   44.6196 3.303e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract fixed effects (overall growth rate) and random effects (bag-specific deviations)
fixed_effects <- fixef(growth_model_FW)
random_effects <- ranef(growth_model_FW)$`treatment:field_cattle_tag`

# Plot growth trajectories
ggplot(fresh_data, aes(x = date, y = Predicted_Volume_Poly, color = factor(treatment))) +
  geom_point() +
  geom_smooth(aes(group=treatment), method = "lm", se = FALSE) +
  theme_minimal() +
  labs(title = "Oyster Growth Over Time",
       x = "Date",
       y = "Volume",
       color = "Treatment")

#plot model trajectories 
ggpred <- ggpredict(growth_model_FW, terms = c("date", "treatment"))
plot1<-ggplot(ggpred, aes(x = x, y = predicted, color = group, fill=group, group=group)) +
  geom_line() +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  theme_classic() +
  
  labs(title = "Modeled Oyster Growth Over Time (Fresh Water)",
       x = "Date",
       y = "Predicted Volume",
       color = "Treatment", 
       fill = "Treatment");plot1

No clear difference between control and treated in the fresh water experiment.

Weekly temperature group:

temp_data<-data[grepl("Temperature", data$treatment), ]

# Fit a mixed-effects model to estimate growth rate
growth_model_TEMP <- lmer(Predicted_Volume_Poly ~ date * treatment + (1|treatment:field_cattle_tag), data = temp_data)

# Summarize the model
summary(growth_model_TEMP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Predicted_Volume_Poly ~ date * treatment + (1 | treatment:field_cattle_tag)
##    Data: temp_data
## 
## REML criterion at convergence: 22120.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9479 -0.6362 -0.1351  0.5586  3.6442 
## 
## Random effects:
##  Groups                     Name        Variance Std.Dev.
##  treatment:field_cattle_tag (Intercept)  454370   674.1  
##  Residual                               6134770  2476.8  
## Number of obs: 1200, groups:  treatment:field_cattle_tag, 6
## 
## Fixed effects:
##                                                   Estimate Std. Error        df
## (Intercept)                                       4711.456    414.698     4.526
## date20240909                                      6678.016    202.572  1192.001
## treatmentWeekly Temperature Treated              -1483.034    586.591     4.530
## date20240909:treatmentWeekly Temperature Treated   944.687    286.026  1192.013
##                                                  t value Pr(>|t|)    
## (Intercept)                                       11.361 0.000169 ***
## date20240909                                      32.966  < 2e-16 ***
## treatmentWeekly Temperature Treated               -2.528 0.057639 .  
## date20240909:treatmentWeekly Temperature Treated   3.303 0.000986 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) d20240 trtWTT
## dat20240909 -0.244              
## trtmntWklTT -0.707  0.173       
## d20240909TT  0.173 -0.708 -0.245
anova(growth_model_TEMP)
## Type III Analysis of Variance Table with Satterthwaite's method
##                    Sum Sq    Mean Sq NumDF DenDF  F value    Pr(>F)    
## date           1.5336e+10 1.5336e+10     1  1192 2499.794 < 2.2e-16 ***
## treatment      1.9379e+07 1.9379e+07     1     4    3.159 0.1501326    
## date:treatment 6.6921e+07 6.6921e+07     1  1192   10.909 0.0009857 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Extract fixed effects (overall growth rate) and random effects (bag-specific deviations)
fixed_effects <- fixef(growth_model_TEMP)
random_effects <- ranef(growth_model_TEMP)$`treatment:field_cattle_tag`

# Plot growth trajectories
ggplot(temp_data, aes(x = date, y = Predicted_Volume_Poly, color = factor(treatment), group=treatment)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  theme_minimal() +
  labs(title = "Oyster Growth Over Time",
       x = "Date",
       y = "Volume",
       color = "Treatment")

#plot model trajectories 
ggpred <- ggpredict(growth_model_TEMP, terms = c("date", "treatment"))
plot2<-ggplot(ggpred, aes(x = x, y = predicted, color = group, fill=group, group=group)) +
  geom_line() +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  theme_classic() +
  
  labs(title = "Modeled Oyster Growth Over Time (Temperature)",
       x = "Date",
       y = "Predicted Volume",
       color = "Treatment", 
       fill = "Treatment");plot2

No clear difference between control and treated in the temperature experiment.

plots<-plot_grid(plot1, plot2, ncol=2, nrow=1);plots

ggsave(filename="figures/growth/goose_point/growth.png", plots, width=14, height=6)
Written on February 13, 2025