Script Run Rudaz 03.01.2024
Kohl_Bodenbedeckung_Burren Block 3 und 4.R
library(here )
here()
library(readxl )
# setwd("~/Masterarbeit/Statistik")
library(tidyverse ); theme_set(theme_bw())
library(knitr )
library(lubridate )
library(readxl )
library(brms )
library(qqplotr )
library(kableExtra )
df <- read_xlsx(" Mulchkohl_Winterweizen_Bodenbedeckung_bereinigt_2024_burren.xlsx" ,
sheet = " Rohdaten" )
df
# A tibble: 168 × 9
Datum Block Verfahren jpg.ID vegetationType Kohl_BB_Total Unkraut_BB_Total LM_BB_Total Freie_Bodenflaeche_Mulch_Total
<dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 230623 1 Kontrolle IMG_4939.JPG Kohl 0.0874 0.0046 0 0.908
2 230623 1 Tmin IMG_4935.JPG Kohl 0.042 0 0 0.958
3 230623 1 Tmin-LM IMG_4934.JPG Kohl 0.028 0 0 0.972
4 230623 1 LM IMG_4933.JPG Kohl 0.0359 0.00111 0 0.963
5 230623 2 Kontrolle IMG_4924.JPG Kohl 0.0568 0.00116 0 0.942
6 230623 2 Tmax IMG_4926.JPG Kohl 0.001 0 0 0.999
7 230623 2 Tmax-LM IMG_4931.JPG Kohl 0.008 0 0 0.992
8 230623 2 Tmin IMG_4932.JPG Kohl 0.036 0 0 0.964
9 230623 2 Tmin-LM IMG_4927.JPG Kohl 0.021 0 0 0.979
10 230623 2 LM IMG_4929.JPG Kohl 0.0238 0.00024 0 0.976
# ℹ 158 more rows
# ℹ Use `print(n = ...)` to see more rows
df $ Kohl_BB_Total [df $ Kohl_BB_Total == 0 ] <- 0.1
df $ Kohl_BB_Total [df $ Kohl_BB_Total == 100 ] <- 99.9
df $ Unkraut_BB_Total [df $ Unkraut_BB_Total == 0 ] <- 0.1
df $ Unkraut_BB_Total [df $ Unkraut_BB_Total == 100 ] <- 99.9
df $ LM_BB_Total [df $ LM_BB_Total == 0 ] <- 0.1
df $ LM_BB_Total [df $ LM_BB_Total == 100 ] <- 99.9
df $ Freie_Bodenflaeche_Mulch_Total [df $ Freie_Bodenflaeche_Mulch_Total == 0 ] <- 0.1
df $ Freie_Bodenflaeche_Mulch_Total [df $ Freie_Bodenflaeche_Mulch_Total == 100 ] <- 99.9
rs <- df %> %
dplyr :: select(Kohl_BB_Total , Unkraut_BB_Total , LM_BB_Total , Freie_Bodenflaeche_Mulch_Total ) %> %
rowSums
df $ Kohl_BB_Total <- df $ Kohl_BB_Total / rs
df $ Unkraut_BB_Total <- df $ Unkraut_BB_Total / rs
df $ LM_BB_Total <- df $ LM_BB_Total / rs
df $ Freie_Bodenflaeche_Mulch_Total <- df $ Freie_Bodenflaeche_Mulch_Total / rs
# # Response variable
df $ y <- with(df ,
cbind(Unkraut = Unkraut_BB_Total ,
LM = LM_BB_Total ,
KohlKohl_BB = Kohl_BB_Total ,
Freie_Bodenflaeche = Freie_Bodenflaeche_Mulch_Total ))
# # Date as character
df $ Datum <- as.character(df $ Datum )
# # Block as factors
df $ Block <- factor (df $ Block )
# # Verfahren as factor
df $ Verfahren <- factor (df $ Verfahren )
# # Verfahren as factor
df $ Datum.f <- factor (df $ Datum )
summary(df )
table(df $ Datum.f ,df $ Verfahren )
# Filtern nach Block
library(dplyr )
df <- filter(df , Block %in% c(" 3" , " 4" )) %> % droplevels()
summary(df )
Datum Block Verfahren jpg.ID vegetationType Kohl_BB_Total Unkraut_BB_Total
Length:168 1:42 Kontrolle:28 Length:168 Length:168 Min. :0.0005455 Min. :0.0002182
Class :character 2:42 LM :28 Class :character Class :character 1st Qu.:0.0675917 1st Qu.:0.0122200
Mode :character 3:42 Tmax :28 Mode :character Mode :character Median :0.2899000 Median :0.0658545
4:42 Tmax-LM :28 Mean :0.2897483 Mean :0.1014953
Tmin :28 3rd Qu.:0.4794750 3rd Qu.:0.1105664
Tmin-LM :28 Max. :0.7575700 Max. :0.6328636
LM_BB_Total Freie_Bodenflaeche_Mulch_Total
Min. :0.00034 Min. :0.07545
1st Qu.:0.08333 1st Qu.:0.30100
Median :0.09091 Median :0.47814
Mean :0.08283 Mean :0.52592
3rd Qu.:0.09091 3rd Qu.:0.79771
Max. :0.59052 Max. :0.96600
y.Unkraut y.LM y.KohlKohl_BB y.Freie_Bodenflaeche Datum.f
Min. :0.0002182 Min. :0.0003400 Min. :0.0005455 Min. :0.0754545 230623:24
1st Qu.:0.0122200 1st Qu.:0.0833333 1st Qu.:0.0675917 1st Qu.:0.3010000 230704:24
Median :0.0658545 Median :0.0909091 Median :0.2899000 Median :0.4781364 230717:24
Mean :0.1014953 Mean :0.0828345 Mean :0.2897483 Mean :0.5259220 230804:24
3rd Qu.:0.1105664 3rd Qu.:0.0909091 3rd Qu.:0.4794750 3rd Qu.:0.7977083 230825:24
Max. :0.6328636 Max. :0.5905200 Max. :0.7575700 Max. :0.9660000 230929:24
240419:24
table(df $ Datum.f ,df $ Verfahren )
Kontrolle LM Tmax Tmax-LM Tmin Tmin-LM
230623 4 4 4 4 4 4
230704 4 4 4 4 4 4
230717 4 4 4 4 4 4
230804 4 4 4 4 4 4
230825 4 4 4 4 4 4
230929 4 4 4 4 4 4
240419 4 4 4 4 4 4
# Filtern nach Block
library(dplyr )
df <- filter(df , Block %in% c(" 3" , " 4" )) %> % droplevels()
summary(df )
# # Long data set for plotting
df_long <- df %> %
gather(Kohl_BB_Total , Unkraut_BB_Total , LM_BB_Total , Freie_Bodenflaeche_Mulch_Total , key = " Typ" , value = " value" ) %> %
mutate(Typ = factor (Typ ,
levels = c(" Unkraut_BB_Total" , " LM_BB_Total" , " Kohl_BB_Total" , " Freie_Bodenflaeche_Mulch_Total" ),
labels = c(" Unkraut" , " LM" , " KohlKohl_BB" , " Freie_Bodenflaeche" )))
# # Plot of all data
df_long %> %
ggplot(aes(x = Datum.f , y = value )) +
geom_boxplot() +
facet_grid(Verfahren ~ Typ ) +
scale_x_discrete(labels = gsub(pattern = " " , replacement = " " , x = unique(df $ Datum.f ))) +
scale_y_continuous(breaks = seq(0 , 1 , by = .25 ), labels = seq(0 , 1 , by = .25 ) * 100 ) +
labs(" Anteil" ) +
theme(legend.position = " top" )
# # Modell
fit_cat <- brm(y ~ Verfahren * Datum.f + (1 | Block ),
family = dirichlet(),
data = df ,
control = list (adapt_delta = 0.99 ),
iter = 6000 ,
cores = 2L ,
save_pars = save_pars(all = TRUE ))
Click the Refresh button to see progress of the chains
starting worker pid=4376 on localhost:11427 at 10:48:27.409
starting worker pid=6848 on localhost:11427 at 10:48:27.661
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000253 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.53 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 6000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000429 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 4.29 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 1: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 2: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 1: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 662.08 seconds (Warm-up)
Chain 2: 691.304 seconds (Sampling)
Chain 2: 1353.38 seconds (Total)
Chain 2:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000313 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 3.13 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 1: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 3: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 1: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 3: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 754.18 seconds (Warm-up)
Chain 1: 1698.5 seconds (Sampling)
Chain 1: 2452.68 seconds (Total)
Chain 1:
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000479 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 4.79 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 6000 [ 0%] (Warmup)
Chain 3: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 4: Iteration: 600 / 6000 [ 10%] (Warmup)
Chain 3: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 4: Iteration: 1200 / 6000 [ 20%] (Warmup)
Chain 3: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 657.677 seconds (Warm-up)
Chain 3: 737.851 seconds (Sampling)
Chain 3: 1395.53 seconds (Total)
Chain 3:
Chain 4: Iteration: 1800 / 6000 [ 30%] (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%] (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%] (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%] (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%] (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%] (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%] (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%] (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 593.464 seconds (Warm-up)
Chain 4: 577.679 seconds (Sampling)
Chain 4: 1171.14 seconds (Total)
Chain 4:
Compiling Stan program...
Start sampling
Warnmeldungen:
1: There were 3 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them.
2: There were 7458 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
3: Examine the pairs() plot to diagnose sampling problems
save(fit_cat , file = " fit_cat_narm.RData" )
fit_cat <- add_criterion(fit_cat , " loo" , moment_match = TRUE )
Error : alpha must be positive.
Warnmeldungen:
1: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
2: Found 39 observations with a pareto_k > 0.7 in model 'fit_cat'. With this many problematic observations, it may be more appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather than LOO.
# # Fitted values
res_fit <- fitted(fit_cat )
# # Point estimate for dispersion parameter
phi <- posterior_summary(fit_cat , pars = " phi" )[1 ]
# # Data set of residuals and fitted values
df_resid <- tibble(
fit = c(res_fit [,1 ,1 ],
res_fit [,1 ,2 ],
res_fit [,1 ,3 ],
res_fit [,1 ,4 ]),
observed = c(df $ Unkraut_BB_Total ,
df $ LM_BB_Total ,
df $ Kohl_BB_Total ,
df $ Freie_Bodenflaeche_Mulch_Total ),
Typ = c(rep(" Unkraut" , nrow(df )),
rep(" LM" , nrow(df )),
rep(" KohlKohl_BB" , nrow(df )),
rep(" Freie_Bodenflaeche" , nrow(df )))
) %> %
mutate(resid = (observed - fit ) / sqrt(phi )) # calculate pearson residuals
# # QQ-plots
df_resid %> %
ggplot(aes(sample = resid )) +
# stat_qq_band() +
stat_qq_line() +
stat_qq_point() +
facet_wrap(~ Typ , scales = " free" ) +
labs(x = " Theoretical Quantiles" , y = " Sample Quantiles" )
# # New data set
nd <- expand.grid(
Verfahren = unique(df $ Verfahren ),
Datum.f = unique(df $ Datum.f )
)
# # Fitted values for new data
res_anteil <- fitted(fit_cat ,
newdata = nd ,
re_formula = NA )
# # Some processing
res_anteil <- lapply(1 : 4 , function (i ) bind_cols(nd , as.data.frame(res_anteil [,,i ])) %> %
mutate(Typ = dimnames(res_anteil )[[3 ]][i ])) %> %
bind_rows
res_anteil <- res_anteil %> %
mutate(Typ = gsub(pattern = " P(Y = " , replacement = " " , x = Typ , fixed = TRUE ),
Typ = gsub(pattern = " )" , replacement = " " , x = Typ , fixed = TRUE ),
Typ = gsub(pattern = " " , replacement = " " , x = Typ ),
Typ = factor (Typ , levels = c(" Unkraut" , " LM" , " KohlKohl_BB" , " Freie_Bodenflaeche" )),
Verfahren = factor (Verfahren , levels = levels(df_long $ Verfahren )))
# # Plot der gefitteten Daten gegen die beobachteten Daten
res_anteil %> %
ggplot(aes(x = factor (Datum.f ), y = Estimate , ymin = Q2.5 , ymax = Q97.5 )) +
geom_boxplot(data = df_long , aes(y = value , ymin = NULL , ymax = NULL , group = Datum.f )) +
geom_pointrange(colour = " blue" ) +
scale_y_continuous(breaks = seq(0 , 1 , by = .25 ), labels = seq(0 , 1 , by = .25 ) * 100 ) +
scale_x_discrete(labels = gsub(pattern = " " , replacement = " " , x = unique(df $ Datum.f ))) +
facet_wrap(Verfahren ~ Typ , scales = " free_y" , ncol = 4 ) +
labs(x = " Zeitpunkt" )
# # Und noch einmal nur die gefitteten Daten
res_anteil %> %
ggplot(aes(x = factor (Datum.f ), y = Estimate , ymin = Q2.5 , ymax = Q97.5 )) +
geom_pointrange() +
scale_y_continuous(breaks = seq(0 , 1 , by = .25 ),
labels = seq(0 , 1 , by = .25 ) * 100 ,
limits = 0 : 1 ) +
scale_x_discrete(labels = gsub(pattern = " " , replacement = " " , x = unique(df $ Datum.f ))) +
facet_grid(Verfahren ~ Typ ) +
labs(x = " Zeitpunkt" )
## Und nochmal entlang der Verfahren:
res_anteil %>%
ggplot(aes(x = factor(Datum.f), y = Estimate, ymin = Q2.5, ymax = Q97.5, colour = Verfahren)) +
geom_pointrange(position = position_dodge(width = .5)) +
scale_y_continuous(breaks = seq(0, 1, by = .25),
labels = seq(0, 1, by = .25) * 100,
limits = 0:1) +
scale_x_discrete(labels = gsub(pattern = "", replacement = "", x = unique(df$Datum.f))) +
facet_wrap(~ Typ) +
labs(x = "Zeitpunkt") +
theme(legend.position = "top")
Outcome Script Run 2: Dummy vars set from 0 to 0.1% | ex ->((df$Kohl_BB_Total[df$Kohl_BB_Total == 0] <- 0.001))
plots