Skip to content

Instantly share code, notes, and snippets.

@toebR
Created January 3, 2025 11:25
Show Gist options
  • Select an option

  • Save toebR/3bf6082cede945ce2479a7fd1a27cc8d to your computer and use it in GitHub Desktop.

Select an option

Save toebR/3bf6082cede945ce2479a7fd1a27cc8d to your computer and use it in GitHub Desktop.
Rudaz Script Run

Script Run Rudaz 03.01.2024

Kohl_Bodenbedeckung_Burren Block 3 und 4.R

library(here)
here()
library(readxl)
#setwd("~/Masterarbeit/Statistik")

library(tidyverse); theme_set(theme_bw())
library(knitr)
library(lubridate)
library(readxl)
library(brms)
library(qqplotr)
library(kableExtra)

df <- read_xlsx("Mulchkohl_Winterweizen_Bodenbedeckung_bereinigt_2024_burren.xlsx", 
                sheet = "Rohdaten")
df
# A tibble: 168 × 9
    Datum Block Verfahren jpg.ID       vegetationType Kohl_BB_Total Unkraut_BB_Total LM_BB_Total Freie_Bodenflaeche_Mulch_Total
    <dbl> <dbl> <chr>     <chr>        <chr>                  <dbl>            <dbl>       <dbl>                          <dbl>
 1 230623     1 Kontrolle IMG_4939.JPG Kohl                  0.0874          0.0046            0                          0.908
 2 230623     1 Tmin      IMG_4935.JPG Kohl                  0.042           0                 0                          0.958
 3 230623     1 Tmin-LM   IMG_4934.JPG Kohl                  0.028           0                 0                          0.972
 4 230623     1 LM        IMG_4933.JPG Kohl                  0.0359          0.00111           0                          0.963
 5 230623     2 Kontrolle IMG_4924.JPG Kohl                  0.0568          0.00116           0                          0.942
 6 230623     2 Tmax      IMG_4926.JPG Kohl                  0.001           0                 0                          0.999
 7 230623     2 Tmax-LM   IMG_4931.JPG Kohl                  0.008           0                 0                          0.992
 8 230623     2 Tmin      IMG_4932.JPG Kohl                  0.036           0                 0                          0.964
 9 230623     2 Tmin-LM   IMG_4927.JPG Kohl                  0.021           0                 0                          0.979
10 230623     2 LM        IMG_4929.JPG Kohl                  0.0238          0.00024           0                          0.976
# ℹ 158 more rows
# ℹ Use `print(n = ...)` to see more rows
df$Kohl_BB_Total[df$Kohl_BB_Total == 0] <- 0.1
df$Kohl_BB_Total[df$Kohl_BB_Total == 100] <- 99.9
df$Unkraut_BB_Total[df$Unkraut_BB_Total == 0] <- 0.1
df$Unkraut_BB_Total[df$Unkraut_BB_Total == 100] <- 99.9
df$LM_BB_Total[df$LM_BB_Total == 0] <- 0.1
df$LM_BB_Total[df$LM_BB_Total == 100] <- 99.9
df$Freie_Bodenflaeche_Mulch_Total[df$Freie_Bodenflaeche_Mulch_Total == 0] <- 0.1
df$Freie_Bodenflaeche_Mulch_Total[df$Freie_Bodenflaeche_Mulch_Total == 100] <- 99.9


rs <- df %>%
  dplyr::select(Kohl_BB_Total, Unkraut_BB_Total, LM_BB_Total, Freie_Bodenflaeche_Mulch_Total) %>%
  rowSums
df$Kohl_BB_Total <- df$Kohl_BB_Total / rs
df$Unkraut_BB_Total <- df$Unkraut_BB_Total / rs
df$LM_BB_Total <- df$LM_BB_Total / rs
df$Freie_Bodenflaeche_Mulch_Total <- df$Freie_Bodenflaeche_Mulch_Total / rs

## Response variable
df$y <- with(df, 
             cbind(Unkraut = Unkraut_BB_Total, 
                   LM = LM_BB_Total, 
                   KohlKohl_BB = Kohl_BB_Total,
                   Freie_Bodenflaeche = Freie_Bodenflaeche_Mulch_Total))


## Date as character
df$Datum <- as.character(df$Datum)

## Block as factors
df$Block <- factor(df$Block)

## Verfahren as factor
df$Verfahren <- factor(df$Verfahren)

## Verfahren as factor
df$Datum.f <- factor(df$Datum)

summary(df)
table(df$Datum.f,df$Verfahren)

# Filtern nach Block
library(dplyr)
df <- filter(df, Block %in% c("3", "4")) %>% droplevels()
summary(df)
    Datum           Block      Verfahren     jpg.ID          vegetationType     Kohl_BB_Total       Unkraut_BB_Total   
 Length:168         1:42   Kontrolle:28   Length:168         Length:168         Min.   :0.0005455   Min.   :0.0002182  
 Class :character   2:42   LM       :28   Class :character   Class :character   1st Qu.:0.0675917   1st Qu.:0.0122200  
 Mode  :character   3:42   Tmax     :28   Mode  :character   Mode  :character   Median :0.2899000   Median :0.0658545  
                    4:42   Tmax-LM  :28                                         Mean   :0.2897483   Mean   :0.1014953  
                           Tmin     :28                                         3rd Qu.:0.4794750   3rd Qu.:0.1105664  
                           Tmin-LM  :28                                         Max.   :0.7575700   Max.   :0.6328636  
                                                                                                                       
  LM_BB_Total      Freie_Bodenflaeche_Mulch_Total
 Min.   :0.00034   Min.   :0.07545               
 1st Qu.:0.08333   1st Qu.:0.30100               
 Median :0.09091   Median :0.47814               
 Mean   :0.08283   Mean   :0.52592               
 3rd Qu.:0.09091   3rd Qu.:0.79771               
 Max.   :0.59052   Max.   :0.96600               
                                                 
      y.Unkraut              y.LM             y.KohlKohl_BB     y.Freie_Bodenflaeche   Datum.f  
 Min.   :0.0002182    Min.   :0.0003400    Min.   :0.0005455    Min.   :0.0754545    230623:24  
 1st Qu.:0.0122200    1st Qu.:0.0833333    1st Qu.:0.0675917    1st Qu.:0.3010000    230704:24  
 Median :0.0658545    Median :0.0909091    Median :0.2899000    Median :0.4781364    230717:24  
 Mean   :0.1014953    Mean   :0.0828345    Mean   :0.2897483    Mean   :0.5259220    230804:24  
 3rd Qu.:0.1105664    3rd Qu.:0.0909091    3rd Qu.:0.4794750    3rd Qu.:0.7977083    230825:24  
 Max.   :0.6328636    Max.   :0.5905200    Max.   :0.7575700    Max.   :0.9660000    230929:24  
                                                                                     240419:24 
table(df$Datum.f,df$Verfahren)
         Kontrolle LM Tmax Tmax-LM Tmin Tmin-LM
  230623         4  4    4       4    4       4
  230704         4  4    4       4    4       4
  230717         4  4    4       4    4       4
  230804         4  4    4       4    4       4
  230825         4  4    4       4    4       4
  230929         4  4    4       4    4       4
  240419         4  4    4       4    4       4
# Filtern nach Block
library(dplyr)
df <- filter(df, Block %in% c("3", "4")) %>% droplevels()
summary(df)
## Long data set for plotting
df_long <- df %>%
  gather(Kohl_BB_Total, Unkraut_BB_Total, LM_BB_Total, Freie_Bodenflaeche_Mulch_Total, key = "Typ", value = "value") %>%
  mutate(Typ = factor(Typ, 
                      levels = c("Unkraut_BB_Total", "LM_BB_Total", "Kohl_BB_Total", "Freie_Bodenflaeche_Mulch_Total"),
                      labels = c("Unkraut", "LM", "KohlKohl_BB", "Freie_Bodenflaeche")))

## Plot of all data
df_long %>%
  ggplot(aes(x = Datum.f, y = value)) +
  geom_boxplot() +
  facet_grid(Verfahren ~ Typ) +
  scale_x_discrete(labels = gsub(pattern = "", replacement = "", x = unique(df$Datum.f))) +
  scale_y_continuous(breaks = seq(0, 1, by = .25), labels = seq(0, 1, by = .25) * 100) +
  labs("Anteil") +
  theme(legend.position = "top")

Rplot

## Modell
fit_cat <- brm(y ~ Verfahren * Datum.f + (1 | Block),
               family = dirichlet(),
               data = df,
               control = list(adapt_delta = 0.99),
               iter = 6000,
               cores = 2L,
               save_pars = save_pars(all = TRUE))
Click the Refresh button to see progress of the chains
starting worker pid=4376 on localhost:11427 at 10:48:27.409
starting worker pid=6848 on localhost:11427 at 10:48:27.661

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000253 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.53 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 6000 [  0%]  (Warmup)

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 0.000429 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 4.29 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 1: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 2: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 1: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 662.08 seconds (Warm-up)
Chain 2:                691.304 seconds (Sampling)
Chain 2:                1353.38 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 0.000313 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 3.13 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 1: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 3: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 1: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 3: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 754.18 seconds (Warm-up)
Chain 1:                1698.5 seconds (Sampling)
Chain 1:                2452.68 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 0.000479 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 4.79 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 3: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 4: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 3: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 4: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 3: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 657.677 seconds (Warm-up)
Chain 3:                737.851 seconds (Sampling)
Chain 3:                1395.53 seconds (Total)
Chain 3: 
Chain 4: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 593.464 seconds (Warm-up)
Chain 4:                577.679 seconds (Sampling)
Chain 4:                1171.14 seconds (Total)
Chain 4: 
Compiling Stan program...
Start sampling
Warnmeldungen:
1: There were 3 divergent transitions after warmup. See
https://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
to find out why this is a problem and how to eliminate them. 
2: There were 7458 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded 
3: Examine the pairs() plot to diagnose sampling problems
save(fit_cat, file="fit_cat_narm.RData")
fit_cat <- add_criterion(fit_cat, "loo", moment_match = TRUE)
Error : alpha must be positive.
Warnmeldungen:
1: Some Pareto k diagnostic values are too high. See help('pareto-k-diagnostic') for details.
 
2: Found 39 observations with a pareto_k > 0.7 in model 'fit_cat'. With this many problematic observations, it may be more appropriate to use 'kfold' with argument 'K = 10' to perform 10-fold cross-validation rather than LOO. 
## Fitted values
res_fit <- fitted(fit_cat)

## Point estimate for dispersion parameter
phi <- posterior_summary(fit_cat, pars = "phi")[1]

## Data set of residuals and fitted values
df_resid <- tibble(
  fit = c(res_fit[,1,1],
          res_fit[,1,2],
          res_fit[,1,3],
          res_fit[,1,4]),
  observed = c(df$Unkraut_BB_Total,
               df$LM_BB_Total,
               df$Kohl_BB_Total,
               df$Freie_Bodenflaeche_Mulch_Total),
  Typ = c(rep("Unkraut", nrow(df)),
          rep("LM", nrow(df)),
          rep("KohlKohl_BB", nrow(df)),
          rep("Freie_Bodenflaeche", nrow(df)))
) %>%
  mutate(resid = (observed - fit) / sqrt(phi)) # calculate pearson residuals
  
  ## QQ-plots
df_resid %>%
  ggplot(aes(sample = resid)) +
  #stat_qq_band() +
  stat_qq_line() +
  stat_qq_point() +
  facet_wrap(~ Typ, scales = "free") +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

Rplot01

## New data set
nd <- expand.grid(
  Verfahren = unique(df$Verfahren),
  Datum.f = unique(df$Datum.f)
)

## Fitted values for new data
res_anteil <- fitted(fit_cat, 
                     newdata = nd, 
                     re_formula = NA)

## Some processing
res_anteil <- lapply(1:4, function(i) bind_cols(nd, as.data.frame(res_anteil[,,i])) %>%
                       mutate(Typ = dimnames(res_anteil)[[3]][i])) %>%
  bind_rows

res_anteil <- res_anteil %>%
  mutate(Typ = gsub(pattern = "P(Y = ", replacement = "", x = Typ, fixed = TRUE),
         Typ = gsub(pattern = ")", replacement = "", x = Typ, fixed = TRUE),
         Typ = gsub(pattern = "", replacement = "", x = Typ),
         Typ = factor(Typ, levels = c("Unkraut", "LM", "KohlKohl_BB", "Freie_Bodenflaeche")),
         Verfahren = factor(Verfahren, levels = levels(df_long$Verfahren)))

## Plot der gefitteten Daten gegen die beobachteten Daten
res_anteil %>%
  ggplot(aes(x = factor(Datum.f), y = Estimate, ymin = Q2.5, ymax = Q97.5)) +
  geom_boxplot(data = df_long, aes(y = value, ymin = NULL, ymax = NULL, group = Datum.f)) +
  geom_pointrange(colour = "blue") +
  scale_y_continuous(breaks = seq(0, 1, by = .25), labels = seq(0, 1, by = .25) * 100) +
  scale_x_discrete(labels = gsub(pattern = "", replacement = "", x = unique(df$Datum.f))) +
  facet_wrap(Verfahren ~ Typ, scales = "free_y", ncol = 4) +
  labs(x = "Zeitpunkt")

Rplot02

## Und noch einmal nur die gefitteten Daten
res_anteil %>%
  ggplot(aes(x = factor(Datum.f), y = Estimate, ymin = Q2.5, ymax = Q97.5)) +
  geom_pointrange() +
  scale_y_continuous(breaks = seq(0, 1, by = .25), 
                     labels = seq(0, 1, by = .25) * 100,
                     limits = 0:1) +
  scale_x_discrete(labels = gsub(pattern = "", replacement = "", x = unique(df$Datum.f))) +
  facet_grid(Verfahren ~ Typ) +
  labs(x = "Zeitpunkt")

Rplot03

## Und nochmal entlang der Verfahren:
res_anteil %>%
  ggplot(aes(x = factor(Datum.f), y = Estimate, ymin = Q2.5, ymax = Q97.5, colour = Verfahren)) +
  geom_pointrange(position = position_dodge(width = .5)) +
  scale_y_continuous(breaks = seq(0, 1, by = .25), 
                     labels = seq(0, 1, by = .25) * 100,
                     limits = 0:1) +
  scale_x_discrete(labels = gsub(pattern = "", replacement = "", x = unique(df$Datum.f))) +
  facet_wrap(~ Typ) +
  labs(x = "Zeitpunkt") +
  theme(legend.position = "top")

Rplot04

@toebR
Copy link
Author

toebR commented Jan 3, 2025

Outcome Script Run 2: Dummy vars set from 0 to 0.1% | ex ->((df$Kohl_BB_Total[df$Kohl_BB_Total == 0] <- 0.001))

plots

overall

qqplot

fitted vs observed

only fitted
fitted verfahren

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment