## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center" ) library(rsofun) library(dplyr) library(ggplot2) library(tidyr) library(sensitivity) library(BayesianTools) ## ----------------------------------------------------------------------------- # Define log-likelihood function ll_pmodel <- function( par_v # a vector of all calibratable parameters including errors ){ rsofun::cost_likelihood_pmodel( # reuse likelihood cost function par_v, obs = rsofun::p_model_validation, drivers = rsofun::p_model_drivers, targets = "gpp" ) } # Compute log-likelihood for a given set of parameters ll_pmodel( par_v = c( kphio = 0.09423773, # setup ORG in Stocker et al. 2020 GMD kphio_par_a = 0.0, # set to zero to disable temperature-dependence of kphio kphio_par_b = 1.0, soilm_thetastar = 0.6 * 240, # to recover old setup with soil moisture stress soilm_betao = 0.0, beta_unitcostratio = 146.0, rd_to_vcmax = 0.014, # value from Atkin et al. 2015 for C3 herbaceous tau_acclim = 30.0, kc_jmax = 0.41, error_gpp = 0.9 # value from previous simulations )) ## ----------------------------------------------------------------------------- # best parameter values (from previous literature) par_cal_best <- c( kphio = 0.09423773, kphio_par_a = -0.0025, kphio_par_b = 20, soilm_thetastar = 0.6*240, soilm_betao = 0.2, beta_unitcostratio = 146.0, rd_to_vcmax = 0.014, tau_acclim = 30.0, kc_jmax = 0.41, error_gpp = 1 ) # lower bound par_cal_min <- c( kphio = 0.03, kphio_par_a = -0.004, kphio_par_b = 10, soilm_thetastar = 0, soilm_betao = 0, beta_unitcostratio = 50.0, rd_to_vcmax = 0.01, tau_acclim = 7.0, kc_jmax = 0.2, error_gpp = 0.01 ) # upper bound par_cal_max <- c( kphio = 0.15, kphio_par_a = -0.001, kphio_par_b = 30, soilm_thetastar = 240, soilm_betao = 1, beta_unitcostratio = 200.0, rd_to_vcmax = 0.1, tau_acclim = 60.0, kc_jmax = 0.8, error_gpp = 4 ) ## ----eval = FALSE------------------------------------------------------------- # morris_setup <- BayesianTools::createBayesianSetup( # likelihood = ll_pmodel, # prior = BayesianTools::createUniformPrior(par_cal_min, par_cal_max, par_cal_best), # names = names(par_cal_best) # ) ## ----eval = FALSE, echo = TRUE------------------------------------------------ # set.seed(432) # morrisOut <- sensitivity::morris( # model = morris_setup$posterior$density, # factors = names(par_cal_best), # r = 1000, # design = list(type = "oat", levels = 20, grid.jump = 3), # binf = par_cal_min, # bsup = par_cal_max, # scale = TRUE) ## ----simulate_morrisOut, include = FALSE-------------------------------------- # TODO: get rid of this and always fully run the vignettes # fake output since Morris sensitivity isn't run # saveRDS(morrisOut, "files/sensitivity_analysis.Rmd__morrisOut.RDS") morrisOut <- readRDS("files/sensitivity_analysis.Rmd__morrisOut.RDS") ## ----eval = TRUE, fig.width=7, fig.height=4----------------------------------- # Summarise the morris output morrisOut.df <- data.frame( parameter = names(par_cal_best), mu.star = apply(abs(morrisOut$ee), 2, mean, na.rm = T), sigma = apply(morrisOut$ee, 2, sd, na.rm = T) ) %>% arrange( mu.star ) morrisOut.df |> tidyr::pivot_longer( -parameter, names_to = "variable", values_to = "value") |> ggplot(aes( reorder(parameter, value), value, fill = variable), color = NA) + geom_bar(position = position_dodge(), stat = 'identity') + scale_fill_brewer("", labels = c('mu.star' = expression(mu * "*"), 'sigma' = expression(sigma)), palette = "Greys") + theme_classic() + theme( axis.text = element_text(size = 6), axis.title = element_blank(), legend.position = c(0.05, 0.95), legend.justification = c(0.05, 0.95) ) ## ----eval = FALSE, echo = TRUE------------------------------------------------ # # Calibrates kphio, kphio_par_b, kc_jmax - top 3 model params # set.seed(2023) # # # Define calibration settings # settings_calib <- list( # method = "BayesianTools", # metric = rsofun::cost_likelihood_pmodel, # control = list( # sampler = "DEzs", # settings = list( # burnin = 3000, # iterations = 9000, # nrChains = 3, # number of independent chains # startValue = 3 # number of internal chains to be sampled # )), # par = list( # kphio = list(lower = 0.03, upper = 0.15, init = 0.05), # #kphio_par_a = list(lower = -0.004, upper = -0.001, init = -0.0025), # kphio_par_b = list(lower = 10, upper = 30, init =25), # kc_jmax = list(lower = 0.2, upper = 0.8, init = 0.41), # err_gpp = list(lower = 0.1, upper = 3, init = 0.8) # ) # ) # par_fixed <- list( # soilm_thetastar = 0.6*240, # soilm_betao = 0.2, # beta_unitcostratio = 146.0, # rd_to_vcmax = 0.014, # kphio_par_a = -0.0025, # tau_acclim = 30.0) # # # Calibrate kphio-related parameters and err_gpp # par_calib <- calib_sofun( # drivers = p_model_drivers, # obs = p_model_validation, # settings = settings_calib, # par_fixed = par_fixed, # targets = "gpp" # ) # # # This code takes 15 minutes to run ## ----simulate_par_calib, include = FALSE-------------------------------------- # TODO: get rid of this and always fully run the vignettes # fake output since calibration isn't run # saveRDS(par_calib, "files/sensitivity_analysis.Rmd__par_calib.RDS") par_calib <- readRDS("files/sensitivity_analysis.Rmd__par_calib.RDS") ## ----fig.height = 10, fig.width = 7------------------------------------------- plot(par_calib$mod) ## ----fig.height = 10, fig.width = 7, eval = FALSE, echo = FALSE--------------- # # Define function for plotting chains separately # plot_acf_mcmc <- function(chains, par_names){ # # chains: from the BayesianTools output # n_chains <- length(chains) # n_internal_chains <- length(chains[[1]]$chain) # par(mfrow = c(length(par_names), n_chains)) # for(par_name in par_names){ # for(i in 1:n_chains){ # stopifnot(n_internal_chains<=3); color = c("blue", "red", "darkgreen") # spacing = 0.5/n_internal_chains # for(j in 1:n_internal_chains){ # autocorr_internal_chain <- pacf(getSample(chains[[i]]$chain[[j]])[, par_name], plot = FALSE) # if(j==1){ # plot(autocorr_internal_chain, col = color[j], # main = sprintf("Series of %s , chain (%i)", par_name, i)) # } else { # lines(autocorr_internal_chain$lag + spacing*(j-1), # autocorr_internal_chain$acf, # col = color[j], type = "h") # } # } # } # } # } # plot_acf_mcmc(par_calib$mod, c("kphio", "kc_jmax", "kphio_par_b", "err_gpp")) ## ----fig.width=5, fig.height=5------------------------------------------------ correlationPlot(par_calib$mod, thin = 1) # use all samples, no thinning ## ----------------------------------------------------------------------------- gelmanDiagnostics(par_calib$mod) ## ----------------------------------------------------------------------------- summary(par_calib$mod) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # # Evaluation of the uncertainty coming from the model parameters' uncertainty # # # Sample parameter values from the posterior distribution # samples_par <- getSample(par_calib$mod, numSamples = 600) |> # as.data.frame() |> # dplyr::mutate(mcmc_id = 1:n()) |> # tidyr::nest(.by = mcmc_id, .key = "pars") # # run_pmodel <- function(sample_par){ # # Function that runs the P-model for a sample of parameters # # and also adds the new observation error # out <- runread_pmodel_f( # drivers = p_model_drivers, # par = c(par_fixed, sample_par) # err_gpp is also here, but simply ignored # ) # # # return modelled GPP and prediction for a new GPP observation # gpp <- out$data[[1]][, "gpp"] # data.frame(gpp = gpp, # gpp_pred = gpp + rnorm(n = length(gpp), mean = 0, # sd = sample_par$err_gpp), # date = out$data[[1]][, "date"]) # } # # set.seed(2023) # # Run the P-model for each set of parameters # pmodel_runs <- samples_par |> # dplyr::mutate(sim = purrr::map(pars, ~run_pmodel(.x))) |> # # format to obtain 90% credible intervals # dplyr::select(mcmc_id, sim) |> # tidyr::unnest(sim) |> # dplyr::group_by(date) |> # # compute quantiles for each day # dplyr::summarise( # gpp_q05 = quantile(gpp, 0.05, na.rm = TRUE), # gpp_q50 = quantile(gpp, 0.5, na.rm = TRUE), # get median # gpp_q95 = quantile(gpp, 0.95, na.rm = TRUE), # gpp_pred_q05 = quantile(gpp_pred, 0.05, na.rm = TRUE), # gpp_pred_q95 = quantile(gpp_pred, 0.95, na.rm = TRUE) # ) ## ----simulate_pmodel_runs, include = FALSE------------------------------------ # TODO: get rid of this and always fully run the vignettes # fake output since calibration isn't run # saveRDS(pmodel_runs, "files/sensitivity_analysis.Rmd__pmodel_runs.RDS") pmodel_runs <- readRDS("files/sensitivity_analysis.Rmd__pmodel_runs.RDS") ## ----fig.width=7, fig.height=5------------------------------------------------ # Plot the credible intervals computed above # for the first year only data_to_plot <- pmodel_runs |> # Plot only first year dplyr::slice(1:365) |> dplyr::left_join( # Merge GPP validation data (first year) p_model_validation$data[[1]][1:365, ] |> dplyr::rename(gpp_obs = gpp), by = "date") # TODO: clean below # plot_gpp_error <- ggplot(data = data_to_plot) + # # geom_ribbon( # # aes(ymin = gpp_pred_q05, # # ymax = gpp_pred_q95, # # x = date), # # fill = 'grey40', alpha = 0.2) + # geom_ribbon( # aes(ymin = gpp_q05, # ymax = gpp_q95, # x = date), # fill = 'blue', alpha = 0.5) + # geom_ribbon( # aes(ymin = gpp_pred_q05, # ymax = gpp_pred_q95, # x = date), # fill = 'grey40', alpha = 0.2) + # geom_line( # aes(x = date, # y = gpp_q50), # colour = "grey40", # alpha = 0.8) + # # # Include observations in the plot # geom_point(shape = 3, # aes(x = date, # y = gpp_obs),# color = "Observations"), # alpha = 0.8) + # theme_classic() + # theme(panel.grid.major.y = element_line(), # legend.position = "bottom") + # labs( # x = 'Date', # y = expression(paste("GPP (g C m"^-2, "s"^-1, ")")) # ) # plot_gpp_error plot_gpp_error <- ggplot(data = data_to_plot) + geom_ribbon( aes(ymin = gpp_pred_q05, ymax = gpp_pred_q95, x = date, fill = "Model uncertainty")) + geom_ribbon( aes(ymin = gpp_q05, ymax = gpp_q95, x = date, fill = "Parameter uncertainty")) + geom_line( aes(x = date, y = gpp_q50, color = "Predictions")) + # Include observations in the plot geom_point(shape = 3, aes(x = date, y = gpp_obs, color = "Observations")) + theme_classic() + theme(panel.grid.major.y = element_line(), legend.position = "bottom") + labs( x = 'Date', y = expression(paste("GPP (g C m"^-2, "s"^-1, ")")) ) t_col <- function(color, percent = 50, name = NULL) { # color = color name # percent = % transparency # name = an optional name for the color ## Get RGB values for named color rgb.val <- col2rgb(color) ## Make new color using input color as base and alpha set by transparency t.col <- rgb(rgb.val[1], rgb.val[2], rgb.val[3], max = 255, alpha = (100 - percent) * 255 / 100, names = name) ## Save the color invisible(t.col) } plot_gpp_error <- plot_gpp_error + scale_color_manual(name = "", breaks = c("Observations", "Predictions", "Non-calibrated predictions"), values = c(t_col("black", 10), t_col("#E69F00", 10), t_col("#56B4E9", 10))) + scale_fill_manual(name = "", breaks = c("Model uncertainty", "Parameter uncertainty"), values = c(t_col("#E69F00", 60), t_col("#009E73", 40))) plot_gpp_error #dir.create("./analysis/paper_results_files2") ## ----fig.width=7, fig.height=5, echo = FALSE, eval = FALSE-------------------- # # # # Plot observed and predicted GPP, with a 95% confidence interval using err_gpp # data_to_plot_MAP <- runread_pmodel_f(drivers = p_model_drivers, # par = c(par_fixed, par_calib$par)) |> # dplyr::select(sitename, data) |> # tidyr::unnest(data) |> # dplyr::slice(1:365) # # plot_gpp_error <- ggplot(data = data_to_plot_MAP) + # Plot only first year # geom_ribbon( # aes(ymin = gpp - 2*par_calib$par[4], # ymax = gpp + 2*par_calib$par[4], # x = date), # fill = 'grey40', alpha = 0.2) + # geom_line( # aes( # date, # gpp # ), # colour = "grey40", # alpha = 0.8 # ) + # theme_classic() + # theme(panel.grid.major.y = element_line()) + # labs( # x = 'Date', # y = expression(paste("GPP (g C m"^-2, "s"^-1, ")")) # ) # # # Define GPP validation data (first year) # validation_data <- p_model_validation$data[[1]][1:365, ] # # # Include observations in the plot # plot_gpp_error + # geom_point( # data = validation_data, # aes( # date, # gpp # ), # alpha = 0.8 # )