Modelling maturity & sex with multiple stocks

This vignette walks through a script that will generate a gadget3 model, explaining concepts along the way.

Code blocks that make up the gadget3 script are marked in blue, like this:

### Modelling maturity & sex with multiple stocks

When combined they will form a full model, see the appendix for the entire script.

Stocks & substocks

As mentioned before in vignette('introduction-single-stock'), gadget3 stock objects do not have to correspond 1:1 with a species.

We can have multiple stock objects representing the same species in a different stage in their life-cycle, most commonly mature and immature versions, male and female versions, or all 4.

The set up is much the same as before, but major differences will be highlighted.

Initial setup & time-keeping is identical:

library(gadget3)
library(dplyr)

actions <- list()
area_names <- g3_areas(c('IXa', 'IXb'))

# Create time definitions ####################

actions_time <- list(
  g3a_time(
    1979L, 2023L,
    step_lengths = c(3L, 3L, 3L, 3L)),
  NULL)

actions <- c(actions, actions_time)

Stocks

We define 2 stocks instead of one, and a list() containing both for convenience:

# Create stock definition for fish ####################
st_imm <- g3_stock(c(species = "fish", 'imm'), seq(5L, 25L, 5)) |>
  g3s_livesonareas(area_names["IXa"]) |>
  g3s_age(1L, 5L)

st_mat <- g3_stock(c(species = "fish", 'mat'), seq(5L, 25L, 5)) |>
  g3s_livesonareas(area_names["IXa"]) |>
  g3s_age(3L, 10L)
stocks = list(imm = st_imm, mat = st_mat)

Notice that:

Stock actions

Stock actions now need to include interactions between immature & mature:

actions_st_imm <- list(
  g3a_growmature(st_imm,
    g3a_grow_impl_bbinom(
      maxlengthgroupgrowth = 4L ),
    # Add maturation
    maturity_f = g3a_mature_continuous(),
    output_stocks = list(st_mat),
    transition_f = ~TRUE ),
  g3a_naturalmortality(st_imm),
  g3a_initialconditions_normalcv(st_imm),
  g3a_renewal_normalparam(st_imm),
  g3a_age(st_imm, output_stocks = list(st_mat)),
  NULL)

actions_st_mat <- list(
  g3a_growmature(st_mat,
    g3a_grow_impl_bbinom(
      maxlengthgroupgrowth = 4L )),
  g3a_naturalmortality(st_mat),
  g3a_initialconditions_normalcv(st_mat),
  g3a_age(st_mat),
  NULL)

actions_likelihood_st <- list(
  g3l_understocking(stocks, nll_breakdown = TRUE),
  NULL)

actions <- c(actions, actions_st_imm, actions_st_mat, actions_likelihood_st)

actions_st_imm and actions_st_imm are largely similar to our actions_fish from the previous model, but:

Fleet actions

There is very little difference defining a fleet for a multiple stock model vs. single stocks.

To define a fleet, we need to introduce historical data into the model. In our case we will generate random data to use later:

# Fleet data for f_surv #################################

# Landings data: For each year/step/area
expand.grid(year = 1990:1994, step = 2, area = 'IXa') |>
    # Generate a random total landings by weight
    mutate(weight = rnorm(n(), mean = 1000, sd = 100)) |>
    # Assign result to landings_f_surv
    identity() -> landings_f_surv

# Length distribution data: Generate 100 random samples in each year/step/area
expand.grid(year = 1990:1994, step = 2, area = 'IXa', length = rep(NA, 100)) |>
  # Generate random lengths for these samples
  mutate(length = rnorm(n(), mean = 50, sd = 20)) |>
  # Save unagggregated data into ldist_f_surv.raw
  identity() -> ldist_f_surv.raw

# Aggregate .raw data
ldist_f_surv.raw |>
  # Group into length bins
  group_by(
      year = year,
      step = step,
      length = cut(length, breaks = c(seq(0, 80, 20), Inf), right = FALSE) ) |>
  # Report count in each length bin
  summarise(number = n(), .groups = 'keep') |>
  # Save into ldist_f_surv
  identity() -> ldist_f_surv

# Assume 5 * 5 samples in each year/step/area
expand.grid(year = 1990:1994, step = 2, area = 'IXa', age = rep(NA, 5), length = rep(NA, 5)) |>
  # Generate random lengths/ages for these samples
  mutate(length = rnorm(n(), mean = 50, sd = 20)) |>
  # Generate random whole numbers for age
  mutate(age = floor(runif(n(), min = 1, max = 5))) |>
  # Group into length/age bins
  group_by(
      year = year,
      step = step,
      age = age,
      length = cut(length, breaks = c(seq(0, 80, 20), Inf), right = FALSE) ) |>
  # Report count in each length bin
  summarise(number = n(), .groups = 'keep') ->
  aldist_f_surv

For more information on how this works, see vignette("incorporating-observation-data").

Our fleet is defined with the same set of actions as the single-species model:

# Create fleet definition for f_surv ####################
f_surv <- g3_fleet("f_surv") |> g3s_livesonareas(area_names["IXa"])

actions_f_surv <- list(
  g3a_predate_fleet(
    f_surv,
    stocks,
    suitabilities = g3_suitability_exponentiall50(by_stock = 'species'),
    catchability_f = g3a_predate_catchability_totalfleet(
      g3_timeareadata("landings_f_surv", landings_f_surv, "weight", areas = area_names))),
  NULL)
actions_likelihood_f_surv <- list(
  g3l_catchdistribution(
    "ldist_f_surv",
    obs_data = ldist_f_surv,
    fleets = list(f_surv),
    stocks = stocks,
    function_f = g3l_distribution_sumofsquares(),
    area_group = area_names,
    report = TRUE,
    nll_breakdown = TRUE),
  g3l_catchdistribution(
    "aldist_f_surv",
    obs_data = aldist_f_surv,
    fleets = list(f_surv),
    stocks = stocks,
    function_f = g3l_distribution_sumofsquares(),
    area_group = area_names,
    report = TRUE,
    nll_breakdown = TRUE),
  NULL)

actions <- c(actions, actions_f_surv, actions_likelihood_f_surv)

There are 2 differences to before:

The by_stock parameter is passed through to g3_parameterized(). We can see the result of setting this in the parameter template. If by_stock = TRUE (the default) then we get parameters for both fish_imm.f_surv.l50 & fish_mat.f_surv.l50:

simple_model <- g3_to_r(list(g3a_time(1990, 1994), g3a_predate_fleet(
    f_surv,
    stocks,
    suitabilities = g3_suitability_exponentiall50(by_stock = TRUE),
    catchability_f = g3a_predate_catchability_totalfleet(1) )))
names(attr(simple_model, "parameter_template"))
## [1] "retro_years"           "fish_imm.f_surv.alpha" "fish_imm.f_surv.l50"  
## [4] "fish_mat.f_surv.alpha" "fish_mat.f_surv.l50"   "project_years"

If by_stock = 'species', then there is a single, shared fish.f_surv.l50 parameter:

simple_model <- g3_to_r(list(g3a_time(1990, 1994), g3a_predate_fleet(
    f_surv,
    stocks,
    suitabilities = g3_suitability_exponentiall50(by_stock = 'species'),
    catchability_f = g3a_predate_catchability_totalfleet(1) )))
names(attr(simple_model, "parameter_template"))
## [1] "retro_years"       "fish.f_surv.alpha" "fish.f_surv.l50"  
## [4] "project_years"

The by_stock parameter is just a convenient shortcut to change the default settings, if we specify g3_parameterized() ourselves we can change the parameterization in other ways, for example by_year = TRUE gives us per-year l50:

simple_model <- g3_to_r(list(g3a_time(1990, 1994), g3a_predate_fleet(
    f_surv,
    stocks,
    suitabilities = g3_suitability_exponentiall50(
        l50 = g3_parameterized("l50", by_stock = 'species', by_predator = TRUE, by_year = TRUE)),
    catchability_f = g3a_predate_catchability_totalfleet(1) )))
names(attr(simple_model, "parameter_template"))
## [1] "retro_years"           "fish_imm.f_surv.alpha" "fish.f_surv.l50.1990" 
## [4] "fish.f_surv.l50.1991"  "fish.f_surv.l50.1992"  "fish.f_surv.l50.1993" 
## [7] "fish.f_surv.l50.1994"  "fish_mat.f_surv.alpha" "project_years"

See ?vignette('model-customisation') for more.

If we had data on the distribution of mature vs. immature, our observation data could contain a stock column with fish_imm or fish_mat. See vignette("incorporating-observation-data").

Again, further fleets can be added by repeating the code above.

Survey indices

Survey indices should be handed the full stocks list, instead of a single stock, but are otherwise the same as before:

# Create abundance index for si_cpue ########################

# Generate random data
expand.grid(year = 1990:1994, step = 3, area = 'IXa') |>
    # Fill in a weight column with total biomass for the year/step/area combination
    mutate(weight = runif(n(), min = 10000, max = 100000)) ->
    dist_si_cpue

actions_likelihood_si_cpue <- list(

  g3l_abundancedistribution(
    "dist_si_cpue",
    dist_si_cpue,

    stocks = stocks,
    function_f = g3l_distribution_surveyindices_log(alpha = NULL, beta = 1),
    area_group = area_names,
    report = TRUE,
    nll_breakdown = TRUE),
  NULL)

actions <- c(actions, actions_likelihood_si_cpue)

Creating model functions and Parameterization

At this point, we are ready to convert our model into code:

# Create model objective function ####################

model_code <- g3_to_tmb(c(actions, list(
    g3a_report_detail(actions),
    g3l_bounds_penalty(actions) )))

Now we should be configuring parameters based on the template.

Thanks to using wildcards in the g3_init_val() calls, a lot of the parameter settings will work regardless of the model being single- or multi-stock, so we don’t need to change the initial values from the previous model:

# Guess l50 / linf based on stock sizes
estimate_l50 <- g3_stock_def(st_imm, "midlen")[[length(g3_stock_def(st_imm, "midlen")) / 2]]
estimate_linf <- max(g3_stock_def(st_imm, "midlen"))
estimate_t0 <- g3_stock_def(st_imm, "minage") - 0.8

attr(model_code, "parameter_template") |>
  g3_init_val("*.rec|init.scalar", 10, lower = 0.001, upper = 200) |>
  g3_init_val("*.init.#", 10, lower = 0.001, upper = 200) |>
  g3_init_val("*.rec.#", 100, lower = 1e-6, upper = 1000) |>
  g3_init_val("*.rec.sd", 5, lower = 4, upper = 20) |>
  g3_init_val("*.M.#", 0.15, lower = 0.001, upper = 1) |>
  g3_init_val("init.F", 0.5, lower = 0.1, upper = 1) |>
  g3_init_val("*.Linf", estimate_linf, spread = 0.2) |>
  g3_init_val("*.K", 0.3, lower = 0.04, upper = 1.2) |>
  g3_init_val("*.t0", estimate_t0, spread = 2) |>
  g3_init_val("*.walpha", 0.01, optimise = FALSE) |>
  g3_init_val("*.wbeta", 3, optimise = FALSE) |>
  g3_init_val("*.*.alpha", 0.07, lower = 0.01, upper = 0.2) |>
  g3_init_val("*.*.l50", estimate_l50, spread = 0.25) |>
  g3_init_val("*.bbin", 100, lower = 1e-05, upper = 1000) |>
  identity() -> params.in

Finally we are ready for optimisation runs. g3_tmb_adfun() is a wrapper around TMB::MakeADFun() and TMB::compile, producing a TMB objective function. gadgetutils::g3_iterative() then optimises based on iterative reweighting

# Optimise model ################################
obj.fn <- g3_tmb_adfun(model_code, params.in)

params.out <- gadgetutils::g3_iterative(getwd(),
    wgts = "WGTS",
    model = model_code,
    params.in = params.in,
    grouping = list(
        fleet = c("ldist_f_surv", "aldist_f_surv"),
        abund = c("dist_si_cpue")),
    method = "BFGS",
    control = list(maxit = 1000, reltol = 1e-10),
    cv_floor = 0.05)

Once this has finished, we can view the output using gadgetplots::gadget_plots().

# Generate detailed report ######################
fit <- gadgetutils::g3_fit(model_code, params.out)
gadgetplots::gadget_plots(fit, "figs", file_type = "html")

Once finished, you can view the output in your web browser:

utils::browseURL("figs/model_output_figures.html")

Appendix: Full model script

For convenience, here is all the sections of the model script above joined together: