Source code for rrmpg.models.gr4j

# -*- coding: utf-8 -*-
# This file is part of RRMPG.
#
# RRMPG is free software with the aim to provide a playground for experiments
# with hydrological rainfall-runoff-models while achieving competitive
# performance results.
#
# You should have received a copy of the MIT License along with RRMPG. If not,
# see <https://opensource.org/licenses/MIT>
"""Interface to the GR4J hydrological model."""

import numbers

import numpy as np

from scipy import optimize

from .basemodel import BaseModel
from .gr4j_model import run_gr4j
from ..utils.array_checks import validate_array_input, check_for_negatives
from ..utils.metrics import calc_mse


[docs]class GR4J(BaseModel): """Interface to the GR4J hydrological model. This class builds an interface to the GR4J model, as presented in [1]. This model should only be used with daily data. If no model parameters are passed upon initialization, generates random parameter set. [1] Perrin, Charles, Claude Michel, and Vazken Andréassian. "Improvement of a parsimonious model for streamflow simulation." Journal of hydrology 279.1 (2003): 275-289. Args: params: (optional) Dictionary containing all model parameters as a separate key/value pairs. Raises: ValueError: If a dictionary of model parameters is passed but one of the parameters is missing. """ # List of model parameters _param_list = ['x1', 'x2', 'x3', 'x4'] # Dictonary with the default parameter bounds _default_bounds = {'x1': (100, 1200), 'x2': (-5, 3), 'x3': (20, 300), 'x4': (1.1, 2.9)} # Custom numpy datatype needed for the numba function _dtype = np.dtype([('x1', np.float64), ('x2', np.float64), ('x3', np.float64), ('x4', np.float64)]) def __init__(self, params=None): """Initialize a GR4J model object. Args: params: (optional) Dictionary containing all model parameters as a separate key/value pairs. Raises: ValueError: If a dictionary of model parameters is passed but one of of the parameters is missing. """ super().__init__(params=params)
[docs] def simulate(self, prec, etp, s_init=0., r_init=0., return_storage=False, params=None): """Simulate rainfall-runoff process for given input. This function bundles the model parameters and validates the meteorological inputs, then calls the optimized model routine. The meteorological inputs can be either list, numpy arrays or pandas Series. Args: prec: Array of daily precipitation sum [mm] etp: Array of mean potential evapotranspiration [mm] s_init: (optional) Initial value of the production storage as fraction of x1. r_init: (optional) Initial value of the routing storage as fraction of x3. return_stprage: (optional) Boolean, indicating if the model storages should also be returned. params: (optional) Numpy array of parameter sets, that will be evaluated a once in parallel. Must be of the models own custom data type. If nothing is passed, the parameters, stored in the model object, will be used. Returns: An array with the simulated streamflow and optional one array for each of the two storages. Raises: ValueError: If one of the inputs contains invalid values. TypeError: If one of the inputs has an incorrect datatype. RuntimeError: If there is a size mismatch between the precipitation and the pot. evapotranspiration input. """ # Validation check of the inputs prec = validate_array_input(prec, np.float64, 'precipitation') etp = validate_array_input(etp, np.float64, 'pot. evapotranspiration') # Check if there exist negative precipitation values in the input if check_for_negatives(prec): msg = "The precipitation array contains negative values." raise ValueError(msg) # Check for same size of inputs if len(prec) != len(etp): msg = ["The arrays of precipitation and pot. evapotranspiration," " must be of the same size."] raise RuntimeError("".join(msg)) # validate initial state inputs if not isinstance(s_init, numbers.Number): raise TypeError("'s1_init' must be a Number.") if not isinstance(r_init, numbers.Number): raise TypeError("'r_init' must be a Number.") # Make sure the intial storage values are floating point numbers s_init = float(s_init) r_init = float(r_init) # check if the intial values are in rage [0, 1] if (s_init < 0) or (s_init > 1): msg = ["The initial value of the production storage must be in ", "the range [0,1]."] raise ValueError("".join(msg)) if (r_init < 0) or (r_init > 1): msg = ["The initial value of the routing storage must be in the", " range [0,1]."] raise ValueError("".join(msg)) # If no parameters were passed, prepare array w. params from attributes if params is None: params = np.zeros(1, dtype=self._dtype) for param in self._param_list: params[param] = getattr(self, param) # Else, check the param input for correct datatype else: if params.dtype != self._dtype: msg = ["The model parameters must be a numpy array of the ", "models own custom data type."] raise TypeError("".join(msg)) # if only one parameter set is passed, expand dimensions to 1D if isinstance(params, np.void): params = np.expand_dims(params, params.ndim) # Create output arrays qsim = np.zeros((prec.shape[0], params.size), np.float64) if return_storage: s_store = np.zeros((prec.shape[0], params.size), np.float64) r_store = np.zeros((prec.shape[0], params.size), np.float64) # call simulation function for each parameter set for i in range(params.size): if return_storage: qsim[:,i], s_store[:,i], r_store[:,i] = run_gr4j(prec, etp, s_init, r_init, params[i]) else: qsim[:,i], _, _ = run_gr4j(prec, etp, s_init, r_init, params[i]) return qsim if return_storage: return qsim, s_store, r_store else: return qsim
[docs] def fit(self, qobs, prec, etp, s_init=0., r_init=0.): """Fit the GR4J model to a timeseries of discharge. This functions uses scipy's global optimizer (differential evolution) to find a good set of parameters for the model, so that the observed discharge is simulated as good as possible. Args: qobs: Array of observed streamflow discharge [mm/day] prec: Array of daily precipitation sum [mm] etp: Array of mean potential evapotranspiration [mm] s_init: (optional) Initial value of the production storage as fraction of x1. r_init: (optional) Initial value of the routing storage as fraction of x3. Returns: res: A scipy OptimizeResult class object. Raises: ValueError: If one of the inputs contains invalid values. TypeError: If one of the inputs has an incorrect datatype. RuntimeErrror: If there is a size mismatch between the precipitation and the pot. evapotranspiration input. """ # Validation check of the inputs prec = validate_array_input(prec, np.float64, 'precipitation') etp = validate_array_input(etp, np.float64, 'pot. evapotranspiration') qobs = validate_array_input(qobs, np.float64, 'observed discharge') # Check if there exist negative precipitation values in the input if check_for_negatives(prec): msg = "The precipitation array contains negative values." raise ValueError(msg) # Check for same size of inputs if len(prec) != len(etp): msg = ["The arrays of precipitation and pot. evapotranspiration," " must be of the same size."] raise RuntimeError("".join(msg)) # Make sure the intial storage values are floating point numbers s_init = float(s_init) r_init = float(r_init) # check if the intial values are in rage [0, 1] if (s_init < 0) or (s_init > 1): msg = ["The initial value of the production storage must be in ", "the range [0,1]."] raise ValueError("".join(msg)) if (r_init < 0) or (r_init > 1): msg = ["The initial value of the routing storage must be in the", " range [0,1]."] raise ValueError("".join(msg)) # pack input arguments for scipy optimizer args = (qobs, prec, etp, s_init, r_init, self._dtype) bnds = tuple([self._default_bounds[p] for p in self._param_list]) # call scipy's global optimizer res = optimize.differential_evolution(_loss, bounds=bnds, args=args) return res
def _loss(X, *args): """Return the loss value for the current parameter set.""" # Unpack static arrays qobs = args[0] prec = args[1] etp = args[2] s_init = args[3] r_init = args[4] dtype = args[5] # Create custom numpy array of model parameters params = np.zeros(1, dtype=dtype) params['x1'] = X[0] params['x2'] = X[1] params['x3'] = X[2] params['x4'] = X[3] # Calculate simulated streamflow qsim, _, _ = run_gr4j(prec, etp, s_init, r_init, params[0]) # Calculate the loss of the fit as the mean squared error loss_value = calc_mse(qobs, qsim) return loss_value