# Source code for rrmpg.data.camelsloader

# -*- coding: utf-8 -*-
# This file is part of RRMPG.
#
# RRMPG is free software with the aim to provide a playground for experiments
# with hydrological rainfall-runoff-models while achieving competitive
# performance results.
#
# You should have received a copy of the MIT License along with RRMPG. If not,
# see <https://opensource.org/licenses/MIT>
import pandas as pd

from pathlib import Path

class CAMELSLoader(object):
    """Interface for loading basin data from the CAMELS dataset.

    This class provides an easy to use interface to load different basins from
    the CAMELS [1] dataset provided within this Python package. CAMELS stands
    for Catchment Attributes for Large-Sample Studies and is a hydrological
    dataset provided by NCAR for 671 catchments in the USA. The entire data
    set can be downloaded for free at [2]. Within this package we provide the
    data of just a few catchments as toy data for this package.

    [1] Addor, N., A.J. Newman, N. Mizukami, and M.P. Clark, 2017: The CAMELS
    data set: catchment attributes and meteorology for large-sample studies.
    version 2.0. Boulder, CO: UCAR/NCAR. doi:10.5065/D6G73C3Q

    [2] https://ncar.github.io/hydrology/datasets/CAMELS_attributes

    """
    # Basin numbers for which data files are looked up by this loader.
    VALID_BASINS = ['01031500']

    def __init__(self):
        pass

    def _check_basin_number(self, basin_number):
        """Raise a ValueError if `basin_number` is not in VALID_BASINS."""
        if basin_number not in self.VALID_BASINS:
            raise ValueError(
                f"Invalid basin number {basin_number}. Must be one of "
                f"{self.VALID_BASINS}."
            )

    @staticmethod
    def _data_dir():
        """Return the Path of the folder that holds the CAMELS text files."""
        return Path(__file__).parent / 'data' / 'camels'

    @staticmethod
    def _date_index(year, month, day):
        """Build a DatetimeIndex from three columns of date components."""
        # Assemble the dates directly from the numeric columns instead of
        # formatting them as strings and parsing those strings again.
        parts = pd.DataFrame({'year': year, 'month': month, 'day': day})
        return pd.DatetimeIndex(pd.to_datetime(parts))

    def load_basin(self, basin_number: str) -> pd.DataFrame:
        """Load basin data as a pandas DataFrame.

        Load the meteorological data, as well as observed discharge and modeled
        potential evapotranspiration of the specified basin from the CAMELS
        data set.

        Args:
            basin_number: String of the basin number that shall be loaded.

        Returns:
            A pandas DataFrame with the data of the basin, indexed by date and
            restricted to the range from October 1st of the first year in the
            data to September 30th of the last year in the data.

        Raises:
            ValueError: If the basin number is an invalid number. Check the
                .get_basin_numbers() function for a list of all available
                basins.
        """
        self._check_basin_number(basin_number)

        # Path objects to the two needed text files
        data_dir = self._data_dir()
        met_file = data_dir / f"{basin_number}_lump_cida_forcing_leap.txt"
        streamflow_file = data_dir / f"{basin_number}_05_model_output.txt"

        # read meteorological input file; the column names are on the fourth
        # line, the lines before it are not part of the table
        df = pd.read_csv(met_file, sep=r'\s+', header=3)
        df.index = self._date_index(df.Year, df.Mnth, df.Day)

        # load model output data, which contains normalized qobs
        df2 = pd.read_csv(streamflow_file, sep=r'\s+', header=0)
        df2.index = self._date_index(df2.YR, df2.MNTH, df2.DY)

        # copy qobs and pet (values are aligned on the datetime index)
        df['PET'] = df2['PET']
        df['QObs(mm/d)'] = df2['OBS_RUN']

        # drop unnecessary columns
        df = df.drop(['Year', 'Mnth', 'Day', 'Hr'], axis=1)

        # only return values of complete hydrological years
        start_date = pd.to_datetime(f"{df.index[0].year}/10/01",
                                    format="%Y/%m/%d")
        end_date = pd.to_datetime(f"{df.index[-1].year}/09/30",
                                  format="%Y/%m/%d")

        return df[start_date:end_date]

    def get_basin_numbers(self) -> list:
        """Return a list of all available basin numbers.

        A new list is returned on every call, so modifying the result does not
        alter the set of basins this class accepts.
        """
        return list(self.VALID_BASINS)

    def get_station_height(self, basin_number: str) -> float:
        """Return the elevation of the meteorological station of one basin.

        Args:
            basin_number: String of the basin number that shall be loaded.

        Returns:
            The elevation of the meteorological station.

        Raises:
            ValueError: If the basin number is an invalid number (check the
                .get_basin_numbers() function for a list of all available
                basins), or if the meteorological file has no second line or
                that line is not a number.
        """
        self._check_basin_number(basin_number)

        # Path object meteorological file
        met_file = (self._data_dir()
                    / f"{basin_number}_lump_cida_forcing_leap.txt")

        with open(met_file, 'r') as fp:
            # elevation is specified in the second line, so only the first
            # two lines have to be read
            next(fp, None)
            elevation_line = next(fp, None)

        if elevation_line is None:
            raise ValueError(
                f"Could not read the station elevation from {met_file}: "
                "the file has fewer than two lines."
            )

        return float(elevation_line.strip())