# Source code for supy.supy_module

# ###########################################################################
# SuPy: SUEWS for Python
#
# Authors:
# Ting Sun, ting.sun@reading.ac.uk
#
# History:
# 20 Jan 2018: first alpha release
# 01 Feb 2018: performance improvement
# 03 Feb 2018: improvement in output processing
# 08 Mar 2018: pypi packaging
# 04 Oct 2018: overhaul of structure
# 05 Oct 2018: added sample run data
###########################################################################


from __future__ import division, print_function

# import functools
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd
import pandas

from .supy_env import path_supy_module
from .supy_load import (load_InitialCond_grid_df, load_SUEWS_dict_ModConfig,
                        load_SUEWS_Forcing_ESTM_df_raw,
                        load_SUEWS_Forcing_met_df_raw, resample_forcing_met,
                        resample_linear)
from .supy_post import pack_df_output, pack_df_output_array, pack_df_state
from .supy_run import (pack_df_state_final, pack_grid_dict, suews_cal_tstep,
                       suews_cal_tstep_multi)

##############################################################################
# 1. compact wrapper for loading SUEWS settings
# @functools.lru_cache(maxsize=16)


def init_supy(path_runcontrol: str)->pd.DataFrame:
    '''Initialise supy by loading initial model states.

    Parameters
    ----------
    path_runcontrol : str
        Path to SUEWS RunControl.nml

    Returns
    -------
    df_state_init: pandas.DataFrame
        Initial model states.
        See `df_state_var` for details.

    Raises
    ------
    FileNotFoundError
        If `path_runcontrol` (after `~` expansion) does not exist.

    Examples
    --------
    >>> path_runcontrol = "~/SUEWS_sims/RunControl.nml" # a valid path to `RunControl.nml`
    >>> df_state_init = supy.init_supy(path_runcontrol)

    '''

    path_runcontrol_x = Path(path_runcontrol).expanduser()
    # check existence explicitly: `Path.resolve()` only raises for a missing
    # path on some Python versions, so it cannot be relied on as the check
    if not path_runcontrol_x.exists():
        raise FileNotFoundError(
            '{path} does not exists!'.format(path=path_runcontrol_x))
    path_runcontrol_x = path_runcontrol_x.resolve()

    # df_state_init: initial conditions for SUEWS simulations
    df_state_init = load_InitialCond_grid_df(path_runcontrol_x)
    return df_state_init


def load_forcing_grid(path_runcontrol: str, grid: int)->pd.DataFrame:
    '''Load forcing data for a specific grid included in the index of `df_state_init`.

    Parameters
    ----------
    path_runcontrol : str
        Path to SUEWS RunControl.nml
    grid : int
        Grid number

    Returns
    -------
    df_forcing: pandas.DataFrame
        Forcing data. See `df_forcing_var` for details.

    Raises
    ------
    FileNotFoundError
        If `path_runcontrol` (after `~` expansion) does not exist.

    Examples
    --------
    >>> path_runcontrol = "~/SUEWS_sims/RunControl.nml"  # a valid path to `RunControl.nml`
    >>> df_state_init = supy.init_supy(path_runcontrol) # get `df_state_init`
    >>> grid = df_state_init.index[0] # first grid number included in `df_state_init`
    >>> df_forcing = supy.load_forcing_grid(path_runcontrol, grid) # get df_forcing


    '''

    path_runcontrol_x = Path(path_runcontrol).expanduser()
    # fail early with a clear error instead of falling through to the
    # final `return` with nothing to return
    if not path_runcontrol_x.exists():
        raise FileNotFoundError(
            '{path} does not exists!'.format(path=path_runcontrol_x))
    path_runcontrol_x = path_runcontrol_x.resolve()

    dict_mod_cfg = load_SUEWS_dict_ModConfig(path_runcontrol_x)
    df_state_init = init_supy(path_runcontrol_x)

    # setting variables needed for the met forcing; the ESTM-related
    # settings are not read while the ESTM component is disabled (see below)
    filecode, kdownzen, tstep_met_in, multiplemetfiles = (
        dict_mod_cfg[key] for key in [
            'filecode',
            'kdownzen',
            'resolutionfilesin',
            'multiplemetfiles',
        ]
    )

    # grid-specific settings are stored in `df_state_init` under
    # two-level column labels of the form (variable, '0')
    tstep_mod, lat, lon, alt, timezone = df_state_init.loc[
        grid,
        [(x, '0') for x in ['tstep', 'lat', 'lng', 'alt', 'timezone']]
    ].values

    # the input directory is given relative to the folder of RunControl.nml
    path_site = path_runcontrol_x.parent
    path_input = path_site / dict_mod_cfg['fileinputpath']

    # load raw data
    # met forcing
    df_forcing_met = load_SUEWS_Forcing_met_df_raw(
        path_input, filecode, grid, tstep_met_in, multiplemetfiles)

    # resample raw data from tstep_in to tstep_mod
    df_forcing_tstep = resample_forcing_met(
        df_forcing_met, tstep_met_in, tstep_mod,
        lat, lon, alt, timezone, kdownzen)

    # NOTE (TS 28 Dec 2018): the AnOHM and ESTM components are disabled for
    # now and for better performance, so neither `metforcingdata_grid` nor the
    # ESTM surface temperature forcing (`ts5mindata_ir`) is attached here.

    # coerced precision here to prevent numerical errors inside Fortran
    df_forcing = np.around(df_forcing_tstep, decimals=10)
    # time columns are cast to integers for later use in main calculation
    df_forcing[['iy', 'id', 'it', 'imin']] = df_forcing[[
        'iy', 'id', 'it', 'imin']].astype(np.int64)

    return df_forcing


# load sample data for quickly starting a demo run
def load_SampleData()->Tuple[pandas.DataFrame, pandas.DataFrame]:
    '''Load the sample dataset shipped with supy for a quick demo run.

    The sample `RunControl.nml` is looked up in the `sample_run` folder
    under `path_supy_module`; forcing data are loaded for the first grid
    found in the initial model states.

    Returns
    -------
    df_state_init, df_forcing: Tuple[pandas.DataFrame, pandas.DataFrame]
        - df_state_init: `initial model states <df_state_var>`
        - df_forcing: `forcing data <df_forcing_var>`

    Examples
    --------

    >>> df_state_init, df_forcing = supy.load_SampleData()

    '''

    # location of the bundled sample configuration
    path_runcontrol = Path(path_supy_module).joinpath(
        'sample_run', 'RunControl.nml')

    # initial states first: they tell which grids are available
    df_state_init = init_supy(path_runcontrol)
    grid_first = df_state_init.index[0]

    # forcing data of the first grid
    df_forcing = load_forcing_grid(path_runcontrol, grid_first)

    return df_state_init, df_forcing

# input processing code ends here
##############################################################################


##############################################################################
# 2. compact wrapper for running a whole simulation
# # main calculation
# input as DataFrame

def run_supy(
        df_forcing: pandas.DataFrame,
        df_state_init: pandas.DataFrame,
        save_state=False,
)->Tuple[pandas.DataFrame, pandas.DataFrame]:
    '''Perform a supy simulation.

    Parameters
    ----------
    df_forcing : pandas.DataFrame
        forcing data.
    df_state_init : pandas.DataFrame
        initial model states; one row per grid.
    save_state : bool, optional
        flag for saving model states at each timestep, which can be useful
        in diagnosing model runtime performance or performing a restart run.
        When set, the single-timestep kernel is called for every timestep and
        grid; otherwise the block-level kernel is called once per grid
        (the default is False, which instructs supy not to save runtime
        model states).

    Returns
    -------
    df_output, df_state_final : Tuple[pandas.DataFrame, pandas.DataFrame]
        - df_output: `output results <df_output_var>`, restricted to the
          `SUEWS`, `snow` and `DailyState` groups
        - df_state_final: `final model states <df_state_var>`

    Examples
    --------

    >>> df_output, df_state_final = supy.run_supy(df_forcing, df_state_init)


    '''

    # forcing-file column headers -> names expected by the SUEWS Fortran
    # interface (workaround for the naming inconsistency between the two)
    forcing_to_kernel = {
        '%' + 'iy': 'iy',
        'id': 'id',
        'it': 'it',
        'imin': 'imin',
        'qn': 'qn1_obs',
        'qh': 'qh_obs',
        'qe': 'qe',
        'qs': 'qs_obs',
        'qf': 'qf_obs',
        'U': 'avu1',
        'RH': 'avrh',
        'Tair': 'temp_c',
        'pres': 'press_hpa',
        'rain': 'precip',
        'kdown': 'avkdn',
        'snow': 'snow_obs',
        'ldown': 'ldown_obs',
        'fcld': 'fcld_obs',
        'Wuh': 'wu_m3',
        'xsmd': 'xsmd',
        'lai': 'lai_obs',
        'kdiff': 'kdiff',
        'kdir': 'kdir',
        'wdir': 'wdir',
    }

    # work on a copy of the initial states rather than the caller's frame
    df_init = df_state_init.copy()

    # placeholder columns: `metforcingdata_grid` is used by AnOHM and
    # `ts5mindata_ir` by ESTM, both temporarily disabled in supy
    df_forcing = df_forcing.assign(metforcingdata_grid=0, ts5mindata_ir=0)
    df_forcing = df_forcing.rename(columns=forcing_to_kernel)

    # grids to simulate are determined by the initial states
    grids = df_init.index
    # first timestep as found in the forcing data
    t_first = df_forcing.index[0]

    # model states keyed by (timestep, grid), seeded with the initial states
    states = {
        (t_first, grid): pack_grid_dict(ser_init)
        for grid, ser_init in df_init.iterrows()
    }
    # model output; keyed by grid (block mode) or (timestep, grid)
    outputs = {}

    if not save_state:
        # block mode: one kernel call per grid over the whole forcing
        # period, for better performance
        t_last = df_forcing.index[-1]

        for grid in grids:
            state_end, output_block = suews_cal_tstep_multi(
                states[(t_first, grid)], df_forcing)
            outputs[grid] = output_block
            # end state is filed under the step following the last one,
            # i.e. it is the model state for the next run
            states[(t_last + 1, grid)] = state_end

        # time-aware DataFrame with -999 replaced by NaN
        df_output = pack_df_output_array(outputs, df_forcing)\
            .replace(-999., np.nan)
        df_state_final = pack_df_state(states).swaplevel(0, 1)

    else:
        # step mode: slower but keeps the model state of every timestep
        # forcing rows as dicts, built with `itertuples` for performance
        forcing_by_tstep = {
            row.Index: row._asdict()
            for row in df_forcing.itertuples()
        }

        for tstep in df_forcing.index:
            # the same met forcing applies to all grids at this timestep
            forcing_now = forcing_by_tstep[tstep]
            for grid in grids:
                state_end, output_now = suews_cal_tstep(
                    states[(tstep, grid)], forcing_now)
                outputs[(tstep, grid)] = output_now
                # NOTE(review): the start state of the next step is looked
                # up under `tstep + 1`, so the forcing index is assumed to
                # support `+ 1` and to advance by one per row - confirm
                states[(tstep + 1, grid)] = state_end

        # 'datetime' is dropped as it is already included in the index
        df_output = pack_df_output(outputs)\
            .swaplevel(0, 1)\
            .drop(columns=['datetime'], level=0)
        df_state_final = pack_df_state(states).swaplevel(0, 1)

    # keep only the supported output groups (ESTM is not supported yet)
    df_output = df_output.loc[:, ['SUEWS', 'snow', 'DailyState']]
    # trim column levels left unused by the selection above
    df_output.columns = df_output.columns.remove_unused_levels()

    # pack final model states into a proper dataframe
    df_state_final = pack_df_state_final(df_state_final, df_init)

    return df_output, df_state_final