Source code for komanawa.basgra_nz_py.basgra_python

"""
This is a place to create a python wrapper for the BASGRA fortran model in fortarn_BASGRA_NZ

 Author: Matt Hanson
 Created: 12/08/2020 9:32 AM
 """
import numpy as np
import pandas as pd
from copy import deepcopy
from komanawa.basgra_nz_py.input_output_keys import param_keys, out_cols, days_harvest_keys, matrix_weather_keys_pet, \
    matrix_weather_keys_penman
from warnings import warn
from komanawa.basgra_nz_py.get_fortran_module import get_fortran_basgra

# this is the maximum number of weather days,
# it is hard coded into fortran_BASGRA_NZ/environment.f95 line 9
_max_weather_size = 36600



[docs]
def run_basgra_nz(params, matrix_weather, days_harvest, doy_irr, verbose=False,
                  supply_pet=True, auto_harvest=False, run_365_calendar=False, binname='gfortran-12', recompile=False,
                  compile_verbose=False):
    """
    python wrapper for the fortran BASGRA code changes to the fortran code may require changes to this function runs the model for the period of the weather data

    :param params: dictionary, see input_output_keys.py, https://github.com/Komanawa-Solutions-Ltd/BASGRA_NZ_PY for more details
    :param matrix_weather: pandas dataframe of weather data, maximum entries set in _max_weather_size in line 24 of this file (currently 36600) see documentation for input columns at https://github.com/Komanawa-Solutions-Ltd/BASGRA_NZ_PY , note expected DOY will change depending on expect_no_leap_days
    :param days_harvest: days harvest dataframe must be same length as matrix_weather entries see documentation for input columns at https://github.com/Komanawa-Solutions-Ltd/BASGRA_NZ_PY , note expected DOY will change depending on expect_no_leap_days
    :param doy_irr: a list of the days of year to irrigate on, must be integers acceptable values: (0-366)
    :param verbose: boolean, if True the fortran function prints a number of statements for debugging purposes(depreciated)
    :param supply_pet: boolean, if True BASGRA expects pet to be supplied, if False the parameters required to calculate pet from the peyman equation are expected, the version must match the DLL if dll_path != 'default'
    :param auto_harvest: boolean, if True then assumes data is formated correctly for auto harvesting, if False, then assumes data is formatted for manual harvesting (e.g. previous version) and re-formats internally
    :param run_365_calendar: boolean, if True then run on a 365 day calender This expects that all leap days will be removed from matrix_weather and days_harvest. DOY is expected to be between 1 and 365.  This means that datetime objects defined by year and doy will be incorrect. instead use get_month_day_to_nonleap_doy to map DOY to datetime via month and day. This is how the index of the returned datetime will be passed.  For example for date 2024-03-01 (2024 is a leap year) the dayofyear via a datetime object will be 61, but if expect_no_leap_days=True basgra expects day of year to be 60. the index of the results will be a datetime object of equivalent to 2024-03-01, so  the output doy will not match the index doy and there will be no value on 2020-02-29. default False
    :param binname: str, the name of the gfortran compiler to use, default 'gfortran-12'
    :param recompile: bool, if True force recompile the fortran code
    :param compile_verbose: bool, if True print the fortran compilation output
    :return: pd.DataFrame(index=datetime index, columns = out_cols)
    """

    assert isinstance(supply_pet, bool), 'supply_pet param must be boolean'
    assert isinstance(auto_harvest, bool), 'auto_harvest param must be boolean'
    assert isinstance(run_365_calendar, bool), 'expect_no_leap_days must be boolean'

    # define DLL library path
    fortran_basgra = get_fortran_basgra(supply_pet, binname=binname, recomplile=recompile, verbose=compile_verbose)

    # define expected weather keys
    if supply_pet:
        _matrix_weather_keys = matrix_weather_keys_pet
    else:
        _matrix_weather_keys = matrix_weather_keys_penman

    doy_irr = np.atleast_1d(doy_irr)
    # test the input variables
    _test_basgra_inputs(params, matrix_weather, days_harvest, verbose, _matrix_weather_keys,
                        auto_harvest, doy_irr, run_365_calendar=run_365_calendar)

    nout = len(out_cols)
    ndays = len(matrix_weather)
    nirr = len(doy_irr)

    # define output indexes before data manipulation
    out_index = matrix_weather.index

    # copy everything and ensure order is correct
    params = deepcopy(params)
    matrix_weather = deepcopy(matrix_weather.loc[:, _matrix_weather_keys])
    days_harvest = deepcopy(days_harvest.loc[:, days_harvest_keys])

    # translate manual harvest inputs into fortran format
    if not auto_harvest:
        days_harvest = _trans_manual_harv(days_harvest, matrix_weather)

    # get variables into right python types
    params = np.array([params[e] for e in param_keys]).astype(float)
    matrix_weather = matrix_weather.values.astype(float)
    days_harvest = days_harvest.values.astype(float)
    doy_irr = doy_irr.astype(np.int32)

    # manage weather size,
    weather_size = len(matrix_weather)
    if weather_size < _max_weather_size:
        temp = np.zeros((_max_weather_size - weather_size, matrix_weather.shape[1]), float)
        matrix_weather = np.concatenate((matrix_weather, temp), 0)
    elif weather_size > _max_weather_size:
        raise ValueError(f'weather data is too long, maximum is {_max_weather_size} days, '
                         f'though this value can be modified in the fortran code: '
                         f'fortran_BASGRA_NZ/environment.f95 line 9 and {__file__} line 29')

    y = np.asfortranarray(np.zeros((ndays, nout), float))  # cannot set these to nan's or it breaks fortran

    y = fortran_basgra.basgra(params,
                              np.asfortranarray(matrix_weather),
                              days_harvest,
                              ndays,
                              nout,
                              nirr,
                              doy_irr,
                              verbose,
                              y=deepcopy(y))
    out = pd.DataFrame(y, out_index, out_cols)
    if run_365_calendar:
        mapper = get_month_day_to_nonleap_doy(key_doy=True)
        strs = [f'{y}-{mapper[doy][0]:02d}-{mapper[doy][1]:02d}' for y, doy in zip(out.year.values.astype(int),
                                                                                   out.doy.values.astype(int))]
        out.loc[:, 'date'] = pd.to_datetime(strs)
    else:
        strs = ['{}-{:03d}'.format(int(e), int(f)) for e, f in out[['year', 'doy']].itertuples(False, None)]
        out.loc[:, 'date'] = pd.to_datetime(strs, format='%Y-%j')

    out.set_index('date', inplace=True)

    return out



def _trans_manual_harv(days_harvest, matrix_weather):
    """
    translates manual harvest data to the format expected by fortran, check the details of the data in here.

    :param days_harvest: manual harvest data
    :param matrix_weather: weather data, mostly to get the right size
    :return: days_harvest (correct format for fortran code)
    """
    days_harvest = days_harvest.set_index(['year', 'doy'])
    days_harvest_out = pd.DataFrame({'year': matrix_weather.loc[:, 'year'],
                                     'doy': matrix_weather.loc[:, 'doy'],
                                     'frac_harv': np.zeros(len(matrix_weather)),  # set filler values
                                     'harv_trig': np.zeros(len(matrix_weather)) - 1,  # set flag to not harvest
                                     'harv_targ': np.zeros(len(matrix_weather)),  # set filler values
                                     'weed_dm_frac': np.zeros(len(matrix_weather)) * np.nan,  # set nas, filled later
                                     'reseed_trig': np.zeros(len(matrix_weather)) - 1,  # set flag to not reseed
                                     'reseed_basal': np.zeros(len(matrix_weather)),  # set filler values
                                     })
    days_harvest_out = days_harvest_out.set_index(['year', 'doy'])
    for k in set(days_harvest_keys) - {'year', 'doy'}:
        days_harvest_out.loc[days_harvest.index, k] = days_harvest.loc[:, k]

    days_harvest_out = days_harvest_out.reset_index()

    # fill the weed fraction so that DMH_WEED is always calculated

    if pd.isna(days_harvest_out.weed_dm_frac).iloc[0]:
        warn('weed_dm_frac is na for the first day of simulation, setting to first valid weed_dm_frac\n'
             'this does not affect the harvesting only the calculation of the DMH_weed variable.')

        idx = np.where(pd.notna(days_harvest_out.weed_dm_frac))[0][0]  # get first non-nan value
        id_val = pd.Series(days_harvest_out.index).iloc[0]
        days_harvest_out.loc[id_val, 'weed_dm_frac'] = days_harvest_out.loc[:, 'weed_dm_frac'].iloc[idx]

    days_harvest_out.loc[:, 'weed_dm_frac'] = days_harvest_out.loc[:, 'weed_dm_frac'].ffill()

    return days_harvest_out


def _test_basgra_inputs(params, matrix_weather, days_harvest, verbose, _matrix_weather_keys,
                        auto_harvest, doy_irr, run_365_calendar):
    # check parameters
    assert isinstance(verbose, bool), 'verbose must be boolean'
    assert isinstance(params, dict)
    assert set(params.keys()) == set(param_keys), 'incorrect params keys'
    assert not any([np.isnan(e) for e in params.values()]), 'params cannot have na data'

    assert params['reseed_harv_delay'] >= 1, 'harvest delay must be >=1'
    assert params['reseed_harv_delay'] % 1 < 1e5, 'harvest delay must effectively be an integer'

    # check matrix weather
    assert isinstance(matrix_weather, pd.DataFrame)
    assert set(matrix_weather.keys()) == set(_matrix_weather_keys), 'incorrect keys for matrix_weather'
    assert pd.api.types.is_integer_dtype(matrix_weather.doy), 'doy must be an integer datatype in matrix_weather'
    assert pd.api.types.is_integer_dtype(matrix_weather.year), 'year must be an integer datatype in matrix_weather'
    assert len(matrix_weather) <= _max_weather_size, 'maximum run size is {} days'.format(_max_weather_size)
    assert not matrix_weather.isna().any().any(), 'matrix_weather cannot have na values'

    # check to make sure there are no missing days in matrix_weather
    start_year = matrix_weather['year'].min()
    start_day = matrix_weather.loc[matrix_weather.year == start_year, 'doy'].min()

    stop_year = matrix_weather['year'].max()
    stop_day = matrix_weather.loc[matrix_weather.year == stop_year, 'doy'].max()

    if run_365_calendar:
        assert matrix_weather.doy.max() <= 365, 'expected to have leap days removed, and all doy between 1-365'
        doy_day_mapper = get_month_day_to_nonleap_doy()
        inv_doy_mapper = get_month_day_to_nonleap_doy(key_doy=True)
        start_mon, start_dom = inv_doy_mapper[start_day]
        stop_mon, stop_dom = inv_doy_mapper[stop_day]
        expected_datetimes = pd.date_range(start=f'{start_year}-{start_mon:02d}-{start_dom:02d}',
                                           end=f'{stop_year}-{stop_mon:02d}-{stop_dom:02d}')
        expected_datetimes = expected_datetimes[~((expected_datetimes.month == 2) & (expected_datetimes.day == 29))]
        expected_years = expected_datetimes.year.values
        expected_days = np.array(
            [doy_day_mapper[(m, d)] for m, d in zip(expected_datetimes.month, expected_datetimes.day)])
        addmess = ' note that leap days are expected to have been removed from matrix weather'
    else:
        expected_datetimes = pd.date_range(start=pd.to_datetime('{}-{}'.format(start_year, start_day), format='%Y-%j'),
                                           end=pd.to_datetime('{}-{}'.format(stop_year, stop_day), format='%Y-%j'))
        expected_years = expected_datetimes.year.values
        expected_days = expected_datetimes.dayofyear.values
        addmess = ''

    check = ((matrix_weather['year'].values == expected_years).all() and
             (matrix_weather['doy'].values == expected_days).all())
    assert check, 'the date range of matrix_weather contains missing or duplicate days' + addmess

    # check harvest data
    assert isinstance(days_harvest, pd.DataFrame)
    assert set(days_harvest.keys()) == set(days_harvest_keys), 'incorrect keys for days_harvest'
    assert pd.api.types.is_integer_dtype(days_harvest.doy), 'doy must be an integer datatype in days_harvest'
    assert pd.api.types.is_integer_dtype(days_harvest.year), 'year must be an integer datatype in days_harvest'
    assert not days_harvest.isna().any().any(), 'days_harvest cannot have na data'
    assert (days_harvest['frac_harv'] <= 1).all(), 'frac_harv cannot be greater than 1'
    if run_365_calendar:
        assert days_harvest.doy.max() <= 365
    if params['fixed_removal'] > 0.9:
        assert (days_harvest['harv_trig'] >=
                days_harvest['harv_targ']).all(), 'when using fixed harvest mode the harv_trig>=harv_targ'

    if auto_harvest:
        assert len(matrix_weather) == len(
            days_harvest), 'days_harvest and matrix_weather must be the same length(ndays)'

        check = (days_harvest['year'].values == matrix_weather.year.values).all() and (
                days_harvest['doy'].values == matrix_weather.doy.values).all()
        assert check, 'the date range of days_harvest does not match matrix_weather' + addmess
    else:
        if run_365_calendar:
            mapper = get_month_day_to_nonleap_doy(key_doy=True)
            strs = [f'{y}-{mapper[doy][0]:02d}-{mapper[doy][1]:02d}' for y, doy in zip(days_harvest.year.values,
                                                                                       days_harvest.doy.values)]
            harvest_dt = pd.to_datetime(strs)

        else:
            strs = ['{}-{:03d}'.format(int(e), int(f)) for e, f in
                    days_harvest[['year', 'doy']].itertuples(False, None)]
            harvest_dt = pd.to_datetime(strs, format='%Y-%j')
        assert harvest_dt.min() >= expected_datetimes.min(), 'days_harvest must start at or after first day of simulation'
        assert harvest_dt.max() <= expected_datetimes.max(), 'days_harvest must stop at or before last day of simulation'

    # doy_irr tests
    assert isinstance(doy_irr, np.ndarray), 'doy_irr must be convertable to a numpy array'
    assert doy_irr.ndim == 1, 'doy_irr must be 1d'
    assert pd.api.types.is_integer_dtype(doy_irr), 'doy_irr must be integers'
    assert doy_irr.max() <= 366, 'entries doy_irr must not be greater than 366'
    assert doy_irr.min() >= 0, 'entries doy_irr must not be less than 0'

    # pass a warning if max_irr is greater than abs_max_irr
    if matrix_weather.loc[:, 'max_irr'].max() > params['abs_max_irr']:
        warn(f'maximum weather_matrix max_irr ({matrix_weather.loc[:, "max_irr"].max()}) > absolute maximum '
             f'irrigation {params["abs_max_irr"]}.  The extra irrigation can never be applied but may be available for '
             f'storage.')



[docs]
def get_month_day_to_nonleap_doy(key_doy=False):
    """

    :param key_doy: bool, if true the keys are doy, else keys are (month, dayofmonth)
    :return: dictionary if not inverse: {(m,d}:doy} if inverse: {doy: (m,d)}
    """
    temp = pd.date_range('2025-01-01', '2025-12-31')  # a random non leap year
    day = temp.day
    month = temp.month
    doy = temp.dayofyear
    if key_doy:
        out = {dd: (m, d) for m, d, dd in zip(month, day, doy)}
    else:
        out = {(m, d): dd for m, d, dd in zip(month, day, doy)}

    return out



if __name__ == '__main__':
    pass