Source code for komanawa.gw_age_tools.source_predictions

"""
created matt_dumont 
on: 22/09/23
"""
import warnings
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from komanawa.gw_age_tools.exponential_piston_flow import make_age_dist, check_age_inputs



[docs]
def predict_source_future_past_conc_bepm(initial_conc, mrt, mrt_p1, frac_p1, f_p1, f_p2,
                                         prev_slope, fut_slope, age_range, max_conc, min_conc,
                                         max_fut_conc=np.inf, min_fut_conc=0, precision=2):
    """
    predict the source and receptor concentration in the future and past based on the current concentration the historical observed slope and the future predicted/scenario slope

    :param initial_conc: initial concentration (at time = 0 yrs)
    :param mrt: mean residence time of the source (yrs)
    :param mrt_p1: mean residence time of the first piston (yrs)
    :param frac_p1: fraction of the source that is in the first piston
    :param f_p1: fraction of exponential flow the first piston
    :param f_p2: fraction of exponential flow the second piston
    :param prev_slope: slope of the previous trend (conc/yr)
    :param fut_slope: slope of the future trend (conc/yr)
    :param age_range: range of ages to predict the source concentration for (yrs) (start, stop) where start is negative and stop is positive years from the present time = 0 yrs is the present
    :param max_conc: maximum concentration of the source (to limit optimisation)
    :param min_conc: minimum concentration of the source (to limit optimisation)
    :param max_fut_conc: maximum concentration of the source in the future (to limit the future slope creating crazy numbers)
    :param min_fut_conc: minimum concentration of the source in the future (to limit the slope creating negative numbers)
    :param precision: precision of the age distribution times (decimal places) default is 2, approximately monthly
    :return: (source_conc, receptor_conc) where source_conc is a pandas series of the source concentration and receptor_conc is a pandas series of the receptor concentration both indexed by age in years relative to the initial concentration at time = 0 yrs (- for past, + for future)
    """
    start, stop = age_range
    assert start < stop, 'start must be less than stop'
    assert start <= 0, 'start must be less than or equal to 0'
    assert stop >= 0, 'stop must be greater than or equal to 0'

    mrt_p2 = None
    mrt, mrt_p2 = check_age_inputs(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
    age_step, ages, age_fractions = make_age_dist(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2, start=np.nan)

    source_conc_past = predict_historical_source_conc(init_conc=initial_conc,
                                                      mrt=mrt, mrt_p1=mrt_p1, mrt_p2=mrt_p2,
                                                      frac_p1=frac_p1, f_p1=f_p1, f_p2=f_p2,
                                                      prev_slope=prev_slope, max_conc=max_conc, min_conc=min_conc,
                                                      start_age=start, precision=precision)

    fut_idx = np.arange(0, stop + age_step, age_step).round(precision)

    source_future_conc = pd.Series(
        index=fut_idx,
        data=source_conc_past.loc[0] + fut_slope * fut_idx
    )
    source_future_conc = source_future_conc.clip(lower=min_fut_conc, upper=max_fut_conc)

    total_source_conc = pd.concat([source_conc_past.drop(index=0), source_future_conc]).sort_index()
    out_years = np.arange(start, stop, age_step).round(precision)
    out_conc = np.full_like(out_years, np.nan)
    for i, t in enumerate(out_years):
        out_conc[i] = (total_source_conc.loc[(t - ages).round(precision)] * age_fractions).sum()
    receptor_conc = pd.Series(index=out_years, data=out_conc)

    return total_source_conc, receptor_conc




[docs]
def predict_future_conc_bepm(once_and_future_source_conc: pd.Series, predict_start, predict_stop,
                             mrt_p1, frac_p1, f_p1, f_p2, mrt=None, mrt_p2=None, fill_value=1,
                             fill_threshold=0.05, precision=2, pred_step=0.01):
    """
    predict the receptor concentration based on the source concentration time series and the  binary piston flow model parameters

    :param once_and_future_source_conc: pd.Series of the source concentration index by age in decimal years the Series can have missing values and will be interpolated onto a 0.01 yr regular index therefore the once_and_future_source_conc may be passed with values only at the start, stop, and inflection points
    :param predict_start: start of the prediction period (decimal years)
    :param predict_stop: end of the prediction period (decimal years)
    :param mrt_p1: mean residence time of the first piston (yrs)
    :param frac_p1: fraction of the source that is in the first piston
    :param f_p1: fraction of exponential flow the first piston
    :param f_p2: fraction of exponential flow the second piston
    :param mrt: mean residence time of the source (yrs) or None one of mrt or mrt_p2 must be passed
    :param mrt_p2: mean residence time of the second piston (yrs) or None one of mrt or mrt_p2 must be passed
    :param fill_value: value to prepend to the source concentration to meet the full age distribution needed (e.g. the concentration of very old water), up to the fill_threshold may be filled with the fill_value before an error is raised
    :param fill_threshold: threshold for the source concentration to be filled with the minimum value default is 0.05 (5% of the concentration at the start time)
    :param precision: precision of the age distribution (decimal places)
    :param pred_step: step size for the prediction (yrs) default is 0.01 (approximately monthly), but this can result in longer run times for large prediction periods, so a larger step size can be used.  Note that pred_step must be greater or equal to the precision used
    :return: receptor_conc a pandas series of the receptor concentration indexed by age in years
    """
    mrt, mrt_p2 = check_age_inputs(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
    assert isinstance(pred_step, float), 'pred_step must be a float'

    age_step, ages, age_fractions = make_age_dist(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
    assert pred_step >= age_step, f'{pred_step=} must be greater than or equal to the {precision=}'

    assert isinstance(once_and_future_source_conc, pd.Series), 'once_and_future_source_conc must be a pandas Series'
    assert pd.api.types.is_float_dtype(once_and_future_source_conc.index), ('index of once_and_future_source_conc'
                                                                            'must be float')
    assert pd.api.types.is_number(predict_start), 'predict_start must be a number'
    assert pd.api.types.is_number(predict_stop), 'predict_stop must be a number'
    assert pd.api.types.is_number(fill_value), 'min_value must be a number'
    assert pd.api.types.is_number(fill_threshold), 'fill_threshold must be a number'

    # make the source concentration a regular series
    once_and_future_source_conc = once_and_future_source_conc.copy(True)
    once_and_future_source_conc.index = once_and_future_source_conc.index.values.round(precision)
    expect_idx_vals = np.arange(once_and_future_source_conc.index.min(),
                                once_and_future_source_conc.index.max() + age_step,
                                age_step).round(precision)
    temp = pd.Series(index=expect_idx_vals, data=np.nan)
    temp.loc[once_and_future_source_conc.index] = once_and_future_source_conc.values
    once_and_future_source_conc = temp
    once_and_future_source_conc = once_and_future_source_conc.sort_index()
    once_and_future_source_conc = once_and_future_source_conc.interpolate(method='linear', limit_direction='both')

    # check that enough concentration data has been passed for the stop,
    if predict_stop > once_and_future_source_conc.index.max():
        raise ValueError(f'predict_stop ({predict_stop}) must be less than or equal to the max age of the source '
                         f'({once_and_future_source_conc.index.max()})')

    # check the start
    pred_ages = (predict_start - ages).round(precision)
    idx = np.isin(pred_ages, once_and_future_source_conc.index)
    if not idx.all():
        missing_age_frac = age_fractions[~idx].sum()
        minium_pass_age = np.flip(pred_ages)[(np.flip(age_fractions).cumsum() >= fill_threshold).argmax()]
        if missing_age_frac > fill_threshold:
            raise ValueError(
                f'the source concentration is missing {missing_age_frac * 100:0.2f}% of the concentration on'
                f' the old end of the source.  This is greater than the fill_threshold of '
                f'{fill_threshold} and the prediction cannot be made, concentration data must be passed'
                f'from at least {minium_pass_age} years')
        else:
            warnings.warn(f'the source concentration is missing {missing_age_frac * 100:0.2f}% of the concentration on'
                          f' the old end of the source.  This is less than the fill_threshold of '
                          f'{fill_threshold} and the missing values will be filled with the minimum value of '
                          f'{fill_value} and the prediction will be made. To avoid this warning pass concentration '
                          f'data from at least {pred_ages.min()} years')

            once_and_future_source_conc = pd.concat((once_and_future_source_conc,
                                                     pd.Series(index=pred_ages[~idx], data=fill_value)))

    out_times = np.arange(predict_start, predict_stop, pred_step).round(precision)
    out_conc = np.full_like(out_times, np.nan)
    for i, t in enumerate(out_times):
        out_conc[i] = (once_and_future_source_conc.loc[(t - ages).round(precision)] * age_fractions).sum()
    receptor_conc = pd.Series(index=out_times, data=out_conc)
    return receptor_conc




[docs]
def predict_historical_source_conc(init_conc, mrt, mrt_p1, mrt_p2, frac_p1, f_p1, f_p2, prev_slope, max_conc,
                                   min_conc, start_age=np.nan, precision=2, p0=None):
    """
    Estimate the historical source concentration based on the receptor initial concentration the previous slope and the BEPEM model parameters

    :param init_conc: the receptor initial concentration
    :param mrt: mean residence time of the source (yrs)
    :param mrt_p1: mean residence time of the first piston (yrs)
    :param mrt_p2: mean residence time of the second piston (yrs)
    :param frac_p1: fraction of the source that is in the first piston
    :param f_p1: fraction of exponential flow the first piston
    :param f_p2: fraction of exponential flow the second piston (use a dummy value if frac_p1 = 1)
    :param prev_slope: slope of the previous trend (conc/yr)
    :param max_conc: maximum concentration of the source (to limit optimisation)
    :param min_conc: minimum concentration of the source (to limit optimisation)
    :param start_age: set a start age for the source concentration (yrs) default is np.nan which will use the maximum of the mrt_p1 and mrt_p2
    :param precision: precision of the age distribution (decimal places) default is 2, approximately monthly
    :param p0: initial guess for the optimisation (slope, intercept) default is None which will use the previous slope and the initial concentration
    :return: source_conc_past a pandas series of the source concentration indexed by age in years
    """
    assert prev_slope>0,'This approach only works for increasing trends'
    if p0 is None:
        p0 = [prev_slope, init_conc]
    mrt, mrt_p2 = check_age_inputs(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
    age_step, ages, age_fractions = make_age_dist(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
    t = np.arange(-5, 1, 1).astype(float)
    ydata = pd.Series(index=t, data=init_conc + prev_slope * t)

    def opt_func(t_vals, source_slope, source_init_conc):
        ages_source = np.arange(0, np.nanmax([mrt_p1, mrt_p2]) * 5 + 5 + age_step, age_step).round(precision)
        total_source_conc = pd.Series(index=-ages_source, data=source_init_conc - source_slope * ages_source)
        total_source_conc.loc[total_source_conc < min_conc] = min_conc

        t_vals.round(precision)
        out_conc = np.full_like(t_vals, np.nan)
        for i, t in enumerate(t_vals):
            out_conc[i] = (total_source_conc.loc[(t - ages).round(precision)] * age_fractions).sum()
        return out_conc

    (s_slope, s_init), pcov = curve_fit(opt_func, t, ydata, p0=p0,
                                        bounds=([0, 0], [np.inf, max_conc]))
    ages = np.arange(0., np.nanmax([mrt_p1, mrt_p2, np.abs(start_age)]) * 5 * 2 + age_step, age_step).round(
        precision)
    source_conc_past = pd.Series(index=ages * -1, data=s_init - s_slope * ages)
    source_conc_past.loc[source_conc_past < min_conc] = min_conc
    return source_conc_past