"""
created matt_dumont
on: 22/09/23
"""
import warnings
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit
from komanawa.gw_age_tools.exponential_piston_flow import make_age_dist, check_age_inputs
[docs]
def predict_source_future_past_conc_bepm(initial_conc, mrt, mrt_p1, frac_p1, f_p1, f_p2,
prev_slope, fut_slope, age_range, max_conc, min_conc,
max_fut_conc=np.inf, min_fut_conc=0, precision=2):
"""
predict the source and receptor concentration in the future and past based on the current concentration the historical observed slope and the future predicted/scenario slope
:param initial_conc: initial concentration (at time = 0 yrs)
:param mrt: mean residence time of the source (yrs)
:param mrt_p1: mean residence time of the first piston (yrs)
:param frac_p1: fraction of the source that is in the first piston
:param f_p1: fraction of exponential flow the first piston
:param f_p2: fraction of exponential flow the second piston
:param prev_slope: slope of the previous trend (conc/yr)
:param fut_slope: slope of the future trend (conc/yr)
:param age_range: range of ages to predict the source concentration for (yrs) (start, stop) where start is negative and stop is positive years from the present time = 0 yrs is the present
:param max_conc: maximum concentration of the source (to limit optimisation)
:param min_conc: minimum concentration of the source (to limit optimisation)
:param max_fut_conc: maximum concentration of the source in the future (to limit the future slope creating crazy numbers)
:param min_fut_conc: minimum concentration of the source in the future (to limit the slope creating negative numbers)
:param precision: precision of the age distribution times (decimal places) default is 2, approximately monthly
:return: (source_conc, receptor_conc) where source_conc is a pandas series of the source concentration and receptor_conc is a pandas series of the receptor concentration both indexed by age in years relative to the initial concentration at time = 0 yrs (- for past, + for future)
"""
start, stop = age_range
assert start < stop, 'start must be less than stop'
assert start <= 0, 'start must be less than or equal to 0'
assert stop >= 0, 'stop must be greater than or equal to 0'
mrt_p2 = None
mrt, mrt_p2 = check_age_inputs(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
age_step, ages, age_fractions = make_age_dist(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2, start=np.nan)
source_conc_past = predict_historical_source_conc(init_conc=initial_conc,
mrt=mrt, mrt_p1=mrt_p1, mrt_p2=mrt_p2,
frac_p1=frac_p1, f_p1=f_p1, f_p2=f_p2,
prev_slope=prev_slope, max_conc=max_conc, min_conc=min_conc,
start_age=start, precision=precision)
fut_idx = np.arange(0, stop + age_step, age_step).round(precision)
source_future_conc = pd.Series(
index=fut_idx,
data=source_conc_past.loc[0] + fut_slope * fut_idx
)
source_future_conc = source_future_conc.clip(lower=min_fut_conc, upper=max_fut_conc)
total_source_conc = pd.concat([source_conc_past.drop(index=0), source_future_conc]).sort_index()
out_years = np.arange(start, stop, age_step).round(precision)
out_conc = np.full_like(out_years, np.nan)
for i, t in enumerate(out_years):
out_conc[i] = (total_source_conc.loc[(t - ages).round(precision)] * age_fractions).sum()
receptor_conc = pd.Series(index=out_years, data=out_conc)
return total_source_conc, receptor_conc
[docs]
def predict_future_conc_bepm(once_and_future_source_conc: pd.Series, predict_start, predict_stop,
mrt_p1, frac_p1, f_p1, f_p2, mrt=None, mrt_p2=None, fill_value=1,
fill_threshold=0.05, precision=2, pred_step=0.01):
"""
predict the receptor concentration based on the source concentration time series and the binary piston flow model parameters
:param once_and_future_source_conc: pd.Series of the source concentration index by age in decimal years the Series can have missing values and will be interpolated onto a 0.01 yr regular index therefore the once_and_future_source_conc may be passed with values only at the start, stop, and inflection points
:param predict_start: start of the prediction period (decimal years)
:param predict_stop: end of the prediction period (decimal years)
:param mrt_p1: mean residence time of the first piston (yrs)
:param frac_p1: fraction of the source that is in the first piston
:param f_p1: fraction of exponential flow the first piston
:param f_p2: fraction of exponential flow the second piston
:param mrt: mean residence time of the source (yrs) or None one of mrt or mrt_p2 must be passed
:param mrt_p2: mean residence time of the second piston (yrs) or None one of mrt or mrt_p2 must be passed
:param fill_value: value to prepend to the source concentration to meet the full age distribution needed (e.g. the concentration of very old water), up to the fill_threshold may be filled with the fill_value before an error is raised
:param fill_threshold: threshold for the source concentration to be filled with the minimum value default is 0.05 (5% of the concentration at the start time)
:param precision: precision of the age distribution (decimal places)
:param pred_step: step size for the prediction (yrs) default is 0.01 (approximately monthly), but this can result in longer run times for large prediction periods, so a larger step size can be used. Note that pred_step must be greater or equal to the precision used
:return: receptor_conc a pandas series of the receptor concentration indexed by age in years
"""
mrt, mrt_p2 = check_age_inputs(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
assert isinstance(pred_step, float), 'pred_step must be a float'
age_step, ages, age_fractions = make_age_dist(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
assert pred_step >= age_step, f'{pred_step=} must be greater than or equal to the {precision=}'
assert isinstance(once_and_future_source_conc, pd.Series), 'once_and_future_source_conc must be a pandas Series'
assert pd.api.types.is_float_dtype(once_and_future_source_conc.index), ('index of once_and_future_source_conc'
'must be float')
assert pd.api.types.is_number(predict_start), 'predict_start must be a number'
assert pd.api.types.is_number(predict_stop), 'predict_stop must be a number'
assert pd.api.types.is_number(fill_value), 'min_value must be a number'
assert pd.api.types.is_number(fill_threshold), 'fill_threshold must be a number'
# make the source concentration a regular series
once_and_future_source_conc = once_and_future_source_conc.copy(True)
once_and_future_source_conc.index = once_and_future_source_conc.index.values.round(precision)
expect_idx_vals = np.arange(once_and_future_source_conc.index.min(),
once_and_future_source_conc.index.max() + age_step,
age_step).round(precision)
temp = pd.Series(index=expect_idx_vals, data=np.nan)
temp.loc[once_and_future_source_conc.index] = once_and_future_source_conc.values
once_and_future_source_conc = temp
once_and_future_source_conc = once_and_future_source_conc.sort_index()
once_and_future_source_conc = once_and_future_source_conc.interpolate(method='linear', limit_direction='both')
# check that enough concentration data has been passed for the stop,
if predict_stop > once_and_future_source_conc.index.max():
raise ValueError(f'predict_stop ({predict_stop}) must be less than or equal to the max age of the source '
f'({once_and_future_source_conc.index.max()})')
# check the start
pred_ages = (predict_start - ages).round(precision)
idx = np.isin(pred_ages, once_and_future_source_conc.index)
if not idx.all():
missing_age_frac = age_fractions[~idx].sum()
minium_pass_age = np.flip(pred_ages)[(np.flip(age_fractions).cumsum() >= fill_threshold).argmax()]
if missing_age_frac > fill_threshold:
raise ValueError(
f'the source concentration is missing {missing_age_frac * 100:0.2f}% of the concentration on'
f' the old end of the source. This is greater than the fill_threshold of '
f'{fill_threshold} and the prediction cannot be made, concentration data must be passed'
f'from at least {minium_pass_age} years')
else:
warnings.warn(f'the source concentration is missing {missing_age_frac * 100:0.2f}% of the concentration on'
f' the old end of the source. This is less than the fill_threshold of '
f'{fill_threshold} and the missing values will be filled with the minimum value of '
f'{fill_value} and the prediction will be made. To avoid this warning pass concentration '
f'data from at least {pred_ages.min()} years')
once_and_future_source_conc = pd.concat((once_and_future_source_conc,
pd.Series(index=pred_ages[~idx], data=fill_value)))
out_times = np.arange(predict_start, predict_stop, pred_step).round(precision)
out_conc = np.full_like(out_times, np.nan)
for i, t in enumerate(out_times):
out_conc[i] = (once_and_future_source_conc.loc[(t - ages).round(precision)] * age_fractions).sum()
receptor_conc = pd.Series(index=out_times, data=out_conc)
return receptor_conc
[docs]
def predict_historical_source_conc(init_conc, mrt, mrt_p1, mrt_p2, frac_p1, f_p1, f_p2, prev_slope, max_conc,
min_conc, start_age=np.nan, precision=2, p0=None):
"""
Estimate the historical source concentration based on the receptor initial concentration the previous slope and the BEPEM model parameters
:param init_conc: the receptor initial concentration
:param mrt: mean residence time of the source (yrs)
:param mrt_p1: mean residence time of the first piston (yrs)
:param mrt_p2: mean residence time of the second piston (yrs)
:param frac_p1: fraction of the source that is in the first piston
:param f_p1: fraction of exponential flow the first piston
:param f_p2: fraction of exponential flow the second piston (use a dummy value if frac_p1 = 1)
:param prev_slope: slope of the previous trend (conc/yr)
:param max_conc: maximum concentration of the source (to limit optimisation)
:param min_conc: minimum concentration of the source (to limit optimisation)
:param start_age: set a start age for the source concentration (yrs) default is np.nan which will use the maximum of the mrt_p1 and mrt_p2
:param precision: precision of the age distribution (decimal places) default is 2, approximately monthly
:param p0: initial guess for the optimisation (slope, intercept) default is None which will use the previous slope and the initial concentration
:return: source_conc_past a pandas series of the source concentration indexed by age in years
"""
assert prev_slope>0,'This approach only works for increasing trends'
if p0 is None:
p0 = [prev_slope, init_conc]
mrt, mrt_p2 = check_age_inputs(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
age_step, ages, age_fractions = make_age_dist(mrt, mrt_p1, mrt_p2, frac_p1, precision, f_p1, f_p2)
t = np.arange(-5, 1, 1).astype(float)
ydata = pd.Series(index=t, data=init_conc + prev_slope * t)
def opt_func(t_vals, source_slope, source_init_conc):
ages_source = np.arange(0, np.nanmax([mrt_p1, mrt_p2]) * 5 + 5 + age_step, age_step).round(precision)
total_source_conc = pd.Series(index=-ages_source, data=source_init_conc - source_slope * ages_source)
total_source_conc.loc[total_source_conc < min_conc] = min_conc
t_vals.round(precision)
out_conc = np.full_like(t_vals, np.nan)
for i, t in enumerate(t_vals):
out_conc[i] = (total_source_conc.loc[(t - ages).round(precision)] * age_fractions).sum()
return out_conc
(s_slope, s_init), pcov = curve_fit(opt_func, t, ydata, p0=p0,
bounds=([0, 0], [np.inf, max_conc]))
ages = np.arange(0., np.nanmax([mrt_p1, mrt_p2, np.abs(start_age)]) * 5 * 2 + age_step, age_step).round(
precision)
source_conc_past = pd.Series(index=ages * -1, data=s_init - s_slope * ages)
source_conc_past.loc[source_conc_past < min_conc] = min_conc
return source_conc_past