# Source code for komanawa.kendall_stats.utils
"""
created matt_dumont
on: 29/09/23
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import linregress
from pathlib import Path
# [docs]  (Sphinx viewcode link marker; not part of the module source)
def estimate_runtime(npoints, func, plot=False):
    """
    Estimate the runtime (in seconds) of a kendall_stats function for a given number of data points.

    The estimate is read off a linear regression, fitted in log10-log10 space, of the timing
    results stored in ``time_test_results.txt`` (located next to this module). i.e. it
    assumes linear log-log relationship between runtime and number of points.

    :param npoints: number of data points to estimate the runtime for, a scalar or an
                    array-like of positive numbers
    :param func: name of the function to estimate the runtime of, one of: 'MannKendall',
                 'SeasonalKendall', 'MultiPartKendall_2part', 'SeasonalMultiPartKendall_2part',
                 'MultiPartKendall_3part', 'SeasonalMultiPartKendall_3part'
    :param plot: if True then plot the data and the regression line (calls ``plt.show()``)
    :return: estimated runtime in seconds, one value per entry of ``npoints``
    :raises AssertionError: if ``func`` is not one of the supported names
    """
    valid_funcs = ('MannKendall', 'SeasonalKendall', 'MultiPartKendall_2part', 'SeasonalMultiPartKendall_2part',
                   'MultiPartKendall_3part', 'SeasonalMultiPartKendall_3part')
    assert func in valid_funcs, f'unknown func: {func!r}, must be one of {valid_funcs}'

    data = pd.read_csv(Path(__file__).parent.joinpath('time_test_results.txt'), index_col=0)
    # the file's columns carry a "_time_test" suffix; strip it so they match the func names
    data.columns = [e.replace('_time_test', '') for e in data.columns]
    use_data = data[func].dropna()

    # fit log10(runtime) = intercept + slope * log10(npoints)
    lr = linregress(np.log10(use_data.index), np.log10(use_data))
    out = 10 ** (lr.intercept + lr.slope * np.log10(npoints))

    if plot:
        fig, ax = plt.subplots()
        ax.scatter(use_data.index, use_data, c='b', label='data')
        # np.concatenate rejects 0-d input, so flatten npoints first; this lets a scalar
        # (or multi-dimensional) npoints be plotted as well as a 1-d array
        x_max = np.max(np.concatenate([use_data.index, np.ravel(npoints)]))
        x = np.arange(10, x_max)
        ax.plot(x, 10 ** (lr.intercept + lr.slope * np.log10(x)), c='k', label='regression', ls='--')
        ax.scatter(npoints, out, c='r', label='estimate: for passed points')
        ax.set_yscale('log')
        ax.set_xscale('log')
        ax.set_xlabel('Number of data points')
        ax.set_ylabel('Runtime (seconds)')
        ax.legend()
        ax.set_title(f'{func} runtime estimate in seconds')
        plt.show()
    return out
if __name__ == '__main__':
    # manual check: print (and plot) the runtime estimates for every supported function
    demo_npoints = np.array([500, 1000, 5000, 10000])
    func_names = (
        'MannKendall',
        'SeasonalKendall',
        'MultiPartKendall_2part',
        'SeasonalMultiPartKendall_2part',
        'MultiPartKendall_3part',
        'SeasonalMultiPartKendall_3part',
    )
    for func_name in func_names:
        print(func_name, estimate_runtime(demo_npoints, func_name, plot=True))