# Source code for komanawa.kendall_stats.time_tests

"""
usage: python time_tests.py [outdir]
:param outdir: path to save the results to; if not provided, the results are saved to the same directory as the script

created matt_dumont 
on: 29/09/23
"""
import pandas as pd

from komanawa.kendall_stats import MultiPartKendall, SeasonalMultiPartKendall, SeasonalKendall, MannKendall
from komanawa.kendall_stats import example_data

from pathlib import Path
import timeit
import sys
import os


def timeit_test(function_names, npoints, n=10):
    """
    Run an automated timeit test on functions defined in this file.

    Each function is imported by name from this file's module and timed as the
    statement ``fn(npoints)``. Results are printed in scientific notation;
    units are seconds.

    Because the functions are imported from this file, any script-level call
    must be protected by ``if __name__ == '__main__':``.

    :param function_names: the names of the functions to test (iterable of str);
                           each must be a module-level function of this file
    :param npoints: value interpolated into the timed statement ``fn(npoints)``
    :param n: number of times to run each function; the reported time is the
              total time divided by n
    :return: dictionary of function names and their times (seconds per call)
    """
    py_file_path = __file__
    print(py_file_path)
    module_dir = os.path.dirname(py_file_path)
    # splitext strips only the extension; str.replace('.py', '') would also
    # remove a '.py' occurring elsewhere in the file name
    module_name = os.path.splitext(os.path.basename(py_file_path))[0]
    # make this file importable by module name, without adding a duplicate
    # sys.path entry on every call
    if module_dir not in sys.path:
        sys.path.append(module_dir)

    out = {}
    for fn in function_names:
        print(f'testing: {fn}({npoints})')
        t = timeit.timeit(f'{fn}({npoints})', setup=f'from {module_name} import {fn}', number=n) / n
        out[fn] = t
        print('{0:e} seconds'.format(t))
    return out
def MannKendall_time_test(npoints):
    """
    Benchmark payload: generate example data and construct a MannKendall on it.

    :param npoints: dataset size control (int or int-like str); passed to the
                    data generator as ``step=100 / npoints``
    """
    npoints = int(npoints)
    # the data helpers are reached through ``example_data``, the name this
    # file imports from komanawa.kendall_stats
    _, y = example_data.make_increasing_decreasing_data(slope=0.1, noise=5, step=100 / npoints)
    MannKendall(y)


def SeasonalKendall_time_test(npoints):
    """
    Benchmark payload: generate seasonal example data and construct a SeasonalKendall on it.

    :param npoints: dataset size control (int or int-like str); passed to the
                    data generator as ``step=100 / npoints``
    """
    npoints = int(npoints)
    data = example_data.make_seasonal_data(slope=0.1, noise=5, unsort=False, na_data=False, step=100 / npoints)
    SeasonalKendall(df=data, data_col='y', season_col='seasons', alpha=0.05, rm_na=True,
                    freq_limit=1)


def MultiPartKendall_2part_time_test(npoints):
    """
    Benchmark payload: 2-part MultiPartKendall on sharp-change example data,
    followed by ``get_maxz_breakpoints()``.

    :param npoints: dataset size control (int or int-like str); passed to the
                    data generator as ``step=100 / npoints``
    """
    npoints = int(npoints)
    _, y = example_data.make_multipart_sharp_change_data(
        slope=example_data.multipart_sharp_slopes[0],
        noise=example_data.multipart_sharp_noises[0],
        unsort=False,
        na_data=False,
        step=100 / npoints)
    t = MultiPartKendall(
        data=y,
        nparts=2, expect_part=(1, -1), min_size=10,
        alpha=0.05, no_trend_alpha=0.5,
        data_col=None, rm_na=True,
        serialise_path=None, recalc=False,
    )
    t.get_maxz_breakpoints()


def SeasonalMultiPartKendall_2part_time_test(npoints):
    """
    Benchmark payload: 2-part SeasonalMultiPartKendall on seasonal sharp-change
    example data, followed by ``get_maxz_breakpoints()``.

    :param npoints: dataset size control (int or int-like str); passed to the
                    data generator as ``step=100 / npoints``
    """
    npoints = int(npoints)
    data = example_data.make_seasonal_multipart_sharp_change(
        slope=example_data.multipart_sharp_slopes[0],
        noise=example_data.multipart_sharp_noises[0],
        unsort=False,
        na_data=False,
        step=100 / npoints)
    # NOTE(review): the keyword is spelled ``initalize`` in the original call;
    # kept verbatim on the assumption that it matches the class signature
    t = SeasonalMultiPartKendall(
        data, data_col='y', season_col='seasons',
        nparts=2, expect_part=(1, -1), min_size=10,
        alpha=0.05, no_trend_alpha=0.5,
        rm_na=True,
        serialise_path=None, freq_limit=1, recalc=False, initalize=True)
    t.get_maxz_breakpoints()


def MultiPartKendall_3part_time_test(npoints):
    """
    Benchmark payload: 3-part MultiPartKendall on parabolic example data,
    followed by ``get_maxz_breakpoints()``.

    :param npoints: dataset size control (int or int-like str); passed to the
                    data generator as ``step=100 / npoints``
    """
    npoints = int(npoints)
    _, y = example_data.make_multipart_parabolic_data(
        slope=example_data.multipart_parabolic_slopes[0],
        noise=example_data.multipart_parabolic_noises[0],
        unsort=False,
        na_data=False,
        step=100 / npoints)
    t = MultiPartKendall(
        y, data_col=None,
        nparts=3, expect_part=(1, 0, -1), min_size=10,
        alpha=0.05, no_trend_alpha=0.5,
        rm_na=True,
        serialise_path=None, recalc=False)
    t.get_maxz_breakpoints()


def SeasonalMultiPartKendall_3part_time_test(npoints):
    """
    Benchmark payload: 3-part SeasonalMultiPartKendall on seasonal parabolic
    example data, followed by ``get_maxz_breakpoints()``.

    :param npoints: dataset size control (int or int-like str); passed to the
                    data generator as ``step=100 / npoints``
    """
    npoints = int(npoints)
    data = example_data.make_seasonal_multipart_parabolic(
        slope=example_data.multipart_parabolic_slopes[0],
        noise=example_data.multipart_parabolic_noises[0],
        unsort=False,
        na_data=False,
        step=100 / npoints)
    # NOTE(review): the keyword is spelled ``initalize`` in the original call;
    # kept verbatim on the assumption that it matches the class signature
    t = SeasonalMultiPartKendall(
        data, data_col='y', season_col='seasons',
        nparts=3, expect_part=(1, 0, -1), min_size=10,
        alpha=0.05, no_trend_alpha=0.5,
        rm_na=True,
        serialise_path=None, freq_limit=1, recalc=False, initalize=True)
    t.get_maxz_breakpoints()


def test_all_functions():
    """
    Smoke test: run every benchmark payload once with a small dataset ('50')
    to check that each one executes without raising.
    """
    use_npoints = '50'
    MannKendall_time_test(use_npoints)
    SeasonalKendall_time_test(use_npoints)
    MultiPartKendall_2part_time_test(use_npoints)
    SeasonalMultiPartKendall_2part_time_test(use_npoints)
    MultiPartKendall_3part_time_test(use_npoints)
    SeasonalMultiPartKendall_3part_time_test(use_npoints)
# names of every benchmark function that run_time_test accepts in ``function_names``
_ALL_TIME_TESTS = (
    'MannKendall_time_test',
    'SeasonalKendall_time_test',
    'MultiPartKendall_2part_time_test',
    'SeasonalMultiPartKendall_2part_time_test',
    'MultiPartKendall_3part_time_test',
    'SeasonalMultiPartKendall_3part_time_test',
)


def run_time_test(outdir=None, all_npoints=('50', '100', '500', '1000'), function_names=_ALL_TIME_TESTS):
    """
    Run the time test for the requested functions and save the results to a csv file.

    The results table has one row per entry of ``all_npoints`` and one column
    per function name; it is written to ``<outdir>/time_test_results.txt``.
    Dataset sizes above 500 are timed once, all others twice.

    :param outdir: place to save the output (created, including missing
                   parents, if needed); None saves next to this file
    :param all_npoints: the dataset sizes to test (iterable of int-like values)
    :param function_names: the names of the functions to test, default is all
    :return: None
    :raises ValueError: if ``function_names`` contains a name that is not one
                        of the known time test functions
    """
    # materialise once so arbitrary iterables (tuples, generators) are accepted
    all_npoints = list(all_npoints)
    function_names = list(function_names)

    # explicit validation (an assert would be stripped under ``python -O``)
    unknown = sorted(set(function_names) - set(_ALL_TIME_TESTS))
    if unknown:
        raise ValueError(
            f'unknown time test function(s): {unknown}, valid names are: {list(_ALL_TIME_TESTS)}'
        )

    outdir = Path(__file__).parent if outdir is None else Path(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    outdata = pd.DataFrame(index=all_npoints, columns=function_names)
    outdata.index.name = 'npoints'
    for npoints in all_npoints:
        # fewer repeats for the larger datasets
        use_n = 1 if int(npoints) > 500 else 2
        print(f'testing {npoints}')
        temp = timeit_test(function_names, npoints, n=use_n)
        outdata.loc[npoints] = pd.Series(temp)

    print(f'saving results to {outdir.joinpath("time_test_results.txt")}')
    outdata.to_csv(outdir.joinpath('time_test_results.txt'))
if __name__ == '__main__':
    # the optional first command line argument is the output directory;
    # without it None is passed through to run_time_test
    outdir = sys.argv[1] if len(sys.argv) > 1 else None
    run_time_test(outdir)