Source code for komanawa.kendall_stats.time_tests

"""
usage python time_tests.py [outdir]
:param outdir: path to save the results to, if not provided then the results are saved to the same directory as the script

created matt_dumont 
on: 29/09/23
"""
import pandas as pd

from komanawa.kendall_stats import MultiPartKendall, SeasonalMultiPartKendall, SeasonalKendall, MannKendall
from komanawa.kendall_stats import example_data

from pathlib import Path
import timeit
import sys
import os



[docs]
def timeit_test(function_names, npoints, n=10):
    """
    run an automated timeit test, must be outside of the function definition, prints results in scientific notation
    units are seconds

    :param py_file_path: path to the python file that holds the functions, if the functions are in the same script as call then  __file__ is sufficient. in this case the function call should be protected by: if __name__ == '__main__':
    :param function_names: the names of the functions to test (iterable), functions must not have arguments
    :param n: number of times to test
    :return: dictionary of function names and their times
    """
    py_file_path = __file__
    print(py_file_path)
    d = os.path.dirname(py_file_path)
    fname = os.path.basename(py_file_path).replace('.py', '')
    sys.path.append(d)

    out = {}
    for fn in function_names:
        print(f'testing: {fn}({npoints})')
        t = timeit.timeit(f'{fn}({npoints})',
                          setup='from {} import {}'.format(fname, fn),
                          number=n) / n
        out[fn] = t
        print('{0:e} seconds'.format(t))
    return out



def MannKendall_time_test(npoints):
    npoints = int(npoints)
    x, y = make_example_data.make_increasing_decreasing_data(slope=0.1, noise=5, step=100 / npoints)
    MannKendall(y)


def SeasonalKendall_time_test(npoints):
    npoints = int(npoints)
    data = make_example_data.make_seasonal_data(slope=0.1, noise=5, unsort=False, na_data=False, step=100 / npoints)
    SeasonalKendall(df=data, data_col='y', season_col='seasons', alpha=0.05, rm_na=True,
                    freq_limit=1)


def MultiPartKendall_2part_time_test(npoints):
    npoints = int(npoints)
    x, y = make_example_data.make_multipart_sharp_change_data(slope=make_example_data.multipart_sharp_slopes[0],
                                                              noise=make_example_data.multipart_sharp_noises[0],
                                                              unsort=False, na_data=False, step=100 / npoints)
    t = MultiPartKendall(data=y,
                         nparts=2, expect_part=(1, -1),
                         min_size=10,
                         alpha=0.05, no_trend_alpha=0.5,
                         data_col=None, rm_na=True,
                         serialise_path=None, recalc=False, )
    t.get_maxz_breakpoints()


def SeasonalMultiPartKendall_2part_time_test(npoints):
    npoints = int(npoints)
    data = make_example_data.make_seasonal_multipart_sharp_change(slope=make_example_data.multipart_sharp_slopes[0],
                                                                  noise=make_example_data.multipart_sharp_noises[0],
                                                                  unsort=False, na_data=False, step=100 / npoints)
    t = SeasonalMultiPartKendall(data, data_col='y', season_col='seasons',
                                 nparts=2, expect_part=(1, -1), min_size=10,
                                 alpha=0.05, no_trend_alpha=0.5,
                                 rm_na=True,
                                 serialise_path=None, freq_limit=1, recalc=False, initalize=True)
    t.get_maxz_breakpoints()


def MultiPartKendall_3part_time_test(npoints):
    npoints = int(npoints)
    x, y = make_example_data.make_multipart_parabolic_data(slope=make_example_data.multipart_parabolic_slopes[0],
                                                           noise=make_example_data.multipart_parabolic_noises[0],
                                                           unsort=False, na_data=False, step=100 / npoints)
    t = MultiPartKendall(y, data_col=None,
                         nparts=3, expect_part=(1, 0, -1), min_size=10,
                         alpha=0.05, no_trend_alpha=0.5,
                         rm_na=True,
                         serialise_path=None, recalc=False)
    t.get_maxz_breakpoints()


def SeasonalMultiPartKendall_3part_time_test(npoints):
    npoints = int(npoints)
    data = make_example_data.make_seasonal_multipart_parabolic(slope=make_example_data.multipart_parabolic_slopes[0],
                                                               noise=make_example_data.multipart_parabolic_noises[0],
                                                               unsort=False, na_data=False, step=100 / npoints)
    t = SeasonalMultiPartKendall(data, data_col='y', season_col='seasons',
                                 nparts=3, expect_part=(1, 0, -1), min_size=10,
                                 alpha=0.05, no_trend_alpha=0.5,
                                 rm_na=True,
                                 serialise_path=None, freq_limit=1, recalc=False, initalize=True)
    t.get_maxz_breakpoints()


def test_all_functions():
    use_npoints = '50'
    MannKendall_time_test(use_npoints)
    SeasonalKendall_time_test(use_npoints)
    MultiPartKendall_2part_time_test(use_npoints)
    SeasonalMultiPartKendall_2part_time_test(use_npoints)
    MultiPartKendall_3part_time_test(use_npoints)
    SeasonalMultiPartKendall_3part_time_test(use_npoints)



[docs]
def run_time_test(outdir=None, all_npoints=['50', '100', '500', '1000'],
                  function_names=['MannKendall_time_test', 'SeasonalKendall_time_test',
                                  'MultiPartKendall_2part_time_test', 'SeasonalMultiPartKendall_2part_time_test',
                                  'MultiPartKendall_3part_time_test', 'SeasonalMultiPartKendall_3part_time_test']
                  ):
    """
    run the time test for all functions and save the results to a csv file

    :param outdir: place to save the output
    :param all_npoints: the dataset sizes to test
    :param function_names: the names of the functions to test, default is all
    :return:
    """
    assert set(function_names).issubset(['MannKendall_time_test',
                                         'SeasonalKendall_time_test',
                                         'MultiPartKendall_2part_time_test',
                                         'SeasonalMultiPartKendall_2part_time_test',
                                         'MultiPartKendall_3part_time_test',
                                         'SeasonalMultiPartKendall_3part_time_test'])
    if outdir is None:
        outdir = Path(__file__).parent
    else:
        outdir = Path(outdir)
    outdir.mkdir(exist_ok=True)

    outdata = pd.DataFrame(index=all_npoints, columns=function_names)
    outdata.index.name = 'npoints'
    for npoints in all_npoints:
        if int(npoints) > 500:
            use_n = 1

        else:
            use_n = 2
        print(f'testing {npoints}')
        temp = timeit_test(function_names, npoints, n=use_n)
        outdata.loc[npoints] = pd.Series(temp)
    print(f'saving results to {outdir.joinpath("time_test_results.txt")}')
    outdata.to_csv(outdir.joinpath('time_test_results.txt'))



if __name__ == '__main__':
    args = sys.argv
    outdir = None
    if len(args) > 1:
        outdir = args[1]
    run_time_test(outdir)