# Source code for mlinsights.timeseries.datasets

"""
Datasets for timeseries.


:githublink:`%|py|5`
"""
import datetime
import numpy
import pandas


def artificial_data(dt1, dt2, minutes=1):
    """
    Generates artificial data every *minutes* minutes.

    Observations are only produced for some timestamps:

    * Sundays (``weekday() == 6``) are always skipped,
    * when *minutes* is at most 120, only hours between 8 and 18
      (inclusive) are kept,
    * Saturdays (``weekday() == 5``) follow a linear profile,
      the other days a quadratic one.

    A gaussian noise (standard deviation 0.1) is added to every value,
    so two calls do not return the same *y*.

    :param dt1: first date (included)
    :param dt2: second date (excluded)
    :param minutes: interval between two observations,
        must be strictly positive
    :return: dataframe with columns ``time`` and ``y``, empty
        (but with both columns) if no observation falls in the interval
    :raises ValueError: if *minutes* is not strictly positive

    .. runpython::
        :showcode:

        import datetime
        from mlinsights.timeseries.datasets import artificial_data

        now = datetime.datetime.now()
        data = artificial_data(now - datetime.timedelta(40), now)
        print(data.head())

    :githublink:`%|py|28`
    """
    # A null or negative step would never bring dt1 closer to dt2.
    if minutes <= 0:
        raise ValueError(
            "minutes must be strictly positive, got %r" % (minutes,))

    def fxweek(x):
        # Profile used from Monday to Friday.
        return 2 - x * (1 - x)

    def sat(x):
        # Profile used on Saturdays.
        return 2 * x + 2

    data = []
    dt = datetime.timedelta(minutes=minutes)
    while dt1 < dt2:
        weekday = dt1.weekday()
        # The opening-hours filter only applies to fine-grained series.
        off_hours = minutes <= 120 and not 8 <= dt1.hour <= 18
        if weekday != 6 and not off_hours:
            x = (dt1.hour - 8) / 10
            y = sat(x) if weekday == 5 else fxweek(x)
            data.append({'time': dt1, 'y': y})
        dt1 += dt

    if not data:
        # pandas.DataFrame([]) has no column at all: build the empty
        # result explicitly so that callers always find 'time' and 'y'.
        return pandas.DataFrame({
            'time': pandas.DatetimeIndex([]),
            'y': numpy.empty(0, dtype=float),
        })

    df = pandas.DataFrame(data)
    df['y'] += numpy.random.randn(df.shape[0]) * 0.1
    df['time'] = pandas.DatetimeIndex(df['time'])
    return df