|
1 | 1 | import logging
|
2 | 2 | import os
|
3 | 3 | from datetime import timedelta, datetime, timezone
|
4 |
| -from dateutil.parser import parse |
5 | 4 | import polars
|
6 | 5 | from dateutil import parser
|
7 | 6 |
|
@@ -57,6 +56,9 @@ def __init__(
|
57 | 56 | self.data = None
|
58 | 57 | self._start_date_data_source = None
|
59 | 58 | self._end_date_data_source = None
|
| 59 | + self.backtest_end_index = self.window_size |
| 60 | + self.backtest_start_index = 0 |
| 61 | + self.window_cache = {} |
60 | 62 |
|
61 | 63 | def prepare_data(
|
62 | 64 | self,
|
@@ -100,8 +102,6 @@ def prepare_data(
|
100 | 102 |
|
101 | 103 | self.backtest_data_start_date = backtest_data_start_date\
|
102 | 104 | .replace(microsecond=0)
|
103 |
| - self.backtest_data_index_date = backtest_data_start_date\ |
104 |
| - .replace(microsecond=0) |
105 | 105 | self.backtest_data_end_date = backtest_end_date.replace(microsecond=0)
|
106 | 106 |
|
107 | 107 | # Creating the backtest data directory and file
|
@@ -148,14 +148,30 @@ def prepare_data(
|
148 | 148 | self.write_data_to_file_path(file_path, ohlcv)
|
149 | 149 |
|
150 | 150 | self.load_data()
|
| 151 | + self._precompute_sliding_windows() # Precompute sliding windows! |
| 152 | + |
def _precompute_sliding_windows(self):
    """
    Precompute every full-size sliding window for fast retrieval.

    Rebuilds ``self.window_cache`` from scratch. Each entry maps the
    "Datetime" value of a window's last row to the slice of ``self.data``
    that starts ``self.window_size - 1`` rows earlier and spans
    ``self.window_size`` rows. Windows are generated in row order, so if
    two rows share a timestamp the later window replaces the earlier one.

    The cache is left empty when no data has been loaded, when the window
    size is missing or smaller than 1, or when the data holds fewer rows
    than a single window.
    """
    self.window_cache = {}
    frame = self.data
    size = self.window_size

    # A size below 1 would turn the end-of-window index negative, which
    # indexes the timestamp list from the end and caches windows under
    # the wrong keys; there is nothing meaningful to precompute.
    if frame is None or size is None or size < 1:
        return

    timestamps = frame["Datetime"].to_list()

    # The window starting at row `start` ends at row `start + size - 1`,
    # so walking the timestamps from index `size - 1` onwards pairs each
    # window's starting offset with its closing timestamp.
    self.window_cache = {
        end_time: frame.slice(start, size)
        for start, end_time in enumerate(timestamps[size - 1:])
    }
151 | 164 |
|
def load_data(self):
    """
    Read the backtest CSV into ``self.data`` and record its date range.

    The file location comes from ``self._create_file_path()``. The
    "Datetime" column is read as a polars ``Datetime``, and the values of
    its first and last rows are stored in ``self._start_date_data_source``
    and ``self._end_date_data_source`` respectively.
    """
    csv_path = self._create_file_path()

    # low_memory=True asks polars to reduce memory pressure while reading;
    # per the polars documentation this comes at the expense of speed.
    # NOTE(review): newer polars releases rename `dtypes` to
    # `schema_overrides` - confirm against the pinned polars version.
    read_options = {
        "dtypes": {"Datetime": polars.Datetime},
        "low_memory": True,
    }
    self.data = polars.read_csv(csv_path, **read_options)

    opening_row = self.data.head(1)
    closing_row = self.data.tail(1)
    self._start_date_data_source = opening_row["Datetime"][0]
    self._end_date_data_source = closing_row["Datetime"][0]
159 | 175 |
|
160 | 176 | def _create_file_path(self):
|
161 | 177 | """
|
@@ -190,38 +206,21 @@ def get_data(
|
190 | 206 | source. This implementation will use polars to load and filter the
|
191 | 207 | data.
|
192 | 208 | """
|
193 |
| - if self.data is None: |
194 |
| - self.load_data() |
195 |
| - |
196 |
| - end_date = date |
197 | 209 |
|
198 |
| - if end_date is None: |
199 |
| - return self.data |
| 210 | + data = self.window_cache.get(date) |
| 211 | + if data is not None: |
| 212 | + return data |
200 | 213 |
|
201 |
| - start_date = self.create_start_date( |
202 |
| - end_date, self.time_frame, self.window_size |
203 |
| - ) |
| 214 | + # Find closest previous timestamp |
| 215 | + sorted_timestamps = sorted(self.window_cache.keys()) |
204 | 216 |
|
205 |
| - if start_date < self._start_date_data_source: |
206 |
| - raise OperationalException( |
207 |
| - f"Start date {start_date} is before the start date " |
208 |
| - f"of the data source {self._start_date_data_source}" |
209 |
| - ) |
| 217 | + closest_date = None |
| 218 | + for ts in reversed(sorted_timestamps): |
| 219 | + if ts < date: |
| 220 | + closest_date = ts |
| 221 | + break |
210 | 222 |
|
211 |
| - if end_date > self._end_date_data_source: |
212 |
| - raise OperationalException( |
213 |
| - f"End date {end_date} is after the end date " |
214 |
| - f"of the data source {self._end_date_data_source}" |
215 |
| - ) |
216 |
| - |
217 |
| - time_frame = TimeFrame.from_string(self.time_frame) |
218 |
| - start_date = start_date - \ |
219 |
| - timedelta(minutes=time_frame.amount_of_minutes) |
220 |
| - selection = self.data.filter( |
221 |
| - (self.data['Datetime'] >= start_date.strftime(DATETIME_FORMAT)) |
222 |
| - & (self.data['Datetime'] <= end_date.strftime(DATETIME_FORMAT)) |
223 |
| - ) |
224 |
| - return selection |
| 223 | + return self.window_cache.get(closest_date) if closest_date else None |
225 | 224 |
|
226 | 225 | def to_backtest_market_data_source(self) -> BacktestMarketDataSource:
|
227 | 226 | # Ignore this method for now
|
|
0 commit comments