From af07bd16fef5aa59c780b371ea8366abdd83c333 Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Mon, 6 May 2019 11:08:01 -0700 Subject: [PATCH 1/9] parse 5minute, 15 minute and hourly srml data --- pvlib/iotools/srml.py | 37 ++++++++++++++++++++++++++++++------- pvlib/test/test_srml.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 7 deletions(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index 864455effb..7615a1608e 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -134,11 +134,17 @@ def format_index(df): year = int(df.columns[1]) df_doy = df[df.columns[0]] # Times are expressed as integers from 1-2400, we convert to 0-2359 by - # subracting one and then correcting the minutes at each former hour. - df_time = df[df.columns[1]] - 1 - fifty_nines = df_time % 100 == 99 - times = df_time.where(~fifty_nines, df_time - 40) - + # subracting the length of one interval and then correcting the times + # at each former hour. interval_length is determined by taking the + # difference of the first two rows of the time column. + interval_length = int(df[df.columns[1]][:2].diff()[1]) + df_time = df[df.columns[1]] - interval_length + if interval_length == 100: + # Hourly files do not require fixing the former hour timestamps. + times = df_time + else: + old_hours = df_time % 100 == (100 - interval_length) + times = df_time.where(~old_hours, df_time - 40) times = times.apply(lambda x: '{:04.0f}'.format(x)) doy = df_doy.apply(lambda x: '{:03.0f}'.format(x)) dts = pd.to_datetime(str(year) + '-' + doy + '-' + times, @@ -161,14 +167,31 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO'): month: int Month to request data for. filetype: string - SRML file type to gather. 'RO' and 'PO' are the - only minute resolution files. + SRML file type to gather. See notes for explanation. Returns ------- data: pd.DataFrame One month of data from SRML. 
+ Notes + ----- + File types designate the time interval of a file and if it is + raw or processed data. For instance, `RO` designates raw one + minute data and `PO` designates processed one minute data. The + availability of file types varies between sites. Below is a + table of file types and their time intervals. See [1] for site + information. + + ============= ============ ================== + time interval raw filetype processed filetype + ============= ============ ================== + 1 minute RO PO + 5 minute RF PF + 15 minute RQ PQ + hourly RH PH + ============= ============ ================== + References ---------- [1] University of Oregon Solar Radiation Measurement Laboratory diff --git a/pvlib/test/test_srml.py b/pvlib/test/test_srml.py index 29688715c3..657c1104e4 100644 --- a/pvlib/test/test_srml.py +++ b/pvlib/test/test_srml.py @@ -73,3 +73,33 @@ def test_read_srml_month_from_solardat(): file_data = srml.read_srml(url) requested = srml.read_srml_month_from_solardat('EU', 2018, 1) assert file_data.equals(requested) + + +@pytest.mark.parametrize('station, year, month, filetype', [ + ('TW', 2019, 4, 'RQ'), +]) +def test_15_minute_dt_index( + station, year, month, filetype): + data = srml.read_srml_month_from_solardat(station, year, month, filetype) + start = pd.Timestamp('{:04d}{:02d}01 00:00'.format(year, month)) + start = start.tz_localize('Etc/GMT+8') + end = pd.Timestamp('{:04d}{:02d}30 23:45'.format(year, month)) + end = end.tz_localize('Etc/GMT+8') + assert data.index[0] == start + assert data.index[-1] == end + assert (data.index[3::4].minute == 45).all() + + +@pytest.mark.parametrize('station, year, month, filetype', [ + ('CD', 1986, 4, 'PH'), +]) +def test_hourly_dt_index( + station, year, month, filetype): + data = srml.read_srml_month_from_solardat(station, year, month, filetype) + start = pd.Timestamp('{:04d}{:02d}01 00:00'.format(year, month)) + start = start.tz_localize('Etc/GMT+8') + end = pd.Timestamp('{:04d}{:02d}30 
23:00'.format(year, month)) + end = end.tz_localize('Etc/GMT+8') + assert data.index[0] == start + assert data.index[-1] == end + assert (data.index.minute == 0).all() From 9d7f6577b69eab416b0bb158e04a787bbdb6cf59 Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Mon, 6 May 2019 11:12:31 -0700 Subject: [PATCH 2/9] fix awkward comment --- pvlib/iotools/srml.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index 7615a1608e..49e83bae26 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -176,12 +176,11 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO'): Notes ----- - File types designate the time interval of a file and if it is - raw or processed data. For instance, `RO` designates raw one - minute data and `PO` designates processed one minute data. The - availability of file types varies between sites. Below is a - table of file types and their time intervals. See [1] for site - information. + File types designate the time interval of a file and if it contains + raw or processed data. For instance, `RO` designates raw, one minute + data and `PO` designates processed one minute data. The availability + of file types varies between sites. Below is a table of file types + and their time intervals. See [1] for site information. 
============= ============ ================== time interval raw filetype processed filetype From e8944f5c733799e438ce2a946bd23ab6a458cb92 Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Mon, 6 May 2019 11:20:09 -0700 Subject: [PATCH 3/9] update note about index shift, update whatsnew --- docs/sphinx/source/whatsnew/v0.6.2.rst | 3 +++ pvlib/iotools/srml.py | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/sphinx/source/whatsnew/v0.6.2.rst b/docs/sphinx/source/whatsnew/v0.6.2.rst index c06c3640c0..443f2b3427 100644 --- a/docs/sphinx/source/whatsnew/v0.6.2.rst +++ b/docs/sphinx/source/whatsnew/v0.6.2.rst @@ -51,6 +51,8 @@ Bug fixes :py:func:`~pvlib.irradiance.klucher` and :py:func:`~pvlib.pvsystem.calcparams_desoto`. (:issue:`698`) * Fix :py:class:`~pvlib.forecast.NDFD` model by updating variables. +* Fix :py:func:`~pvlib.iotools.srml.format_index` to parse non + one-minute data correctly. (:issue:`709`) Testing @@ -67,3 +69,4 @@ Contributors * Kevin Anderson (:ghuser:`kevinsa5`) * :ghuser:`bentomlinson` * Jonathan Gaffiot (:ghuser:`jgaffiot`) +* Leland Boeman (:ghuser:`lboeman`) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index 49e83bae26..9e67574263 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -42,11 +42,11 @@ def read_srml(filename): Notes ----- - The time index is shifted back one minute to account for 2400 hours, - and to avoid time parsing errors on leap years. The returned data - values should be understood to occur during the interval from the - time of the row until the time of the next row. This is consistent - with pandas' default labeling behavior. + The time index is shifted back by one interval to account for the + daily endtime of 2400, and to avoid time parsing errors on leap + years. The returned data values should be understood to occur + during the interval from the time of the row until the time of the + next row. This is consistent with pandas' default labeling behavior. 
See SRML's `Archival Files`_ page for more information. From a1ca00b63df46b701848eb59da696d11b456777f Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Mon, 6 May 2019 11:23:07 -0700 Subject: [PATCH 4/9] add network decorator to new tests --- pvlib/test/test_srml.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pvlib/test/test_srml.py b/pvlib/test/test_srml.py index 657c1104e4..2e92863c77 100644 --- a/pvlib/test/test_srml.py +++ b/pvlib/test/test_srml.py @@ -75,6 +75,7 @@ def test_read_srml_month_from_solardat(): assert file_data.equals(requested) +@network @pytest.mark.parametrize('station, year, month, filetype', [ ('TW', 2019, 4, 'RQ'), ]) @@ -90,6 +91,7 @@ def test_15_minute_dt_index( assert (data.index[3::4].minute == 45).all() +@network @pytest.mark.parametrize('station, year, month, filetype', [ ('CD', 1986, 4, 'PH'), ]) From c32c8b32d3ff6539c1916567cc2b47bbafad4f1a Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Mon, 6 May 2019 11:52:01 -0700 Subject: [PATCH 5/9] comment tweak --- pvlib/iotools/srml.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index 9e67574263..d20f5d2247 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -44,9 +44,10 @@ def read_srml(filename): ----- The time index is shifted back by one interval to account for the daily endtime of 2400, and to avoid time parsing errors on leap - years. The returned data values should be understood to occur - during the interval from the time of the row until the time of the - next row. This is consistent with pandas' default labeling behavior. + years. The returned data values are labeled by the left endpoint of + the interval, and should be understood to occur during the interval from + the time of the row until the time of the next row. This is consistent + with pandas' default labeling behavior. See SRML's `Archival Files`_ page for more information. 
From 55be463d3bedce25cb7ef89dc0414ff1d863291c Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Mon, 6 May 2019 12:02:11 -0700 Subject: [PATCH 6/9] fix tests for py27-min --- pvlib/test/test_srml.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/pvlib/test/test_srml.py b/pvlib/test/test_srml.py index 2e92863c77..9924701609 100644 --- a/pvlib/test/test_srml.py +++ b/pvlib/test/test_srml.py @@ -76,15 +76,11 @@ def test_read_srml_month_from_solardat(): @network -@pytest.mark.parametrize('station, year, month, filetype', [ - ('TW', 2019, 4, 'RQ'), -]) -def test_15_minute_dt_index( - station, year, month, filetype): - data = srml.read_srml_month_from_solardat(station, year, month, filetype) - start = pd.Timestamp('{:04d}{:02d}01 00:00'.format(year, month)) +def test_15_minute_dt_index(): + data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ') + start = pd.Timestamp('20190401 00:00') start = start.tz_localize('Etc/GMT+8') - end = pd.Timestamp('{:04d}{:02d}30 23:45'.format(year, month)) + end = pd.Timestamp('20190430 23:45') end = end.tz_localize('Etc/GMT+8') assert data.index[0] == start assert data.index[-1] == end @@ -92,15 +88,11 @@ def test_15_minute_dt_index( @network -@pytest.mark.parametrize('station, year, month, filetype', [ - ('CD', 1986, 4, 'PH'), -]) -def test_hourly_dt_index( - station, year, month, filetype): - data = srml.read_srml_month_from_solardat(station, year, month, filetype) - start = pd.Timestamp('{:04d}{:02d}01 00:00'.format(year, month)) +def test_hourly_dt_index(): + data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH') + start = pd.Timestamp('19860401 00:00') start = start.tz_localize('Etc/GMT+8') - end = pd.Timestamp('{:04d}{:02d}30 23:00'.format(year, month)) + end = pd.Timestamp('19860430 23:00') end = end.tz_localize('Etc/GMT+8') assert data.index[0] == start assert data.index[-1] == end From da2ce6bb9de8a77ccaaea2159f64cc9414f5f3ad Mon Sep 17 00:00:00 2001 From: Leland Boeman 
Date: Mon, 6 May 2019 14:00:49 -0700 Subject: [PATCH 7/9] more readable interval_length expression --- pvlib/iotools/srml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index d20f5d2247..ecc38540e0 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -138,7 +138,7 @@ def format_index(df): # subracting the length of one interval and then correcting the times # at each former hour. interval_length is determined by taking the # difference of the first two rows of the time column. - interval_length = int(df[df.columns[1]][:2].diff()[1]) + interval_length = df[df.columns[1]][1] - df[df.columns[1]][0] df_time = df[df.columns[1]] - interval_length if interval_length == 100: # Hourly files do not require fixing the former hour timestamps. From de734c0bb89c8b7e2958c82e8f6bfbaf0b595844 Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Tue, 7 May 2019 08:41:37 -0700 Subject: [PATCH 8/9] comment date parsing logic --- pvlib/iotools/srml.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index ecc38540e0..8c5cb9dc23 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -138,13 +138,23 @@ def format_index(df): # subracting the length of one interval and then correcting the times # at each former hour. interval_length is determined by taking the # difference of the first two rows of the time column. + # e.g. The first two rows of hourly data are 100 and 200 + # so interval_length is 100. interval_length = df[df.columns[1]][1] - df[df.columns[1]][0] df_time = df[df.columns[1]] - interval_length if interval_length == 100: # Hourly files do not require fixing the former hour timestamps. times = df_time else: - old_hours = df_time % 100 == (100 - interval_length) + # Because hours are represented by some multiple of 100, shifting + # results in invalid values. + # + # e.g. 
200 (for 02:00) shifted becomes 185, the desired result is + # 145 (for 01:45) + # + # So we find all times with minutes greater than 60 and remove 40 + # to correct to valid times. + old_hours = df_time % 100 > 60 times = df_time.where(~old_hours, df_time - 40) times = times.apply(lambda x: '{:04.0f}'.format(x)) doy = df_doy.apply(lambda x: '{:03.0f}'.format(x)) From 3e220ea2eca105c1fd20818510f7d8e04d809167 Mon Sep 17 00:00:00 2001 From: Leland Boeman Date: Tue, 7 May 2019 08:55:29 -0700 Subject: [PATCH 9/9] more clarification of shifting logic --- pvlib/iotools/srml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pvlib/iotools/srml.py b/pvlib/iotools/srml.py index 8c5cb9dc23..6312dcd42e 100644 --- a/pvlib/iotools/srml.py +++ b/pvlib/iotools/srml.py @@ -149,8 +149,8 @@ def format_index(df): # Because hours are represented by some multiple of 100, shifting # results in invalid values. # - # e.g. 200 (for 02:00) shifted becomes 185, the desired result is - # 145 (for 01:45) + # e.g. 200 (for 02:00) shifted by 15 minutes becomes 185, the + # desired result is 145 (for 01:45) # # So we find all times with minutes greater than 60 and remove 40 # to correct to valid times.