From a97bdcc2e5a2a4b82939b48a88887afcc4960b5e Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Mon, 18 Feb 2019 20:59:13 -0700 Subject: [PATCH 01/11] initial implementation --- docs/sphinx/source/whatsnew/v0.6.2.rst | 2 +- pvlib/iotools/__init__.py | 1 + pvlib/iotools/crn.py | 54 ++++++++++++++++++++++++++ 3 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 pvlib/iotools/crn.py diff --git a/docs/sphinx/source/whatsnew/v0.6.2.rst b/docs/sphinx/source/whatsnew/v0.6.2.rst index 74f4ca1025..78e5ff15c1 100644 --- a/docs/sphinx/source/whatsnew/v0.6.2.rst +++ b/docs/sphinx/source/whatsnew/v0.6.2.rst @@ -16,7 +16,7 @@ API Changes Enhancements ~~~~~~~~~~~~ - +* Add US CRN data reader to `pvlib.iotools`. Bug fixes ~~~~~~~~~ diff --git a/pvlib/iotools/__init__.py b/pvlib/iotools/__init__.py index 112cc6fbcf..8d48a15568 100644 --- a/pvlib/iotools/__init__.py +++ b/pvlib/iotools/__init__.py @@ -7,3 +7,4 @@ from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401 from pvlib.iotools.ecmwf_macc import read_ecmwf_macc # noqa: F401 from pvlib.iotools.ecmwf_macc import get_ecmwf_macc # noqa: F401 +from pvlib.iotools.crn import read_crn # noqa: F401 diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py new file mode 100644 index 0000000000..87260da2c7 --- /dev/null +++ b/pvlib/iotools/crn.py @@ -0,0 +1,54 @@ +"""Functions to read data from the US Climate Reference Network (CRN). +""" + +import pandas as pd + + +HEADERS = 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' # noqa: E501 + + +def read_crn(filename): + """ + Read NOAA USCRN [1] fixed-width file into pandas dataframe. + + Parameters + ---------- + filename: str + filepath or url to read for the tsv file. + + Returns + ------- + data: Dataframe + A dataframe with datetime index and all of the variables listed + in the `VARIABLE_MAP` dict inside of the map_columns function, + along with their associated quality control flags. + + Notes + ----- + CRN files contain 5 minute averages labeled by the interval ending + time. Here, missing data is flagged as NaN, rather than the lowest + possible integer for a field (e.g. -999 or -99) + + References + ---------- + [1] U.S. Climate Reference Network + `https://www.ncdc.noaa.gov/crn/qcdatasets.html `_ + [2] Diamond, H. J. et. al., 2013: U.S. Climate Reference Network after + one decade of operations: status and assessment. Bull. Amer. + Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1` + """ + + # read in data + data = pd.read_fwf(filename, header=None, names=HEADERS.split(' ')) + + # set index + # UTC_TIME does not have leading 0s, so must zfill(4) to comply + # with %H%M format + dts = data[['UTC_DATE', 'UTC_TIME']].astype(str) + dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4), + format='%Y%m%d%H%M', utc=True) + data = data.set_index(dtindex) + + # set nans + + return data From bc5ed41640897496df246c3bfc3111b22cdc9e0c Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 09:30:40 -0700 Subject: [PATCH 02/11] add crn file reader --- .../data/CRNS0101-05-2019-AZ_Tucson_11_W.txt | 3 ++ pvlib/iotools/crn.py | 34 +++++++++++++++- pvlib/test/test_crn.py | 39 +++++++++++++++++++ 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt create mode 100644 pvlib/test/test_crn.py diff --git a/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt b/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt new file mode 100644 index 0000000000..51e25459e7 --- /dev/null +++ b/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt @@ -0,0 +1,3 @@ +53131 20190101 0005 20181231 1705 3 -111.17 32.24 10.4 0.0 10 0 9.0 C 0 52 0 -99.000 -9999.0 1144 0 2.20 0 +53131 20190101 0010 20181231 1710 3 -111.17 32.24 10.5 0.0 -99.00 0 9.0 C 0 52 0 -99.000 -9999.0 19 0 2.95 0 +53131 20190101 0015 20181231 1715 3 -111.17 32.24 -9999.0 0.0 9 0 8.9 C 0 52 0 -99.000 -9999.0 19 0 3.25 0 \ No newline at end of file diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 87260da2c7..6d8d74f31f 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -2,10 +2,31 @@ """ import pandas as pd +import numpy as np HEADERS = 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' # noqa: E501 +VARIABLE_MAP = { + 'LONGITUDE': 'longitude', + 'LATITUDE': 'latitude', + 'AIR_TEMPERATURE': 'temp_air', + 'SOLAR_RADIATION': 'ghi', + 'SR_FLAG': 'ghi_flag', + 'RELATIVE_HUMIDITY': 'relative_humidity', + 'RH_FLAG': 'relative_humidity_flag', + 'WIND_1_5': 'wind_speed', + 'WIND_FLAG': 'wind_speed_flag' +} + +# specify dtypes for potentially problematic values +DTYPES = { + 'AIR_TEMPERATURE': np.float64, + 'SOLAR_RADIATION': np.float64, + 'RELATIVE_HUMIDITY': np.float64, + 'WIND_1_5': np.float64, +} + def read_crn(filename): """ @@ -27,7 +48,9 @@ def read_crn(filename): ----- CRN files contain 5 minute averages labeled by the interval ending time. Here, missing data is flagged as NaN, rather than the lowest - possible integer for a field (e.g. -999 or -99) + possible integer for a field (e.g. -999 or -99). + Air temperature in deg C. + Wind speed in m/s at a height of 1.5 m above ground level. References ---------- @@ -39,7 +62,8 @@ def read_crn(filename): """ # read in data - data = pd.read_fwf(filename, header=None, names=HEADERS.split(' ')) + data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '), + dtype=DTYPES) # set index # UTC_TIME does not have leading 0s, so must zfill(4) to comply @@ -50,5 +74,11 @@ def read_crn(filename): data = data.set_index(dtindex) # set nans + for val in [-99, -999, -9999]: + data = data.where(data != val, np.nan) + + # rename and drop unwanted columns + data = data.rename(columns=VARIABLE_MAP) + data = data.filter(items=VARIABLE_MAP.values()) return data diff --git a/pvlib/test/test_crn.py b/pvlib/test/test_crn.py new file mode 100644 index 0000000000..531431da76 --- /dev/null +++ b/pvlib/test/test_crn.py @@ -0,0 +1,39 @@ +import inspect +import os + +import pandas as pd +from pandas.util.testing import assert_frame_equal +import numpy as np +from numpy import dtype + +from pvlib.iotools import crn + + +test_dir = os.path.dirname( + os.path.abspath(inspect.getfile(inspect.currentframe()))) +testfile = os.path.join(test_dir, + '../data/CRNS0101-05-2019-AZ_Tucson_11_W.txt') + + +def test_read_crn(): + columns = [ + 'longitude', 'latitude', 'temp_air', 'ghi', 'ghi_flag', + 'relative_humidity', 'relative_humidity_flag', 'wind_speed', + 'wind_speed_flag'] + index = pd.DatetimeIndex(['2019-01-01 00:05:00+00:00', + '2019-01-01 00:10:00+00:00', + '2019-01-01 00:15:00+00:00'], + dtype='datetime64[ns, UTC]', freq=None) + values = np.array([ + [-111.17, 32.24, 10.4, 10., 0, 52., 0, 2.2, 0], + [-111.17, 32.24, 10.5, np.nan, 0, 52, 0, 2.95, 0], + [-111.17, 32.24, np.nan, 9., 0, 52., 0, 3.25, 0]]) + dtypes = [ + dtype('float64'), dtype('float64'), dtype('float64'), + dtype('float64'), dtype('int64'), dtype('float64'), dtype('int64'), + dtype('float64'), dtype('int64')] + expected = pd.DataFrame(values, columns=columns, index=index) + for (col, _dtype) in zip(expected.columns, dtypes): + expected[col] = expected[col].astype(_dtype) + out = crn.read_crn(testfile) + assert_frame_equal(out, expected) From b41ff5c2f46a07c91fc7702f7b7146bfdb8fb6d3 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 09:42:20 -0700 Subject: [PATCH 03/11] add unused cols --- pvlib/iotools/crn.py | 2 -- pvlib/test/test_crn.py | 27 ++++++++++++++++++--------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 6d8d74f31f..d763d622c8 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -77,8 +77,6 @@ def read_crn(filename): for val in [-99, -999, -9999]: data = data.where(data != val, np.nan) - # rename and drop unwanted columns data = data.rename(columns=VARIABLE_MAP) - data = data.filter(items=VARIABLE_MAP.values()) return data diff --git a/pvlib/test/test_crn.py b/pvlib/test/test_crn.py index 531431da76..0a2aa7ad7a 100644 --- a/pvlib/test/test_crn.py +++ b/pvlib/test/test_crn.py @@ -4,7 +4,7 @@ import pandas as pd from pandas.util.testing import assert_frame_equal import numpy as np -from numpy import dtype +from numpy import dtype, nan from pvlib.iotools import crn @@ -17,21 +17,30 @@ def test_read_crn(): columns = [ - 'longitude', 'latitude', 'temp_air', 'ghi', 'ghi_flag', - 'relative_humidity', 'relative_humidity_flag', 'wind_speed', - 'wind_speed_flag'] + 'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN', + 'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi', 'ghi_flag', + 'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity', + 'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5', + 'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag'] index = pd.DatetimeIndex(['2019-01-01 00:05:00+00:00', '2019-01-01 00:10:00+00:00', '2019-01-01 00:15:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) values = np.array([ - [-111.17, 32.24, 10.4, 10., 0, 52., 0, 2.2, 0], - [-111.17, 32.24, 10.5, np.nan, 0, 52, 0, 2.95, 0], - [-111.17, 32.24, np.nan, 9., 0, 52., 0, 3.25, 0]]) + [53131, 20190101, 5, 20181231, 1705, 3, -111.17, 32.24, 10.4, 0.0, + 10.0, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 1144, 0, 2.2, 0], + [53131, 20190101, 10, 20181231, 1710, 3, -111.17, 32.24, 10.5, + 0.0, nan, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 19, 0, 2.95, 0], + [53131, 20190101, 15, 20181231, 1715, 3, -111.17, 32.24, nan, 0.0, + 9.0, 0, 8.9, 'C', 0, 52.0, 0, nan, nan, 19, 0, 3.25, 0]]) dtypes = [ + dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), + dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'), dtype('float64'), dtype('float64'), dtype('float64'), - dtype('float64'), dtype('int64'), dtype('float64'), dtype('int64'), - dtype('float64'), dtype('int64')] + dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'), + dtype('float64'), dtype('int64'), dtype('float64'), + dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'), + dtype('int64')] expected = pd.DataFrame(values, columns=columns, index=index) for (col, _dtype) in zip(expected.columns, dtypes): expected[col] = expected[col].astype(_dtype) From b4032a8ab0fb3df16722861f07e05363bccff1f9 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 09:43:52 -0700 Subject: [PATCH 04/11] add to api.rst --- docs/sphinx/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sphinx/source/api.rst b/docs/sphinx/source/api.rst index ccfeb80981..58045f441c 100644 --- a/docs/sphinx/source/api.rst +++ b/docs/sphinx/source/api.rst @@ -337,6 +337,7 @@ relevant to solar energy modeling. iotools.read_midc_raw_data_from_nrel iotools.read_ecmwf_macc iotools.get_ecmwf_macc + iotools.read_crn A :py:class:`~pvlib.location.Location` object may be created from metadata in some files. From 4f85a8dcbc471fc79228574633b08bc3eb000119 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 10:21:18 -0700 Subject: [PATCH 05/11] better dtype handling --- pvlib/iotools/crn.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index d763d622c8..c30d6041e3 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -3,6 +3,7 @@ import pandas as pd import numpy as np +from numpy import dtype HEADERS = 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' # noqa: E501 @@ -20,12 +21,15 @@ } # specify dtypes for potentially problematic values -DTYPES = { - 'AIR_TEMPERATURE': np.float64, - 'SOLAR_RADIATION': np.float64, - 'RELATIVE_HUMIDITY': np.float64, - 'WIND_1_5': np.float64, -} +DTYPES = [ + dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), + dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'), + dtype('float64'), dtype('float64'), dtype('float64'), + dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'), + dtype('float64'), dtype('int64'), dtype('float64'), + dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'), + dtype('int64') +] def read_crn(filename): @@ -62,8 +66,10 @@ def read_crn(filename): """ # read in data - data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '), - dtype=DTYPES) + data = pd.read_fwf(filename, header=None, names=HEADERS.split(' ')) + # loop here because dtype kwarg not supported in read_fwf until 0.20 + for (col, _dtype) in zip(data.columns, DTYPES): + data[col] = data[col].astype(_dtype) # set index # UTC_TIME does not have leading 0s, so must zfill(4) to comply @@ -72,6 +78,11 @@ def read_crn(filename): dtindex = pd.to_datetime(dts['UTC_DATE'] + dts['UTC_TIME'].str.zfill(4), format='%Y%m%d%H%M', utc=True) data = data.set_index(dtindex) + try: + # to_datetime(utc=True) does not work in older versions of pandas + data = data.tz_localize('UTC') + except TypeError: + pass # set nans for val in [-99, -999, -9999]: From b962780f47bfa78378d474e7c88109d4c2418fcf Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 10:53:45 -0700 Subject: [PATCH 06/11] pandas to 0.16. remove py 3.3 classifier --- .travis.yml | 2 +- docs/sphinx/source/whatsnew/v0.6.2.rst | 2 ++ setup.py | 3 +-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index add860d129..66731cf911 100644 --- a/.travis.yml +++ b/.travis.yml @@ -69,7 +69,7 @@ install: pip uninstall numpy --yes; pip uninstall pandas --yes; pip install --no-cache-dir numpy==1.10.1; - pip install --no-cache-dir pandas==0.15.0; + pip install --no-cache-dir pandas==0.16.0; fi - conda list - echo $PATH diff --git a/docs/sphinx/source/whatsnew/v0.6.2.rst b/docs/sphinx/source/whatsnew/v0.6.2.rst index 78e5ff15c1..f9230d23de 100644 --- a/docs/sphinx/source/whatsnew/v0.6.2.rst +++ b/docs/sphinx/source/whatsnew/v0.6.2.rst @@ -9,6 +9,8 @@ release. **Python 2.7 support will end on June 1, 2019**. Releases made after this date will require Python 3. (:issue:`501`) +**Minimum pandas requirement bumped 0.15.0=>0.16.0** + API Changes ~~~~~~~~~~~ diff --git a/setup.py b/setup.py index bd61b42c8d..e1b4cc6e43 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ URL = 'https://github.com/pvlib/pvlib-python' INSTALL_REQUIRES = ['numpy >= 1.10.1', - 'pandas >= 0.15.0', + 'pandas >= 0.16.0', 'pytz', 'six', ] @@ -61,7 +61,6 @@ 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', From 9e63811c37658db683d540240ed5aa40d686a670 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 11:09:30 -0700 Subject: [PATCH 07/11] maybe avoid issue with tz dtype specific to travis --- pvlib/test/test_crn.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pvlib/test/test_crn.py b/pvlib/test/test_crn.py index 0a2aa7ad7a..489fc9adbd 100644 --- a/pvlib/test/test_crn.py +++ b/pvlib/test/test_crn.py @@ -22,10 +22,10 @@ def test_read_crn(): 'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity', 'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag'] - index = pd.DatetimeIndex(['2019-01-01 00:05:00+00:00', - '2019-01-01 00:10:00+00:00', - '2019-01-01 00:15:00+00:00'], - dtype='datetime64[ns, UTC]', freq=None) + index = pd.DatetimeIndex(['2019-01-01 00:05:00', + '2019-01-01 00:10:00', + '2019-01-01 00:15:00'], + freq=None).tz_localize('UTC') values = np.array([ [53131, 20190101, 5, 20181231, 1705, 3, -111.17, 32.24, 10.4, 0.0, 10.0, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 1144, 0, 2.2, 0], From be488419793ed2222c36a7c70a9ca30025bf8a63 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Tue, 19 Feb 2019 13:31:51 -0700 Subject: [PATCH 08/11] use fixed width parsing --- .../data/CRNS0101-05-2019-AZ_Tucson_11_W.txt | 7 ++++--- pvlib/iotools/crn.py | 10 ++++++++- pvlib/test/test_crn.py | 21 +++++++++++-------- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt b/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt index 51e25459e7..ad0903b17c 100644 --- a/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt +++ b/pvlib/data/CRNS0101-05-2019-AZ_Tucson_11_W.txt @@ -1,3 +1,4 @@ -53131 20190101 0005 20181231 1705 3 -111.17 32.24 10.4 0.0 10 0 9.0 C 0 52 0 -99.000 -9999.0 1144 0 2.20 0 -53131 20190101 0010 20181231 1710 3 -111.17 32.24 10.5 0.0 -99.00 0 9.0 C 0 52 0 -99.000 -9999.0 19 0 2.95 0 -53131 20190101 0015 20181231 1715 3 -111.17 32.24 -9999.0 0.0 9 0 8.9 C 0 52 0 -99.000 -9999.0 19 0 3.25 0 \ No newline at end of file +53131 20190101 1610 20190101 0910 3 -111.17 32.24 -9999.0 0.0 296 0 4.4 C 0 90 0 -99.000 -9999.0 24 0 0.78 0 +53131 20190101 1615 20190101 0915 3 -111.17 32.24 3.3 0.0 183 0 4.0 C 0 87 0 -99.000 -9999.0 1182 0 0.36 0 +53131 20190101 1620 20190101 0920 3 -111.17 32.24 3.5 0.0 340 0 4.3 C 0 83 0 -99.000 -9999.0 1183 0 0.53 0 +53131 20190101 1625 20190101 0925 3 -111.17 32.24 4.0 0.0 393 0 4.8 C 0 81 0 -99.000 -9999.0 1223 0 0.64 0 \ No newline at end of file diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index c30d6041e3..7898836cfb 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -20,6 +20,13 @@ 'WIND_FLAG': 'wind_speed_flag' } +# as specified in CRN README.txt file. excludes 1 space between columns +WIDTHS = [5, 8, 4, 8, 4, 6, 7, 7, 7, 7, 6, 1, 7, 1, 1, 5, 1, 7, 7, 5, 1, 6, 1] +# add 1 to make fields contiguous (required by pandas.read_fwf) +WIDTHS = [w + 1 for w in WIDTHS] +# no space after last column +WIDTHS[-1] -= 1 + # specify dtypes for potentially problematic values DTYPES = [ dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), @@ -66,7 +73,8 @@ def read_crn(filename): """ # read in data - data = pd.read_fwf(filename, header=None, names=HEADERS.split(' ')) + data = pd.read_fwf(filename, header=None, names=HEADERS.split(' '), + widths=WIDTHS) # loop here because dtype kwarg not supported in read_fwf until 0.20 for (col, _dtype) in zip(data.columns, DTYPES): data[col] = data[col].astype(_dtype) diff --git a/pvlib/test/test_crn.py b/pvlib/test/test_crn.py index 489fc9adbd..24012384ef 100644 --- a/pvlib/test/test_crn.py +++ b/pvlib/test/test_crn.py @@ -22,17 +22,20 @@ def test_read_crn(): 'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity', 'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag'] - index = pd.DatetimeIndex(['2019-01-01 00:05:00', - '2019-01-01 00:10:00', - '2019-01-01 00:15:00'], + index = pd.DatetimeIndex(['2019-01-01 16:10:00', + '2019-01-01 16:15:00', + '2019-01-01 16:20:00', + '2019-01-01 16:25:00'], freq=None).tz_localize('UTC') values = np.array([ - [53131, 20190101, 5, 20181231, 1705, 3, -111.17, 32.24, 10.4, 0.0, - 10.0, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 1144, 0, 2.2, 0], - [53131, 20190101, 10, 20181231, 1710, 3, -111.17, 32.24, 10.5, - 0.0, nan, 0, 9.0, 'C', 0, 52.0, 0, nan, nan, 19, 0, 2.95, 0], - [53131, 20190101, 15, 20181231, 1715, 3, -111.17, 32.24, nan, 0.0, - 9.0, 0, 8.9, 'C', 0, 52.0, 0, nan, nan, 19, 0, 3.25, 0]]) + [53131, 20190101, 1610, 20190101, 910, 3, -111.17, 32.24, nan, + 0.0, 296.0, 0, 4.4, 'C', 0, 90.0, 0, nan, nan, 24, 0, 0.78, 0], + [53131, 20190101, 1615, 20190101, 915, 3, -111.17, 32.24, 3.3, + 0.0, 183.0, 0, 4.0, 'C', 0, 87.0, 0, nan, nan, 1182, 0, 0.36, 0], + [53131, 20190101, 1620, 20190101, 920, 3, -111.17, 32.24, 3.5, + 0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0], + [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0, + 0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]]) dtypes = [ dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'), From 70ee7130267e45ebf4935634a777d9ec1beba10b Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Thu, 21 Feb 2019 10:48:36 -0700 Subject: [PATCH 09/11] style --- pvlib/iotools/crn.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 7898836cfb..07df9cd332 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -6,7 +6,12 @@ from numpy import dtype -HEADERS = 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' # noqa: E501 +HEADERS = ( + 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE ' + 'AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG ' + 'SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG ' + 'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG' +) VARIABLE_MAP = { 'LONGITUDE': 'longitude', @@ -29,13 +34,10 @@ # specify dtypes for potentially problematic values DTYPES = [ - dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), - dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'), - dtype('float64'), dtype('float64'), dtype('float64'), - dtype('int64'), dtype('float64'), dtype('O'), dtype('int64'), - dtype('float64'), dtype('int64'), dtype('float64'), - dtype('float64'), dtype('int64'), dtype('int64'), dtype('float64'), - dtype('int64') + 'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64', + 'float64', 'float64', 'float64', 'int64', 'float64', 'O', 'int64', + 'float64', 'int64', 'float64', 'float64', 'int64', 'int64', 'float64', + 'int64' ] From 6e85833c1e22e53a7a1160ac64efbec0a17ce514 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Thu, 21 Feb 2019 10:49:52 -0700 Subject: [PATCH 10/11] unused import --- pvlib/iotools/crn.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 07df9cd332..358074b6c1 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -3,7 +3,6 @@ import pandas as pd import numpy as np -from numpy import dtype HEADERS = ( From 9f9d2fb797e758e9d77c057860d0ea7d40102e1b Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Thu, 21 Feb 2019 11:10:21 -0700 Subject: [PATCH 11/11] more style and doc issues --- pvlib/iotools/crn.py | 30 +++++++++++++++++------------- pvlib/test/test_crn.py | 19 ++++++++++--------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/pvlib/iotools/crn.py b/pvlib/iotools/crn.py index 358074b6c1..cb08fb6df2 100644 --- a/pvlib/iotools/crn.py +++ b/pvlib/iotools/crn.py @@ -42,35 +42,39 @@ def read_crn(filename): """ - Read NOAA USCRN [1] fixed-width file into pandas dataframe. + Read NOAA USCRN [1]_ [2]_ fixed-width file into pandas dataframe. Parameters ---------- filename: str - filepath or url to read for the tsv file. + filepath or url to read for the fixed-width file. Returns ------- data: Dataframe - A dataframe with datetime index and all of the variables listed - in the `VARIABLE_MAP` dict inside of the map_columns function, - along with their associated quality control flags. + A dataframe with DatetimeIndex and all of the variables in the + file. Notes ----- CRN files contain 5 minute averages labeled by the interval ending time. Here, missing data is flagged as NaN, rather than the lowest - possible integer for a field (e.g. -999 or -99). - Air temperature in deg C. - Wind speed in m/s at a height of 1.5 m above ground level. + possible integer for a field (e.g. -999 or -99). Air temperature in + deg C. Wind speed in m/s at a height of 1.5 m above ground level. + + Variables corresponding to standard pvlib variables are renamed, + e.g. `SOLAR_RADIATION` becomes `ghi`. See the + `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping. References ---------- - [1] U.S. Climate Reference Network - `https://www.ncdc.noaa.gov/crn/qcdatasets.html `_ - [2] Diamond, H. J. et. al., 2013: U.S. Climate Reference Network after - one decade of operations: status and assessment. Bull. Amer. - Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1` + .. [1] U.S. Climate Reference Network + `https://www.ncdc.noaa.gov/crn/qcdatasets.html + `_ + + .. [2] Diamond, H. J. et. al., 2013: U.S. Climate Reference Network + after one decade of operations: status and assessment. Bull. + Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1` """ # read in data diff --git a/pvlib/test/test_crn.py b/pvlib/test/test_crn.py index 24012384ef..9cccf3568a 100644 --- a/pvlib/test/test_crn.py +++ b/pvlib/test/test_crn.py @@ -18,7 +18,8 @@ def test_read_crn(): columns = [ 'WBANNO', 'UTC_DATE', 'UTC_TIME', 'LST_DATE', 'LST_TIME', 'CRX_VN', - 'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi', 'ghi_flag', + 'longitude', 'latitude', 'temp_air', 'PRECIPITATION', 'ghi', + 'ghi_flag', 'SURFACE_TEMPERATURE', 'ST_TYPE', 'ST_FLAG', 'relative_humidity', 'relative_humidity_flag', 'SOIL_MOISTURE_5', 'SOIL_TEMPERATURE_5', 'WETNESS', 'WET_FLAG', 'wind_speed', 'wind_speed_flag'] @@ -28,14 +29,14 @@ def test_read_crn(): '2019-01-01 16:25:00'], freq=None).tz_localize('UTC') values = np.array([ - [53131, 20190101, 1610, 20190101, 910, 3, -111.17, 32.24, nan, - 0.0, 296.0, 0, 4.4, 'C', 0, 90.0, 0, nan, nan, 24, 0, 0.78, 0], - [53131, 20190101, 1615, 20190101, 915, 3, -111.17, 32.24, 3.3, - 0.0, 183.0, 0, 4.0, 'C', 0, 87.0, 0, nan, nan, 1182, 0, 0.36, 0], - [53131, 20190101, 1620, 20190101, 920, 3, -111.17, 32.24, 3.5, - 0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0], - [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0, - 0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]]) + [53131, 20190101, 1610, 20190101, 910, 3, -111.17, 32.24, nan, + 0.0, 296.0, 0, 4.4, 'C', 0, 90.0, 0, nan, nan, 24, 0, 0.78, 0], + [53131, 20190101, 1615, 20190101, 915, 3, -111.17, 32.24, 3.3, + 0.0, 183.0, 0, 4.0, 'C', 0, 87.0, 0, nan, nan, 1182, 0, 0.36, 0], + [53131, 20190101, 1620, 20190101, 920, 3, -111.17, 32.24, 3.5, + 0.0, 340.0, 0, 4.3, 'C', 0, 83.0, 0, nan, nan, 1183, 0, 0.53, 0], + [53131, 20190101, 1625, 20190101, 925, 3, -111.17, 32.24, 4.0, + 0.0, 393.0, 0, 4.8, 'C', 0, 81.0, 0, nan, nan, 1223, 0, 0.64, 0]]) dtypes = [ dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'), dtype('float64'), dtype('float64'),