Skip to content

add SOLRAD data parser to iotools #667

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Mar 1, 2019
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/sphinx/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ relevant to solar energy modeling.
iotools.read_ecmwf_macc
iotools.get_ecmwf_macc
iotools.read_crn
iotools.read_solrad

A :py:class:`~pvlib.location.Location` object may be created from metadata
in some files.
Expand Down
1 change: 1 addition & 0 deletions docs/sphinx/source/whatsnew/v0.6.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ API Changes
Enhancements
~~~~~~~~~~~~
* Add US CRN data reader to `pvlib.iotools`.
* Add SOLRAD data reader to `pvlib.iotools`.

Bug fixes
~~~~~~~~~
Expand Down
6 changes: 6 additions & 0 deletions pvlib/data/abq19056.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Albuquerque
35.03796 -106.62211 1617 -7 version 1
2019 56 2 25 0 0 0.000 79.30 104.5 0 60.5 0 97.8 0 5.9 0 43.6 0 0.382 2.280 0.431 0.066
2019 56 2 25 0 1 0.017 79.49 102.6 0 59.7 0 96.2 0 5.7 0 43.6 0 0.764 1.800 0.431 0.063
2019 56 2 25 0 2 0.033 79.68 102.1 0 65.8 0 94.8 0 5.5 0 43.6 0 0.382 4.079 0.323 0.062
2019 56 2 25 0 3 0.050 79.87 102.6 0 76.3 0 -9999.9 0 5.3 0 43.6 0 0.509 1.920 0.215 0.059
6 changes: 6 additions & 0 deletions pvlib/data/msn19056.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Madison
43.07250 -89.41133 271 -6 version 1
2019 56 2 25 0 0 0.000 94.28 -2.3 0 0.0 0 0.4 0 -9999.9 1 -9999.9 1 187.2 0 265.6 0 265.3 0 0.000 0.000 0.000 -9999.900 0.002 26.000 27.000
2019 56 2 25 0 1 0.017 94.46 -2.3 0 0.0 0 0.1 0 -9999.9 1 -9999.9 1 188.2 0 265.6 0 265.3 0 0.133 0.128 0.223 -9999.900 0.001 26.000 72.000
2019 56 2 25 0 2 0.033 94.64 -2.7 0 -0.2 0 0.0 0 -9999.9 1 -9999.9 1 187.6 0 265.6 0 265.3 0 0.000 0.257 0.000 -9999.900 0.001 24.000 42.000
2019 56 2 25 0 3 0.050 94.82 -2.5 0 0.4 0 0.0 0 -9999.9 1 -9999.9 1 187.3 0 265.6 0 265.3 0 0.266 0.385 0.000 -9999.900 0.001 26.000 48.000
1 change: 1 addition & 0 deletions pvlib/iotools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@
from pvlib.iotools.ecmwf_macc import read_ecmwf_macc # noqa: F401
from pvlib.iotools.ecmwf_macc import get_ecmwf_macc # noqa: F401
from pvlib.iotools.crn import read_crn # noqa: F401
from pvlib.iotools.solrad import read_solrad # noqa: F401
122 changes: 122 additions & 0 deletions pvlib/iotools/solrad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""Functions to read data from the NOAA SOLRAD network.
"""

import numpy as np
import pandas as pd

# column names that follow pvlib conventions
BASE_HEADERS = (
    'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
    'solar_zenith',
    'ghi', 'ghi_flag',
    'dni', 'dni_flag',
    'dhi', 'dhi_flag',
    'uvb', 'uvb_flag',
    'uvb_temp', 'uvb_temp_flag',
)

# the remaining names follow the README_SOLRAD.txt variable names
STD_HEADERS = ('std_dw_psp', 'std_direct', 'std_diffuse', 'std_uvb')

# extra measurement/flag pairs that are only used for the Madison layout
DPIR_HEADERS = (
    'dpir', 'dpir_flag', 'dpirc', 'dpirc_flag', 'dpird', 'dpird_flag')

# standard layout: base columns followed by the standard deviations
HEADERS = BASE_HEADERS + STD_HEADERS

# Madison layout: dpir columns sit between the base columns and the
# standard deviations, and three more standard deviations are appended
MADISON_HEADERS = (
    BASE_HEADERS + DPIR_HEADERS + STD_HEADERS
    + ('std_dpir', 'std_dpirc', 'std_dpird'))


# field widths as specified in the README_SOLRAD.txt file, not counting
# the single space between columns
WIDTHS = [4, 3, 2, 2, 2, 2, 6, 6] + [7, 1] * 5 + [9] * 4
MADISON_WIDTHS = [4, 3, 2, 2, 2, 2, 6, 6] + [7, 1] * 8 + [9] * 7
# pandas.read_fwf requires contiguous fields, so every field absorbs one
# separator space except the last one (no space after the last column)
# NOTE(review): the ABQ sample data ends in 0.066 but the test expects
# 0.06 for std_uvb -- looks like the last field may be one character
# short; confirm against a raw data file before changing.
WIDTHS = [w + 1 for w in WIDTHS[:-1]] + WIDTHS[-1:]
MADISON_WIDTHS = [w + 1 for w in MADISON_WIDTHS[:-1]] + MADISON_WIDTHS[-1:]

# one dtype per column, in the same order as HEADERS:
# 6 integer date/time fields, decimal_time and solar_zenith as floats,
# 5 (value, flag) pairs, then 4 standard deviations
DTYPES = (
    ['int64'] * 6
    + ['float64'] * 2
    + ['float64', 'int64'] * 5
    + ['float64'] * 4)

# same pattern for MADISON_HEADERS: 8 (value, flag) pairs and
# 7 standard deviations
MADISON_DTYPES = (
    ['int64'] * 6
    + ['float64'] * 2
    + ['float64', 'int64'] * 8
    + ['float64'] * 7)


def read_solrad(filename):
    """
    Read NOAA SOLRAD [1]_ [2]_ fixed-width file into pandas dataframe.

    Parameters
    ----------
    filename: str
        filepath or url to read for the fixed-width file. If the name
        contains ``'msn'`` the file is parsed with the Madison column
        layout, otherwise with the standard column layout.

    Returns
    -------
    data: Dataframe
        A dataframe with a UTC-localized DatetimeIndex and all of the
        variables in the file.

    Notes
    -----
    SOLRAD data resolution is described by the README_SOLRAD.txt:
    "Before 1-jan. 2015 the data were reported as 3-min averages;
    on and after 1-Jan. 2015, SOLRAD data are reported as 1-min.
    averages of 1-sec. samples."
    Here, missing data is flagged as NaN, rather than -9999.9.

    The first two lines of the file are skipped and are not returned.

    References
    ----------
    .. [1] NOAA SOLRAD Network
       `https://www.esrl.noaa.gov/gmd/grad/solrad/index.html
       <https://www.esrl.noaa.gov/gmd/grad/solrad/index.html>`_

    .. [2] B. B. Hicks et al., (1996), The NOAA Integrated Surface
       Irradiance Study (ISIS). A New Surface Radiation Monitoring
       Program. Bull. Amer. Meteor. Soc., 77, 2857-2864.
       :doi:`10.1175/1520-0477(1996)077<2857:TNISIS>2.0.CO;2`
    """
    # str() so the substring test also works for path-like objects and
    # never iterates over (and consumes) a file-like object
    if 'msn' in str(filename):
        names = MADISON_HEADERS
        widths = MADISON_WIDTHS
        dtypes = MADISON_DTYPES
    else:
        names = HEADERS
        widths = WIDTHS
        dtypes = DTYPES

    # read in data
    data = pd.read_fwf(filename, header=None, skiprows=2, names=names,
                       widths=widths, na_values=-9999.9)

    # loop here because dtype kwarg not supported in read_fwf until 0.20
    for (col, _dtype) in zip(data.columns, dtypes):
        ser = data[col].astype(_dtype)
        if _dtype == 'float64':
            # older versions of pandas/numpy read '-9999.9' as
            # -9999.8999999999996 and fail to set nan in read_fwf,
            # so manually set nan
            ser = ser.where(ser > -9999, other=np.nan)
        data[col] = ser

    # set index
    # columns do not have leading 0s, so must zfill(2) to comply
    # with %m%d%H%M format
    dts = data[['month', 'day', 'hour', 'minute']].astype(str).apply(
        lambda x: x.str.zfill(2))
    dtindex = pd.to_datetime(
        data['year'].astype(str) + dts['month'] + dts['day'] + dts['hour'] +
        dts['minute'], format='%Y%m%d%H%M', utc=True)
    data = data.set_index(dtindex)

    # to_datetime(utc=True) does not work in older versions of pandas and
    # leaves the index naive; localize only in that case instead of
    # relying on tz_localize raising TypeError for an already-aware index
    if data.index.tz is None:
        data = data.tz_localize('UTC')

    return data
106 changes: 106 additions & 0 deletions pvlib/test/test_solrad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import inspect
import os

import pandas as pd
from pandas.util.testing import assert_frame_equal
import numpy as np
from numpy import nan

import pytest

from pvlib.iotools import solrad


# directory that contains this test module
test_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
# build the path component by component so that only the platform's own
# separator is used (no mix of '\\' and '/' on Windows)
testfile = os.path.join(test_dir, os.pardir, 'data', 'abq19056.dat')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be testfile = os.path.join(test_dir, 'data', '703165TY.csv') for cross-platform compatibility

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path you've specified would be pvlib/test/data/703165TY.csv, but the data is in pvlib/data/abq19056.dat. I am pretty sure that os.path handles the .. specification as needed for the platform. The appveyor builds do not object to this pattern.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Of course. I substituted a file I have to test. Doesn't work on Windows. C:\python\pvlib-dev\pvlib-python\pvlib\../data/abq19056.dat

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm... Appveyor tests run on Windows so I am quite confused.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm... the string is interpreted correctly when a file operation is performed, so it's OK as coded. I still think my suggestion is an improvement.


import os
import inspect

test_dir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))

print(test_dir)

testfile = os.path.join(test_dir, '../data/703165TY.csv')
print(testfile)

with open(testfile) as infile:
    r = infile.readline()
    print(r)

produces

C:\python\pvlib-dev\pvlib-python\pvlib\test
C:\python\pvlib-dev\pvlib-python\pvlib\test\../data/703165TY.csv
703165,"SAND POINT",AK,-9.0,55.317,-160.517,7

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is '..' acceptable? testfile = os.path.join(test_dir, '..', 'data', '703165TY.csv')

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that would maintain the OS-specific separator so the string doesn't have a mix of \ and /

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or use os.pardir to avoid the ..

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I thought os.path.join was smarter than it was because the tests have never complained about that pattern in the past. Looking forward to the Python 3 only days and using pathlib.

# Madison sample file; joined component by component so that only the
# platform's own path separator is used
testfile_mad = os.path.join(test_dir, os.pardir, 'data', 'msn19056.dat')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above comment



# Expected result of reading the Albuquerque sample file (abq19056.dat).

# expected column names, in file order
columns = [
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag', 'std_dw_psp', 'std_direct',
'std_diffuse', 'std_uvb']
# expected UTC index: one timestamp per data row; also reused for the
# Madison test case
index = pd.DatetimeIndex(['2019-02-25 00:00:00',
'2019-02-25 00:01:00',
'2019-02-25 00:02:00',
'2019-02-25 00:03:00'],
freq=None).tz_localize('UTC')
# expected values, one row per timestamp and one entry per column;
# nan marks a -9999.9 (missing) entry in the data file
# NOTE(review): the last entry of each row (std_uvb) is 0.06/0.05 here
# while abq19056.dat lists 0.066, 0.063, 0.062, 0.059 -- looks like the
# reader drops the final digit of the last column; confirm whether that
# is intended.
values = np.array([
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 0.000e+00,
0.000e+00, 7.930e+01, 1.045e+02, 0.000e+00, 6.050e+01, 0.000e+00,
9.780e+01, 0.000e+00, 5.900e+00, 0.000e+00, 4.360e+01, 0.000e+00,
3.820e-01, 2.280e+00, 4.310e-01, 6.000e-02],
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 1.000e+00,
1.700e-02, 7.949e+01, 1.026e+02, 0.000e+00, 5.970e+01, 0.000e+00,
9.620e+01, 0.000e+00, 5.700e+00, 0.000e+00, 4.360e+01, 0.000e+00,
7.640e-01, 1.800e+00, 4.310e-01, 6.000e-02],
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 2.000e+00,
3.300e-02, 7.968e+01, 1.021e+02, 0.000e+00, 6.580e+01, 0.000e+00,
9.480e+01, 0.000e+00, 5.500e+00, 0.000e+00, 4.360e+01, 0.000e+00,
3.820e-01, 4.079e+00, 3.230e-01, 6.000e-02],
[2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00, 3.000e+00,
5.000e-02, 7.987e+01, 1.026e+02, 0.000e+00, 7.630e+01, 0.000e+00,
nan, 0.000e+00, 5.300e+00, 0.000e+00, 4.360e+01, 0.000e+00,
5.090e-01, 1.920e+00, 2.150e-01, 5.000e-02]])
# expected dtype of each column, in the same order as `columns`
dtypes = [
'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64',
'float64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
'float64', 'int64', 'float64', 'int64', 'float64', 'float64',
'float64', 'float64']

# Expected result of reading the Madison sample file (msn19056.dat),
# which has extra dpir/dpirc/dpird columns, their flags and their
# standard deviations.

# expected column names, in file order
columns_mad = [
'year', 'julian_day', 'month', 'day', 'hour', 'minute', 'decimal_time',
'solar_zenith', 'ghi', 'ghi_flag', 'dni', 'dni_flag', 'dhi', 'dhi_flag',
'uvb', 'uvb_flag', 'uvb_temp', 'uvb_temp_flag', 'dpir', 'dpir_flag',
'dpirc', 'dpirc_flag', 'dpird', 'dpird_flag', 'std_dw_psp',
'std_direct', 'std_diffuse', 'std_uvb', 'std_dpir', 'std_dpirc',
'std_dpird']
# expected values, one row per timestamp and one entry per column;
# nan marks a -9999.9 / -9999.900 (missing) entry in the data file
values_mad = np.array([
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
0.000e+00, 0.000e+00, 9.428e+01, -2.300e+00, 0.000e+00,
0.000e+00, 0.000e+00, 4.000e-01, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.872e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 0.000e+00,
0.000e+00, 0.000e+00, nan, 2.000e-03, 2.600e+01,
2.700e+01],
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
1.000e+00, 1.700e-02, 9.446e+01, -2.300e+00, 0.000e+00,
0.000e+00, 0.000e+00, 1.000e-01, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.882e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 1.330e-01,
1.280e-01, 2.230e-01, nan, 1.000e-03, 2.600e+01,
7.200e+01],
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
2.000e+00, 3.300e-02, 9.464e+01, -2.700e+00, 0.000e+00,
-2.000e-01, 0.000e+00, 0.000e+00, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.876e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 0.000e+00,
2.570e-01, 0.000e+00, nan, 1.000e-03, 2.400e+01,
4.200e+01],
[ 2.019e+03, 5.600e+01, 2.000e+00, 2.500e+01, 0.000e+00,
3.000e+00, 5.000e-02, 9.482e+01, -2.500e+00, 0.000e+00,
4.000e-01, 0.000e+00, 0.000e+00, 0.000e+00, nan,
1.000e+00, nan, 1.000e+00, 1.873e+02, 0.000e+00,
2.656e+02, 0.000e+00, 2.653e+02, 0.000e+00, 2.660e-01,
3.850e-01, 0.000e+00, nan, 1.000e-03, 2.600e+01,
4.800e+01]])
# expected dtype of each column, in the same order as `columns_mad`
dtypes_mad = [
'int64', 'int64', 'int64', 'int64', 'int64', 'int64', 'float64', 'float64',
'float64', 'int64', 'float64', 'int64', 'float64', 'int64', 'float64',
'int64', 'float64', 'int64', 'float64', 'int64', 'float64', 'int64',
'float64', 'int64', 'float64', 'float64', 'float64', 'float64', 'float64',
'float64', 'float64']


@pytest.mark.parametrize('testfile,index,columns,values,dtypes', [
    (testfile, index, columns, values, dtypes),
    (testfile_mad, index, columns_mad, values_mad, dtypes_mad)
])
def test_read_solrad(testfile, index, columns, values, dtypes):
    """Check that read_solrad reproduces the expected frame for a file."""
    # assemble the expected frame from the raw float array, then cast
    # each column to the dtype the reader is expected to produce
    expected = pd.DataFrame(values, columns=columns, index=index)
    for name, kind in zip(expected.columns, dtypes):
        expected[name] = expected[name].astype(kind)

    result = solrad.read_solrad(testfile)

    assert_frame_equal(result, expected)