From e4ca1fb93cc9f728479f8fc1d850f8c47dc2d4ae Mon Sep 17 00:00:00 2001
From: Micah Sandusky <32111103+micah-prime@users.noreply.github.com>
Date: Fri, 31 May 2024 11:25:56 -0600
Subject: [PATCH] Add reader for CUES data (#106)
* first pass at a reader for CUES data
* Get cues data validation working. Next we need to handle variables that return multiple instruments from the API
* Get instrument selection working
* Get daily data test working
* Add citation for data. add test for hourly data and instrument specific variables
* Adding more variables
* flake8
* move geosphere tutorial
---
README.rst | 1 +
metloom/pointdata/__init__.py | 4 +-
metloom/pointdata/base.py | 17 ++-
metloom/pointdata/cues.py | 170 ++++++++++++++++++++++
metloom/variables.py | 52 +++++++
tests/data/cues_mocks/daily_response.txt | 4 +
tests/data/cues_mocks/hourly_response.txt | 25 ++++
tests/test_cues.py | 122 ++++++++++++++++
8 files changed, 391 insertions(+), 4 deletions(-)
create mode 100644 metloom/pointdata/cues.py
create mode 100644 tests/data/cues_mocks/daily_response.txt
create mode 100644 tests/data/cues_mocks/hourly_response.txt
create mode 100644 tests/test_cues.py
diff --git a/README.rst b/README.rst
index 321c454..46f00b9 100644
--- a/README.rst
+++ b/README.rst
@@ -43,6 +43,7 @@ Features
* `MESOWEST `_
* `USGS `_
* `GEOSPHERE AUSTRIA `_
+ * `UCSB CUES <https://snow.ucsb.edu/index.php/description/>`_
* `MET NORWAY `_
Requirements
diff --git a/metloom/pointdata/__init__.py b/metloom/pointdata/__init__.py
index ea739ca..ef9397c 100644
--- a/metloom/pointdata/__init__.py
+++ b/metloom/pointdata/__init__.py
@@ -4,9 +4,11 @@
from .mesowest import MesowestPointData
from .usgs import USGSPointData
from .geosphere_austria import GeoSphereHistPointData, GeoSphereCurrentPointData
+from .norway import MetNorwayPointData
+from .cues import CuesLevel1
__all__ = [
"PointData", "PointDataCollection", "CDECPointData", "SnotelPointData",
"MesowestPointData", "USGSPointData", "GeoSphereHistPointData",
- "GeoSphereCurrentPointData"
+ "GeoSphereCurrentPointData", "CuesLevel1", "MetNorwayPointData"
]
diff --git a/metloom/pointdata/base.py b/metloom/pointdata/base.py
index 3856cd9..883adb6 100644
--- a/metloom/pointdata/base.py
+++ b/metloom/pointdata/base.py
@@ -12,6 +12,10 @@
LOG = logging.getLogger("metloom.pointdata.base")
class DataValidationError(RuntimeError):
    """Raised when a resulting dataframe is missing an expected index or column."""
+
+
class PointDataCollection:
"""
Iterator class for a collection of PointData objects.
@@ -251,17 +255,24 @@ def validate_sensor_df(cls, gdf: gpd.GeoDataFrame):
index_names = gdf.index.names
# check for required indexes
for ei in cls.EXPECTED_INDICES:
- assert ei in index_names
+ if ei not in index_names:
+ raise DataValidationError(
+ f"{ei} was expected, but not found as an"
+ f" index of the final dataframe"
+ )
# check for expected columns - avoid modifying at class level
expected_columns = copy.deepcopy(cls.EXPECTED_COLUMNS)
possible_extras = ["measurementDate", "quality_code"]
for pe in possible_extras:
if pe in columns:
expected_columns += [pe]
-
for column in expected_columns:
if column not in columns:
- raise ValueError(f"Expected {column} not found")
+ raise DataValidationError(
+ f"{column} was expected, but not found as a"
+ f" column of the final dataframe"
+ )
+
remaining_columns = [c for c in columns if c not in expected_columns]
# make sure all variables have a units column as well
for rc in remaining_columns:
diff --git a/metloom/pointdata/cues.py b/metloom/pointdata/cues.py
new file mode 100644
index 0000000..bf89cdb
--- /dev/null
+++ b/metloom/pointdata/cues.py
@@ -0,0 +1,170 @@
+"""
+A reader for the Mammoth CUES site
+https://snow.ucsb.edu/index.php/description/
+
+"""
+from datetime import datetime, timezone, timedelta
+from io import StringIO
+from typing import List
+import logging
+import geopandas as gpd
+import pandas as pd
+import requests
+
+from metloom.pointdata import PointData
+from metloom.variables import CuesLevel1Variables, SensorDescription
+
+LOG = logging.getLogger(__name__)
+
+
class CuesLevel1(PointData):
    """
    Implement PointData methods for CUES level 1 data
    https://snow.ucsb.edu/index.php/description/
    https://snow.ucsb.edu/index.php/query-db/
    https://doi.org/10.21424/R4159Q

    """
    ALLOWED_VARIABLES = CuesLevel1Variables
    URL = "https://snow.ucsb.edu/index.php/query-db/"
    DATASOURCE = "UCSB CUES"

    def __init__(self, station_id, name, metadata=None):
        """
        See docstring for PointData.__init__

        CUES is a single site, so ``station_id`` and ``name`` both
        default to "CUES" when None is passed.
        """
        super(CuesLevel1, self).__init__(
            station_id or "CUES",
            name or "CUES",
            metadata=metadata
        )
        self._raw_metadata = None
        # CUES reports in a fixed UTC-8 offset (no DST handling)
        self._tzinfo = timezone(timedelta(hours=-8.0))

    def _get_one_variable(
        self, start_date, end_date, variable: SensorDescription,
        period, method
    ):
        """
        POST one query to the CUES database and return the raw CSV text.

        Args:
            start_date: datetime for the start of the query
            end_date: datetime for the end of the query
            variable: single SensorDescription; its ``code`` is sent as
                the API "table" name
            period: aggregation interval ("day" or "hr")
            method: aggregation method ("sum" or "average")

        Returns:
            The decoded CSV response body as a string.
        """
        dt_fmt = "%Y-%m-%d"
        data = dict(
            table=variable.code, start=start_date.strftime(dt_fmt),
            end=end_date.strftime(dt_fmt), interval=period,
            method=method, output="CSV",
            category="Measurement"
        )
        resp = requests.post(self.URL, data=data)
        resp.raise_for_status()
        return resp.content.decode()

    def _sensor_response_to_df(self, data, variable):
        """
        Parse one CSV response into a dataframe indexed by UTC datetime.

        Args:
            data: raw CSV string returned from the API
            variable: SensorDescription for the requested variable

        Returns:
            Dataframe with the variable column and a matching
            ``<name>_units`` column, or None when no data came back.

        Raises:
            RuntimeError: if the expected instrument column is missing,
                or extra un-requested columns are returned.
        """
        # Check for no data
        if not data.replace("\n", ""):
            LOG.debug(f"No data returned for {variable}")
            return None

        # Parse the 'csv' string returned
        df = pd.read_csv(
            StringIO(data), delimiter=",", skip_blank_lines=True,
            comment="#"
        )
        columns = list(df.columns.values)
        # check that we have the expected columns for the
        # instrument since multiple may be returned
        if variable.instrument:
            var_column = None
            for c in columns:
                if variable.instrument in c:
                    var_column = c
                    break
            if var_column is None:
                LOG.error(f"Returned columns were {columns}")
                raise RuntimeError(
                    f"Could not find column for expected"
                    f" instrument {variable.instrument}"
                )
        elif len(columns) > 2:
            raise RuntimeError(
                f"Expected 2 columns, got {columns}"
            )
        else:
            # just use the second of two columns
            var_column = columns[1]
        column_map = {
            columns[0]: "datetime",
            var_column: variable.name
        }
        # Parse the units out of the column we actually selected
        # (header format: "<variable>; <instrument>; (<units>)").
        # Using var_column rather than columns[1] matters when the
        # instrument match picks a later column.
        units = var_column.split(";")[-1].replace(
            "(", ""
        ).replace(")", "").strip()
        # Rename to desired columns and add a units column
        df.rename(columns=column_map, inplace=True)

        # handle the timezone convert to UTC
        df["datetime"] = pd.to_datetime(df["datetime"])
        df["datetime"] = df["datetime"].apply(self._handle_df_tz)

        df = df.set_index("datetime")
        df = df.loc[:, [variable.name]]
        df[f"{variable.name}_units"] = [units] * len(df)

        return df

    def _get_data(
        self, start_date, end_date, variables: List[SensorDescription],
        period,
    ):
        """
        Fetch all requested variables and join them into one validated
        geodataframe indexed by (datetime, site).
        """
        df = pd.DataFrame()
        df.index.name = "datetime"
        for variable in variables:
            # accumulated variables are summed, instantaneous averaged
            method = "sum" if variable.accumulated else "average"
            data = self._get_one_variable(
                start_date, end_date, variable, period, method
            )
            df_var = self._sensor_response_to_df(data, variable)
            if df_var is not None:
                df[df_var.columns] = df_var
        # Set the site info
        df["site"] = [self.id] * len(df)
        df["datasource"] = [self.DATASOURCE] * len(df)
        # Make this a geodataframe
        df = gpd.GeoDataFrame(df, geometry=[self.metadata] * len(df))
        df = df.reset_index().set_index(["datetime", "site"])
        self.validate_sensor_df(df)
        return df

    def get_daily_data(self, start_date: datetime, end_date: datetime,
                       variables: List[SensorDescription]):
        """Get daily aggregated data for the requested variables."""
        return self._get_data(
            start_date, end_date, variables, "day"
        )

    def get_hourly_data(self, start_date: datetime, end_date: datetime,
                        variables: List[SensorDescription]):
        """Get hourly aggregated data for the requested variables."""
        return self._get_data(
            start_date, end_date, variables, "hr"
        )

    def get_snow_course_data(self, start_date: datetime, end_date: datetime,
                             variables: List[SensorDescription]):
        """CUES has no snow course data."""
        raise NotImplementedError("Not implemented")

    def _get_metadata(self):
        # metadata is hardcoded in the ``metadata`` property instead
        pass

    def points_from_geometry(self, geometry: gpd.GeoDataFrame,
                             variables: List[SensorDescription],
                             snow_courses=False, within_geometry=True,
                             buffer=0.0):
        """Not implemented — CUES is a single known point."""
        raise NotImplementedError("Not implemented")

    @property
    def metadata(self):
        """
        Hardcode the metadata
        """
        return gpd.points_from_xy(
            [-119.029128], [37.643093], [9661]
        )[0]
diff --git a/metloom/variables.py b/metloom/variables.py
index c1d4c5f..5e9cdd4 100644
--- a/metloom/variables.py
+++ b/metloom/variables.py
@@ -13,6 +13,15 @@ class SensorDescription:
accumulated: bool = False # whether or not the data is accumulated
@dataclass(eq=True, frozen=True)
class InstrumentDescription(SensorDescription):
    """
    Extend the Sensor Description to include instrument
    """
    # Description of the specific instrument for the variable. When set,
    # readers match it as a substring of the column headers returned by
    # the data source to select among multiple instruments reporting the
    # same variable.
    instrument: str = None
+
+
class VariableBase:
"""
Base class to store all variables for a specific datasource. Each
@@ -252,6 +261,49 @@ class GeoSphereHistVariables(VariableBase):
)
class CuesLevel1Variables(VariableBase):
    """
    Variables for CUES level1 data
    https://snow.ucsb.edu/index.php/query-db/

    Some variables report back with multiple instruments. See `UPSHORTWAVE`
    and `UPSHORTWAVE2` for two instrument specific implementations
    of the same variable.

    The second argument of each entry is the column name used in the
    final dataframe; the first is presumably the "table" name sent to
    the CUES query API — TODO confirm against SensorDescription's field
    order.
    """
    TEMP = InstrumentDescription("air temperature", "AIR TEMP")
    RH = InstrumentDescription("RH", "RELATIVE HUMIDITY")
    LASERSNOWDEPTH = InstrumentDescription("laser snow depth", "LASER SNOWDEPTH")
    SNOWDEPTH = InstrumentDescription("snow depth", "SNOWDEPTH")
    NEWSNOWDEPTH = InstrumentDescription("new snow depth", "NEW SNOWDEPTH")
    SWE = InstrumentDescription("Snow Pillow (DWR) SWE", "SWE")
    # NOTE(review): precipitation is left disabled — confirm the API
    # table name before enabling.
    # PRECIPITATION = InstrumentDescription(
    #     "nied", "Precipitation Total", accumulated=True
    # )
    TEMPSURFSNOW = InstrumentDescription(
        "snow surface temperature", "SNOW SURFACE TEMPERATURE"
    )
    DOWNSHORTWAVE = InstrumentDescription(
        "downward looking solar radiation", "DOWNWARD SHORTWAVE RADIATION",
    )
    UPSHORTWAVE = InstrumentDescription(
        "upward looking solar radiation", "UPWARD SHORTWAVE RADIATION",
        instrument="Eppley Lab precision spectral pyranometer"
    )
    UPSHORTWAVE2 = InstrumentDescription(
        "upward looking solar radiation", "UPWARD SHORTWAVE RADIATION 2",
        # "diffus" looks like a deliberate truncation of "diffuse" — the
        # instrument is matched as a substring of the returned column
        # header, so the short form still matches. TODO confirm.
        instrument="uplooking Sunshine pyranometer direct and diffus"
    )
    DOWNSHORTWAVEIR = InstrumentDescription(
        "downward looking near-IR radiation",
        "DOWNWARD NIR SHORTWAVE RADIATION",
    )
    UPSHORTWAVEIR = InstrumentDescription(
        "upward looking near-IR radiation",
        "UPWARD NIR SHORTWAVE RADIATION",
    )
+
+
class MetNorwayVariables(VariableBase):
"""
See https://frost.met.no/concepts2.html#calculationmethod
diff --git a/tests/data/cues_mocks/daily_response.txt b/tests/data/cues_mocks/daily_response.txt
new file mode 100644
index 0000000..fe77df6
--- /dev/null
+++ b/tests/data/cues_mocks/daily_response.txt
@@ -0,0 +1,4 @@
+MeasDateTime,downward looking solar radiation; downlooking Eppley Lab precision spectral pyranometer; (Watts/meter^2)
+2020-03-15 00:00:00,95.64
+2020-03-16 00:00:00,86.87
+2020-03-17 00:00:00,182.23
diff --git a/tests/data/cues_mocks/hourly_response.txt b/tests/data/cues_mocks/hourly_response.txt
new file mode 100644
index 0000000..623c13d
--- /dev/null
+++ b/tests/data/cues_mocks/hourly_response.txt
@@ -0,0 +1,25 @@
+MeasDateTime,upward looking solar radiation; uplooking Eppley Lab precision spectral pyranometer; (Watts/meter^2),upward looking solar radiation; uplooking Sunshine pyranometer direct and diffuse; (Watts/meter^2)
+2020-04-01 00:00:00,-9.78,0.53
+2020-04-01 01:00:00,-9.90,0.47
+2020-04-01 02:00:00,-10.13,0.58
+2020-04-01 03:00:00,-10.20,0.68
+2020-04-01 04:00:00,-10.08,0.66
+2020-04-01 05:00:00,-5.17,3.98
+2020-04-01 06:00:00,118.68,112.46
+2020-04-01 07:00:00,347.40,347.80
+2020-04-01 08:00:00,568.13,567.65
+2020-04-01 09:00:00,754.97,747.07
+2020-04-01 10:00:00,771.33,764.55
+2020-04-01 11:00:00,433.62,413.53
+2020-04-01 12:00:00,341.02,314.57
+2020-04-01 13:00:00,268.41,240.90
+2020-04-01 14:00:00,233.50,212.72
+2020-04-01 15:00:00,291.97,286.49
+2020-04-01 16:00:00,353.31,365.16
+2020-04-01 17:00:00,139.36,151.85
+2020-04-01 18:00:00,-3.04,7.08
+2020-04-01 19:00:00,-5.78,0.65
+2020-04-01 20:00:00,-5.40,0.74
+2020-04-01 21:00:00,-5.84,0.77
+2020-04-01 22:00:00,-6.30,0.92
+2020-04-01 23:00:00,-8.44,0.87
diff --git a/tests/test_cues.py b/tests/test_cues.py
new file mode 100644
index 0000000..889d3ed
--- /dev/null
+++ b/tests/test_cues.py
@@ -0,0 +1,122 @@
+from datetime import datetime
+from os.path import join
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+import geopandas as gpd
+import pandas as pd
+import pytest
+
+from metloom.pointdata import CuesLevel1
+from metloom.variables import CuesLevel1Variables
+from tests.test_point_data import BasePointDataTest
+
+DATA_DIR = str(Path(__file__).parent.joinpath("data/cues_mocks"))
+
+
class TestCuesStation(BasePointDataTest):
    """
    Tests for the CUES level 1 reader. All API calls are mocked with
    canned CSV payloads from tests/data/cues_mocks.
    """

    @pytest.fixture(scope="function")
    def station(self):
        # station_id and name fall back to "CUES" when None is passed
        return CuesLevel1(None, None)

    @pytest.fixture(scope="class")
    def expected_meta(self):
        # hardcoded CUES site location (lon, lat, elevation)
        return gpd.points_from_xy(
            [-119.029128], [37.643093], [9661]
        )[0]

    @pytest.fixture(scope="class")
    def daily_expected(self, expected_meta):
        """
        Expected frame for the mocked daily response. The station
        reports in UTC-8, so local midnights become 08:00 UTC.
        """
        df = gpd.GeoDataFrame.from_dict(
            [
                {
                    "datetime": pd.Timestamp("2020-03-15 08:00:00+0000", tz="UTC"),
                    "DOWNWARD SHORTWAVE RADIATION": 95.64,
                },
                {
                    "datetime": pd.Timestamp("2020-03-16 08:00:00+0000", tz="UTC"),
                    "DOWNWARD SHORTWAVE RADIATION": 86.87,
                },
                {
                    "datetime": pd.Timestamp("2020-03-17 08:00:00+0000", tz="UTC"),
                    "DOWNWARD SHORTWAVE RADIATION": 182.23,
                },

            ],
            geometry=[expected_meta] * 3,
        )
        df["DOWNWARD SHORTWAVE RADIATION_units"] = ["Watts/meter^2"] * len(df)
        df["site"] = ["CUES"] * len(df)
        df["datasource"] = ["UCSB CUES"] * len(df)
        # needed to reorder the columns for the pd testing compare
        df = df.filter(
            [
                "datetime",
                "geometry",
                "DOWNWARD SHORTWAVE RADIATION",
                "site",
                "DOWNWARD SHORTWAVE RADIATION_units",
                "datasource",
            ]
        )
        df.set_index(keys=["datetime", "site"], inplace=True)
        return df

    @classmethod
    def get_url_response(cls, resp="daily"):
        # Build a mock requests response whose .content carries one of
        # the canned CSV payloads (bytes, like a real response)
        if resp == 'daily':
            with open(join(DATA_DIR, "daily_response.txt")) as fp:
                data = fp.read()
        elif resp == 'hourly':
            with open(join(DATA_DIR, "hourly_response.txt")) as fp:
                data = fp.read()
        else:
            raise RuntimeError(f"{resp} is an unknown option")

        obj = MagicMock()
        obj.content = data.encode()
        return obj

    def test_get_metadata(self, station, expected_meta):
        assert expected_meta == station.metadata

    def test_get_daily_data(self, station, daily_expected):
        with patch("metloom.pointdata.cues.requests") as mock_requests:
            mock_requests.post.side_effect = [
                self.get_url_response(),
            ]
            response = station.get_daily_data(
                datetime(2020, 3, 15),
                datetime(2020, 3, 17),
                [CuesLevel1Variables.DOWNSHORTWAVE],
            )
            pd.testing.assert_frame_equal(
                response.sort_index(axis=1),
                daily_expected.sort_index(axis=1)
            )

    def test_get_hourly_data(self, station):
        """
        Test that we get hourly data correctly.
        This also uses the `UPSHORTWAVE` variable so we can test
        that the instrument specific implementation of variables is working.
        """
        with patch("metloom.pointdata.cues.requests") as mock_requests:
            mock_requests.post.side_effect = [
                self.get_url_response(resp="hourly"),
            ]
            resp = station.get_hourly_data(
                datetime(2020, 4, 1), datetime(2020, 4, 2),
                [CuesLevel1Variables.UPSHORTWAVE],
            )
            resp = resp.reset_index()
            # local midnight 2020-04-01 (UTC-8) is 08:00 UTC
            assert resp["datetime"].values[0] == pd.to_datetime("2020-04-01 08")
            assert resp["datetime"].values[-1] == pd.to_datetime("2020-04-02 07")
            assert resp["UPWARD SHORTWAVE RADIATION"].values[0] == -9.78
            assert resp["UPWARD SHORTWAVE RADIATION"].values[-1] == -8.44
            assert all(resp["site"].values == "CUES")

    def test_points_from_geometry_failure(self, station):
        with pytest.raises(NotImplementedError):
            station.points_from_geometry(None, None)