From e4ca1fb93cc9f728479f8fc1d850f8c47dc2d4ae Mon Sep 17 00:00:00 2001 From: Micah Sandusky <32111103+micah-prime@users.noreply.github.com> Date: Fri, 31 May 2024 11:25:56 -0600 Subject: [PATCH] Add reader for CUES data (#106) * first pass at a reader for CUES data * Get cues data validation working. Next we need to handle variables that return multiple instruments from the API * Get instrument selection working * Get daily data test working * Add citation for data. add test for hourly data and instrument specific variables * Adding more variables * flake8 * move geosphere tutorial --- README.rst | 1 + metloom/pointdata/__init__.py | 4 +- metloom/pointdata/base.py | 17 ++- metloom/pointdata/cues.py | 170 ++++++++++++++++++++++ metloom/variables.py | 52 +++++++ tests/data/cues_mocks/daily_response.txt | 4 + tests/data/cues_mocks/hourly_response.txt | 25 ++++ tests/test_cues.py | 122 ++++++++++++++++ 8 files changed, 391 insertions(+), 4 deletions(-) create mode 100644 metloom/pointdata/cues.py create mode 100644 tests/data/cues_mocks/daily_response.txt create mode 100644 tests/data/cues_mocks/hourly_response.txt create mode 100644 tests/test_cues.py diff --git a/README.rst b/README.rst index 321c454..46f00b9 100644 --- a/README.rst +++ b/README.rst @@ -43,6 +43,7 @@ Features * `MESOWEST `_ * `USGS `_ * `GEOSPHERE AUSTRIA `_ + * `UCSB CUES `_ * `MET NORWAY `_ Requirements diff --git a/metloom/pointdata/__init__.py b/metloom/pointdata/__init__.py index ea739ca..ef9397c 100644 --- a/metloom/pointdata/__init__.py +++ b/metloom/pointdata/__init__.py @@ -4,9 +4,11 @@ from .mesowest import MesowestPointData from .usgs import USGSPointData from .geosphere_austria import GeoSphereHistPointData, GeoSphereCurrentPointData +from .norway import MetNorwayPointData +from .cues import CuesLevel1 __all__ = [ "PointData", "PointDataCollection", "CDECPointData", "SnotelPointData", "MesowestPointData", "USGSPointData", "GeoSphereHistPointData", - "GeoSphereCurrentPointData" + 
"GeoSphereCurrentPointData", "CuesLevel1", "MetNorwayPointData" ] diff --git a/metloom/pointdata/base.py b/metloom/pointdata/base.py index 3856cd9..883adb6 100644 --- a/metloom/pointdata/base.py +++ b/metloom/pointdata/base.py @@ -12,6 +12,10 @@ LOG = logging.getLogger("metloom.pointdata.base") +class DataValidationError(RuntimeError): + pass + + class PointDataCollection: """ Iterator class for a collection of PointData objects. @@ -251,17 +255,24 @@ def validate_sensor_df(cls, gdf: gpd.GeoDataFrame): index_names = gdf.index.names # check for required indexes for ei in cls.EXPECTED_INDICES: - assert ei in index_names + if ei not in index_names: + raise DataValidationError( + f"{ei} was expected, but not found as an" + f" index of the final dataframe" + ) # check for expected columns - avoid modifying at class level expected_columns = copy.deepcopy(cls.EXPECTED_COLUMNS) possible_extras = ["measurementDate", "quality_code"] for pe in possible_extras: if pe in columns: expected_columns += [pe] - for column in expected_columns: if column not in columns: - raise ValueError(f"Expected {column} not found") + raise DataValidationError( + f"{column} was expected, but not found as a" + f" column of the final dataframe" + ) + remaining_columns = [c for c in columns if c not in expected_columns] # make sure all variables have a units column as well for rc in remaining_columns: diff --git a/metloom/pointdata/cues.py b/metloom/pointdata/cues.py new file mode 100644 index 0000000..bf89cdb --- /dev/null +++ b/metloom/pointdata/cues.py @@ -0,0 +1,170 @@ +""" +A reader for the Mammoth CUES site +https://snow.ucsb.edu/index.php/description/ + +""" +from datetime import datetime, timezone, timedelta +from io import StringIO +from typing import List +import logging +import geopandas as gpd +import pandas as pd +import requests + +from metloom.pointdata import PointData +from metloom.variables import CuesLevel1Variables, SensorDescription + +LOG = logging.getLogger(__name__) + + +class 
CuesLevel1(PointData): + """ + Implement PointData methods for CUES level 1 data + https://snow.ucsb.edu/index.php/description/ + https://snow.ucsb.edu/index.php/query-db/ + https://doi.org/10.21424/R4159Q + + """ + ALLOWED_VARIABLES = CuesLevel1Variables + URL = "https://snow.ucsb.edu/index.php/query-db/" + DATASOURCE = "UCSB CUES" + + def __init__(self, station_id, name, metadata=None): + """ + See docstring for PointData.__init__ + """ + super(CuesLevel1, self).__init__( + station_id or "CUES", + name or "CUES", + metadata=metadata + ) + self._raw_metadata = None + self._tzinfo = timezone(timedelta(hours=-8.0)) + + def _get_one_variable( + self, start_date, end_date, variables: SensorDescription, + period, method + ): + dt_fmt = "%Y-%m-%d" + data = dict( + # table="downward looking solar radiation", + table=variables.code, start=start_date.strftime(dt_fmt), + end=end_date.strftime(dt_fmt), interval=period, + method=method, output="CSV", + category="Measurement" + ) + resp = requests.post(self.URL, data=data) + resp.raise_for_status() + return resp.content.decode() + + def _sensor_response_to_df(self, data, variable): + + # Check for no data + if not data.replace("\n", ""): + LOG.debug(f"No data returned for {variable}") + return None + + # Parse the 'csv' string returned + df = pd.read_csv( + StringIO(data), delimiter=",", skip_blank_lines=True, + comment="#" + ) + columns = list(df.columns.values) + # check that we have the expected columns for the + # instrument since multiple may be returned + if variable.instrument: + var_column = None + for c in columns: + if variable.instrument in c: + var_column = c + break + if var_column is None: + LOG.error(f"Returned columns were {columns}") + raise RuntimeError( + f"Could not find column for expected" + f" instrument {variable.instrument}" + ) + elif len(columns) > 2: + raise RuntimeError( + f"Expected 2 columns, got {columns}" + ) + else: + # just use the second of two columns + var_column = columns[1] + column_map = 
{ + columns[0]: "datetime", + var_column: variable.name + } + # Parse the units out of the returned column name + units = columns[1].split(";")[-1].replace( + "(", "" + ).replace(")", "").strip() + # Rename to desired columns and add a units column + df.rename(columns=column_map, inplace=True) + + # handle the timezone convert to UTC + df["datetime"] = pd.to_datetime(df["datetime"]) + df["datetime"] = df["datetime"].apply(self._handle_df_tz) + + df = df.set_index("datetime") + df = df.loc[:, [variable.name]] + df[f"{variable.name}_units"] = [units] * len(df) + + return df + + def _get_data( + self, start_date, end_date, variables: List[SensorDescription], + period, + ): + df = pd.DataFrame() + df.index.name = "datetime" + for variable in variables: + method = "sum" if variable.accumulated else "average" + data = self._get_one_variable( + start_date, end_date, variable, period, method + ) + df_var = self._sensor_response_to_df(data, variable) + if df_var is not None: + df[df_var.columns] = df_var + # Set the site info + df["site"] = [self.id] * len(df) + df["datasource"] = [self.DATASOURCE] * len(df) + # Make this a geodataframe + df = gpd.GeoDataFrame(df, geometry=[self.metadata] * len(df)) + df = df.reset_index().set_index(["datetime", "site"]) + self.validate_sensor_df(df) + return df + + def get_daily_data(self, start_date: datetime, end_date: datetime, + variables: List[SensorDescription]): + return self._get_data( + start_date, end_date, variables, "day" + ) + + def get_hourly_data(self, start_date: datetime, end_date: datetime, + variables: List[SensorDescription]): + return self._get_data( + start_date, end_date, variables, "hr" + ) + + def get_snow_course_data(self, start_date: datetime, end_date: datetime, + variables: List[SensorDescription]): + raise NotImplementedError("Not implemented") + + def _get_metadata(self): + pass + + def points_from_geometry(self, geometry: gpd.GeoDataFrame, + variables: List[SensorDescription], + snow_courses=False, 
within_geometry=True, + buffer=0.0): + raise NotImplementedError("Not implemented") + + @property + def metadata(self): + """ + Hardcode the metadata + """ + return gpd.points_from_xy( + [-119.029128], [37.643093], [9661] + )[0] diff --git a/metloom/variables.py b/metloom/variables.py index c1d4c5f..5e9cdd4 100644 --- a/metloom/variables.py +++ b/metloom/variables.py @@ -13,6 +13,15 @@ class SensorDescription: accumulated: bool = False # whether or not the data is accumulated +@dataclass(eq=True, frozen=True) +class InstrumentDescription(SensorDescription): + """ + Extend the Sensor Description to include instrument + """ + # description of the specific instrument for the variable + instrument: str = None + + class VariableBase: """ Base class to store all variables for a specific datasource. Each @@ -252,6 +261,49 @@ class GeoSphereHistVariables(VariableBase): ) +class CuesLevel1Variables(VariableBase): + """ + Variables for CUES level1 data + https://snow.ucsb.edu/index.php/query-db/ + + Some variables report back with multiple instruments. See `UPSHORTWAVE` + and `UPSHORTWAVE2` for two instrument specific implementations + of the same variable. 
+ + """ + TEMP = InstrumentDescription("air temperature", "AIR TEMP") + RH = InstrumentDescription("RH", "RELATIVE HUMIDITY") + LASERSNOWDEPTH = InstrumentDescription("laser snow depth", "LASER SNOWDEPTH") + SNOWDEPTH = InstrumentDescription("snow depth", "SNOWDEPTH") + NEWSNOWDEPTH = InstrumentDescription("new snow depth", "NEW SNOWDEPTH") + SWE = InstrumentDescription("Snow Pillow (DWR) SWE", "SWE") + # PRECIPITATION = InstrumentDescription( + # "nied", "Precipitation Total", accumulated=True + # ) + TEMPSURFSNOW = InstrumentDescription( + "snow surface temperature", "SNOW SURFACE TEMPERATURE" + ) + DOWNSHORTWAVE = InstrumentDescription( + "downward looking solar radiation", "DOWNWARD SHORTWAVE RADIATION", + ) + UPSHORTWAVE = InstrumentDescription( + "upward looking solar radiation", "UPWARD SHORTWAVE RADIATION", + instrument="Eppley Lab precision spectral pyranometer" + ) + UPSHORTWAVE2 = InstrumentDescription( + "upward looking solar radiation", "UPWARD SHORTWAVE RADIATION 2", + instrument="uplooking Sunshine pyranometer direct and diffus" + ) + DOWNSHORTWAVEIR = InstrumentDescription( + "downward looking near-IR radiation", + "DOWNWARD NIR SHORTWAVE RADIATION", + ) + UPSHORTWAVEIR = InstrumentDescription( + "upward looking near-IR radiation", + "UPWARD NIR SHORTWAVE RADIATION", + ) + + class MetNorwayVariables(VariableBase): """ See https://frost.met.no/concepts2.html#calculationmethod diff --git a/tests/data/cues_mocks/daily_response.txt b/tests/data/cues_mocks/daily_response.txt new file mode 100644 index 0000000..fe77df6 --- /dev/null +++ b/tests/data/cues_mocks/daily_response.txt @@ -0,0 +1,4 @@ +MeasDateTime,downward looking solar radiation; downlooking Eppley Lab precision spectral pyranometer; (Watts/meter^2) +2020-03-15 00:00:00,95.64 +2020-03-16 00:00:00,86.87 +2020-03-17 00:00:00,182.23 diff --git a/tests/data/cues_mocks/hourly_response.txt b/tests/data/cues_mocks/hourly_response.txt new file mode 100644 index 0000000..623c13d --- /dev/null +++ 
b/tests/data/cues_mocks/hourly_response.txt @@ -0,0 +1,25 @@ +MeasDateTime,upward looking solar radiation; uplooking Eppley Lab precision spectral pyranometer; (Watts/meter^2),upward looking solar radiation; uplooking Sunshine pyranometer direct and diffuse; (Watts/meter^2) +2020-04-01 00:00:00,-9.78,0.53 +2020-04-01 01:00:00,-9.90,0.47 +2020-04-01 02:00:00,-10.13,0.58 +2020-04-01 03:00:00,-10.20,0.68 +2020-04-01 04:00:00,-10.08,0.66 +2020-04-01 05:00:00,-5.17,3.98 +2020-04-01 06:00:00,118.68,112.46 +2020-04-01 07:00:00,347.40,347.80 +2020-04-01 08:00:00,568.13,567.65 +2020-04-01 09:00:00,754.97,747.07 +2020-04-01 10:00:00,771.33,764.55 +2020-04-01 11:00:00,433.62,413.53 +2020-04-01 12:00:00,341.02,314.57 +2020-04-01 13:00:00,268.41,240.90 +2020-04-01 14:00:00,233.50,212.72 +2020-04-01 15:00:00,291.97,286.49 +2020-04-01 16:00:00,353.31,365.16 +2020-04-01 17:00:00,139.36,151.85 +2020-04-01 18:00:00,-3.04,7.08 +2020-04-01 19:00:00,-5.78,0.65 +2020-04-01 20:00:00,-5.40,0.74 +2020-04-01 21:00:00,-5.84,0.77 +2020-04-01 22:00:00,-6.30,0.92 +2020-04-01 23:00:00,-8.44,0.87 diff --git a/tests/test_cues.py b/tests/test_cues.py new file mode 100644 index 0000000..889d3ed --- /dev/null +++ b/tests/test_cues.py @@ -0,0 +1,122 @@ +from datetime import datetime +from os.path import join +from pathlib import Path +from unittest.mock import patch, MagicMock + +import geopandas as gpd +import pandas as pd +import pytest + +from metloom.pointdata import CuesLevel1 +from metloom.variables import CuesLevel1Variables +from tests.test_point_data import BasePointDataTest + +DATA_DIR = str(Path(__file__).parent.joinpath("data/cues_mocks")) + + +class TestCuesStation(BasePointDataTest): + + @pytest.fixture(scope="function") + def station(self): + return CuesLevel1(None, None) + + @pytest.fixture(scope="class") + def expected_meta(self): + return gpd.points_from_xy( + [-119.029128], [37.643093], [9661] + )[0] + + @pytest.fixture(scope="class") + def daily_expected(self, expected_meta): + df 
= gpd.GeoDataFrame.from_dict( + [ + { + "datetime": pd.Timestamp("2020-03-15 08:00:00+0000", tz="UTC"), + "DOWNWARD SHORTWAVE RADIATION": 95.64, + }, + { + "datetime": pd.Timestamp("2020-03-16 08:00:00+0000", tz="UTC"), + "DOWNWARD SHORTWAVE RADIATION": 86.87, + }, + { + "datetime": pd.Timestamp("2020-03-17 08:00:00+0000", tz="UTC"), + "DOWNWARD SHORTWAVE RADIATION": 182.23, + }, + + ], + geometry=[expected_meta] * 3, + ) + df["DOWNWARD SHORTWAVE RADIATION_units"] = ["Watts/meter^2"] * len(df) + df["site"] = ["CUES"] * len(df) + df["datasource"] = ["UCSB CUES"] * len(df) + # needed to reorder the columns for the pd testing compare + df = df.filter( + [ + "datetime", + "geometry", + "DOWNWARD SHORTWAVE RADIATION", + "site", + "DOWNWARD SHORTWAVE RADIATION_units", + "datasource", + ] + ) + df.set_index(keys=["datetime", "site"], inplace=True) + return df + + @classmethod + def get_url_response(cls, resp="daily"): + if resp == 'daily': + with open(join(DATA_DIR, "daily_response.txt")) as fp: + data = fp.read() + elif resp == 'hourly': + with open(join(DATA_DIR, "hourly_response.txt")) as fp: + data = fp.read() + else: + raise RuntimeError(f"{resp} is an unknown option") + + obj = MagicMock() + obj.content = data.encode() + return obj + + def test_get_metadata(self, station, expected_meta): + assert expected_meta == station.metadata + + def test_get_daily_data(self, station, daily_expected): + with patch("metloom.pointdata.cues.requests") as mock_requests: + mock_requests.post.side_effect = [ + self.get_url_response(), + ] + response = station.get_daily_data( + datetime(2020, 3, 15), + datetime(2020, 3, 17), + [CuesLevel1Variables.DOWNSHORTWAVE], + ) + pd.testing.assert_frame_equal( + response.sort_index(axis=1), + daily_expected.sort_index(axis=1) + ) + + def test_get_hourly_data(self, station): + """ + Test that we get hourly data correctly. 
+ This also uses the `UPSHORTWAVE` variable so we can test + that the instrument specific implementation of variables is working. + """ + with patch("metloom.pointdata.cues.requests") as mock_requests: + mock_requests.post.side_effect = [ + self.get_url_response(resp="hourly"), + ] + resp = station.get_hourly_data( + datetime(2020, 4, 1), datetime(2020, 4, 2), + [CuesLevel1Variables.UPSHORTWAVE], + ) + resp = resp.reset_index() + assert resp["datetime"].values[0] == pd.to_datetime("2020-04-01 08") + assert resp["datetime"].values[-1] == pd.to_datetime("2020-04-02 07") + assert resp["UPWARD SHORTWAVE RADIATION"].values[0] == -9.78 + assert resp["UPWARD SHORTWAVE RADIATION"].values[-1] == -8.44 + assert all(resp["site"].values == "CUES") + + def test_points_from_geometry_failure(self, station): + with pytest.raises(NotImplementedError): + station.points_from_geometry(None, None)