Add reader for CUES data (#106)
* first pass at a reader for CUES data

* Get cues data validation working. Next we need to handle variables that return multiple instruments from the API

* Get instrument selection working

* Get daily data test working

* Add citation for data. Add test for hourly data and instrument-specific variables

* Adding more variables

* flake8

* move geosphere tutorial
micah-prime authored May 31, 2024
1 parent 1c0136f commit e4ca1fb
Showing 8 changed files with 391 additions and 4 deletions.
1 change: 1 addition & 0 deletions README.rst
@@ -43,6 +43,7 @@ Features
* `MESOWEST <https://developers.synopticdata.com/mesonet/>`_
* `USGS <https://waterservices.usgs.gov/rest/>`_
* `GEOSPHERE AUSTRIA <https://data.hub.geosphere.at/dataset/>`_
* `UCSB CUES <https://snow.ucsb.edu/#>`_
* `MET NORWAY <https://frost.met.no/index.html>`_

Requirements
4 changes: 3 additions & 1 deletion metloom/pointdata/__init__.py
@@ -4,9 +4,11 @@
from .mesowest import MesowestPointData
from .usgs import USGSPointData
from .geosphere_austria import GeoSphereHistPointData, GeoSphereCurrentPointData
from .norway import MetNorwayPointData
from .cues import CuesLevel1

__all__ = [
"PointData", "PointDataCollection", "CDECPointData", "SnotelPointData",
"MesowestPointData", "USGSPointData", "GeoSphereHistPointData",
"GeoSphereCurrentPointData"
"GeoSphereCurrentPointData", "CuesLevel1", "MetNorwayPointData"
]
17 changes: 14 additions & 3 deletions metloom/pointdata/base.py
@@ -12,6 +12,10 @@
LOG = logging.getLogger("metloom.pointdata.base")


class DataValidationError(RuntimeError):
    pass


class PointDataCollection:
    """
    Iterator class for a collection of PointData objects.
@@ -251,17 +255,24 @@ def validate_sensor_df(cls, gdf: gpd.GeoDataFrame):
        index_names = gdf.index.names
        # check for required indexes
        for ei in cls.EXPECTED_INDICES:
            assert ei in index_names
            if ei not in index_names:
                raise DataValidationError(
                    f"{ei} was expected, but not found as an"
                    f" index of the final dataframe"
                )
        # check for expected columns - avoid modifying at class level
        expected_columns = copy.deepcopy(cls.EXPECTED_COLUMNS)
        possible_extras = ["measurementDate", "quality_code"]
        for pe in possible_extras:
            if pe in columns:
                expected_columns += [pe]

        for column in expected_columns:
            if column not in columns:
                raise ValueError(f"Expected {column} not found")
                raise DataValidationError(
                    f"{column} was expected, but not found as a"
                    f" column of the final dataframe"
                )

        remaining_columns = [c for c in columns if c not in expected_columns]
        # make sure all variables have a units column as well
        for rc in remaining_columns:
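
A minimal sketch of the new failure mode, assuming validate_sensor_df is exposed as a classmethod (as its cls signature suggests) and that the expected indices include "datetime" and "site", as the CUES reader below implies; the frame and values are illustrative only:

import geopandas as gpd
import pandas as pd
from metloom.pointdata import CuesLevel1
from metloom.pointdata.base import DataValidationError

# A frame indexed only by "datetime" -- the "site" index is deliberately missing
df = pd.DataFrame(
    {"SNOWDEPTH": [115.0]},
    index=pd.Index([pd.Timestamp("2020-03-15")], name="datetime"),
)
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy([-119.029], [37.643]))

try:
    CuesLevel1.validate_sensor_df(gdf)
except DataValidationError as err:
    print(err)  # e.g. "site was expected, but not found as an index of the final dataframe"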
170 changes: 170 additions & 0 deletions metloom/pointdata/cues.py
@@ -0,0 +1,170 @@
"""
A reader for the Mammoth CUES site
https://snow.ucsb.edu/index.php/description/
"""
from datetime import datetime, timezone, timedelta
from io import StringIO
from typing import List
import logging
import geopandas as gpd
import pandas as pd
import requests

from metloom.pointdata import PointData
from metloom.variables import CuesLevel1Variables, SensorDescription

LOG = logging.getLogger(__name__)


class CuesLevel1(PointData):
"""
Implement PointData methods for CUES level 1 data
https://snow.ucsb.edu/index.php/description/
https://snow.ucsb.edu/index.php/query-db/
https://doi.org/10.21424/R4159Q
"""
ALLOWED_VARIABLES = CuesLevel1Variables
URL = "https://snow.ucsb.edu/index.php/query-db/"
DATASOURCE = "UCSB CUES"

def __init__(self, station_id, name, metadata=None):
"""
See docstring for PointData.__init__
"""
super(CuesLevel1, self).__init__(
station_id or "CUES",
name or "CUES",
metadata=metadata
)
self._raw_metadata = None
self._tzinfo = timezone(timedelta(hours=-8.0))

    def _get_one_variable(
        self, start_date, end_date, variables: SensorDescription,
        period, method
    ):
        dt_fmt = "%Y-%m-%d"
        data = dict(
            # table="downward looking solar radiation",
            table=variables.code, start=start_date.strftime(dt_fmt),
            end=end_date.strftime(dt_fmt), interval=period,
            method=method, output="CSV",
            category="Measurement"
        )
        resp = requests.post(self.URL, data=data)
        resp.raise_for_status()
        return resp.content.decode()

    def _sensor_response_to_df(self, data, variable):

        # Check for no data
        if not data.replace("\n", ""):
            LOG.debug(f"No data returned for {variable}")
            return None

        # Parse the 'csv' string returned
        df = pd.read_csv(
            StringIO(data), delimiter=",", skip_blank_lines=True,
            comment="#"
        )
        columns = list(df.columns.values)
        # check that we have the expected columns for the
        # instrument since multiple may be returned
        if variable.instrument:
            var_column = None
            for c in columns:
                if variable.instrument in c:
                    var_column = c
                    break
            if var_column is None:
                LOG.error(f"Returned columns were {columns}")
                raise RuntimeError(
                    f"Could not find column for expected"
                    f" instrument {variable.instrument}"
                )
        elif len(columns) > 2:
            raise RuntimeError(
                f"Expected 2 columns, got {columns}"
            )
        else:
            # just use the second of two columns
            var_column = columns[1]
        column_map = {
            columns[0]: "datetime",
            var_column: variable.name
        }
        # Parse the units out of the chosen column's name
        units = var_column.split(";")[-1].replace(
            "(", ""
        ).replace(")", "").strip()
        # Rename to desired columns and add a units column
        df.rename(columns=column_map, inplace=True)

        # handle the timezone, convert to UTC
        df["datetime"] = pd.to_datetime(df["datetime"])
        df["datetime"] = df["datetime"].apply(self._handle_df_tz)

        df = df.set_index("datetime")
        df = df.loc[:, [variable.name]]
        df[f"{variable.name}_units"] = [units] * len(df)

        return df

    def _get_data(
        self, start_date, end_date, variables: List[SensorDescription],
        period,
    ):
        df = pd.DataFrame()
        df.index.name = "datetime"
        for variable in variables:
            method = "sum" if variable.accumulated else "average"
            data = self._get_one_variable(
                start_date, end_date, variable, period, method
            )
            df_var = self._sensor_response_to_df(data, variable)
            if df_var is not None:
                df[df_var.columns] = df_var
        # Set the site info
        df["site"] = [self.id] * len(df)
        df["datasource"] = [self.DATASOURCE] * len(df)
        # Make this a geodataframe
        df = gpd.GeoDataFrame(df, geometry=[self.metadata] * len(df))
        df = df.reset_index().set_index(["datetime", "site"])
        self.validate_sensor_df(df)
        return df

    def get_daily_data(self, start_date: datetime, end_date: datetime,
                       variables: List[SensorDescription]):
        return self._get_data(
            start_date, end_date, variables, "day"
        )

    def get_hourly_data(self, start_date: datetime, end_date: datetime,
                        variables: List[SensorDescription]):
        return self._get_data(
            start_date, end_date, variables, "hr"
        )

    def get_snow_course_data(self, start_date: datetime, end_date: datetime,
                             variables: List[SensorDescription]):
        raise NotImplementedError("Not implemented")

    def _get_metadata(self):
        pass

    def points_from_geometry(self, geometry: gpd.GeoDataFrame,
                             variables: List[SensorDescription],
                             snow_courses=False, within_geometry=True,
                             buffer=0.0):
        raise NotImplementedError("Not implemented")

    @property
    def metadata(self):
        """
        Hardcode the metadata
        """
        return gpd.points_from_xy(
            [-119.029128], [37.643093], [9661]
        )[0]
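
For orientation, a usage sketch of the new reader built only from the names added in this commit; the date range and the exact shape of the printed frame are illustrative:

from datetime import datetime

from metloom.pointdata import CuesLevel1
from metloom.variables import CuesLevel1Variables

# station_id and name both fall back to "CUES" when None is passed
point = CuesLevel1(None, None)
df = point.get_daily_data(
    datetime(2020, 3, 15), datetime(2020, 3, 18),
    [CuesLevel1Variables.SNOWDEPTH, CuesLevel1Variables.TEMP],
)
# Expect a GeoDataFrame indexed by ("datetime", "site") with one column per
# variable, a matching "<name>_units" column, "datasource", and geometry.
print(df)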
52 changes: 52 additions & 0 deletions metloom/variables.py
@@ -13,6 +13,15 @@ class SensorDescription:
    accumulated: bool = False  # whether or not the data is accumulated


@dataclass(eq=True, frozen=True)
class InstrumentDescription(SensorDescription):
"""
Extend the Sensor Description to include instrument
"""
# description of the specific instrument for the variable
instrument: str = None


class VariableBase:
"""
Base class to store all variables for a specific datasource. Each
@@ -252,6 +261,49 @@ class GeoSphereHistVariables(VariableBase):
)


class CuesLevel1Variables(VariableBase):
"""
Variables for CUES level1 data
https://snow.ucsb.edu/index.php/query-db/
Some variables report back with multiple instruments. See `UPSHORTWAVE`
and `UPSHORTWAVE2` for two instrument specific implementations
of the same variable.
"""
TEMP = InstrumentDescription("air temperature", "AIR TEMP")
RH = InstrumentDescription("RH", "RELATIVE HUMIDITY")
LASERSNOWDEPTH = InstrumentDescription("laser snow depth", "LASER SNOWDEPTH")
SNOWDEPTH = InstrumentDescription("snow depth", "SNOWDEPTH")
NEWSNOWDEPTH = InstrumentDescription("new snow depth", "NEW SNOWDEPTH")
SWE = InstrumentDescription("Snow Pillow (DWR) SWE", "SWE")
# PRECIPITATION = InstrumentDescription(
# "nied", "Precipitation Total", accumulated=True
# )
TEMPSURFSNOW = InstrumentDescription(
"snow surface temperature", "SNOW SURFACE TEMPERATURE"
)
DOWNSHORTWAVE = InstrumentDescription(
"downward looking solar radiation", "DOWNWARD SHORTWAVE RADIATION",
)
UPSHORTWAVE = InstrumentDescription(
"upward looking solar radiation", "UPWARD SHORTWAVE RADIATION",
instrument="Eppley Lab precision spectral pyranometer"
)
UPSHORTWAVE2 = InstrumentDescription(
"upward looking solar radiation", "UPWARD SHORTWAVE RADIATION 2",
instrument="uplooking Sunshine pyranometer direct and diffus"
)
DOWNSHORTWAVEIR = InstrumentDescription(
"downward looking near-IR radiation",
"DOWNWARD NIR SHORTWAVE RADIATION",
)
UPSHORTWAVEIR = InstrumentDescription(
"upward looking near-IR radiation",
"UPWARD NIR SHORTWAVE RADIATION",
)


class MetNorwayVariables(VariableBase):
"""
See https://frost.met.no/concepts2.html#calculationmethod
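
A small sketch of how the new InstrumentDescription separates the two upward shortwave variables: both query the same table (the code field) but carry different names and instrument strings, and the reader matches the instrument string against the returned column headers:

from metloom.variables import CuesLevel1Variables

v1 = CuesLevel1Variables.UPSHORTWAVE
v2 = CuesLevel1Variables.UPSHORTWAVE2

print(v1.code == v2.code)     # True: both query "upward looking solar radiation"
print(v1.name, "|", v2.name)  # UPWARD SHORTWAVE RADIATION | UPWARD SHORTWAVE RADIATION 2
print(v1.instrument)          # substring matched against the returned CSV column headers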
4 changes: 4 additions & 0 deletions tests/data/cues_mocks/daily_response.txt
@@ -0,0 +1,4 @@
MeasDateTime,downward looking solar radiation; downlooking Eppley Lab precision spectral pyranometer; (Watts/meter^2)
2020-03-15 00:00:00,95.64
2020-03-16 00:00:00,86.87
2020-03-17 00:00:00,182.23
25 changes: 25 additions & 0 deletions tests/data/cues_mocks/hourly_response.txt
@@ -0,0 +1,25 @@
MeasDateTime,upward looking solar radiation; uplooking Eppley Lab precision spectral pyranometer; (Watts/meter^2),upward looking solar radiation; uplooking Sunshine pyranometer direct and diffuse; (Watts/meter^2)
2020-04-01 00:00:00,-9.78,0.53
2020-04-01 01:00:00,-9.90,0.47
2020-04-01 02:00:00,-10.13,0.58
2020-04-01 03:00:00,-10.20,0.68
2020-04-01 04:00:00,-10.08,0.66
2020-04-01 05:00:00,-5.17,3.98
2020-04-01 06:00:00,118.68,112.46
2020-04-01 07:00:00,347.40,347.80
2020-04-01 08:00:00,568.13,567.65
2020-04-01 09:00:00,754.97,747.07
2020-04-01 10:00:00,771.33,764.55
2020-04-01 11:00:00,433.62,413.53
2020-04-01 12:00:00,341.02,314.57
2020-04-01 13:00:00,268.41,240.90
2020-04-01 14:00:00,233.50,212.72
2020-04-01 15:00:00,291.97,286.49
2020-04-01 16:00:00,353.31,365.16
2020-04-01 17:00:00,139.36,151.85
2020-04-01 18:00:00,-3.04,7.08
2020-04-01 19:00:00,-5.78,0.65
2020-04-01 20:00:00,-5.40,0.74
2020-04-01 21:00:00,-5.84,0.77
2020-04-01 22:00:00,-6.30,0.92
2020-04-01 23:00:00,-8.44,0.87
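
The hourly mock above exercises the instrument selection path. A rough illustration of the column matching that _sensor_response_to_df performs on this file (the path and instrument string mirror the commit's test data):

from io import StringIO

import pandas as pd

with open("tests/data/cues_mocks/hourly_response.txt") as fp:
    data = fp.read()

df = pd.read_csv(StringIO(data), delimiter=",", skip_blank_lines=True, comment="#")
instrument = "Eppley Lab precision spectral pyranometer"
# Pick the column whose header mentions the requested instrument
var_column = next(c for c in df.columns if instrument in c)
units = var_column.split(";")[-1].replace("(", "").replace(")", "").strip()
print(var_column)  # upward looking solar radiation; uplooking Eppley Lab ...
print(units)       # Watts/meter^2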