Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

issue 104 #114

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,34 @@

# -- Project information

project = 'MetGenC'
copyright = '2024, NSIDC'
author = 'National Snow and Ice Data Center'
project = "MetGenC"
copyright = "2024, NSIDC"
author = "National Snow and Ice Data Center"

release = '0.6'
version = '0.6.0'
release = "0.6"
version = "0.6.0"

# -- General configuration

extensions = [
'sphinx.ext.duration',
'sphinx.ext.doctest',
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.intersphinx',
"sphinx.ext.duration",
"sphinx.ext.doctest",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
]

intersphinx_mapping = {
'python': ('https://docs.python.org/3/', None),
'sphinx': ('https://www.sphinx-doc.org/en/master/', None),
"python": ("https://docs.python.org/3/", None),
"sphinx": ("https://www.sphinx-doc.org/en/master/", None),
}
intersphinx_disabled_domains = ['std']
intersphinx_disabled_domains = ["std"]

templates_path = ['_templates']
templates_path = ["_templates"]

# -- Options for HTML output

html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# -- Options for EPUB output
epub_show_urls = 'footnote'
epub_show_urls = "footnote"
55 changes: 53 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ check-jsonschema = "^0.29.4"
returns = "^0.23.0"
toolz = "^1.0.0"
funcy = "^2.0"
isoduration = "^20.11.0"
[tool.poetry.group.test.dependencies]
pytest = "^8.3.2"
moto = {extras = ["all"], version = "^5.0.14"}
Expand Down
15 changes: 15 additions & 0 deletions src/nsidc/metgen/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def init(config):
"""Populates a configuration file based on user input."""
click.echo(metgen.banner())
config = metgen.init_config(config)
# add step here to evaluate input files?
# if netcdf: header information
click.echo(f"Initialized the metgen configuration file {config}")


Expand Down Expand Up @@ -68,6 +70,19 @@ def validate(config_filename, content_type):
metgen.validate(configuration, content_type)


@cli.command()
@click.option(
    "-f",
    "--file",
    "data_filename",
    help="Path to sample data file",
    required=True,
)
def assess(data_filename):
    """Examine a sample data file for metadata completeness"""
    # Bug fix: the option above binds its value to the name `data_filename`,
    # so the command callback must accept that parameter; without it click
    # raises TypeError ("unexpected keyword argument 'data_filename'") as
    # soon as the command is invoked.
    # TODO: implement the actual assessment; for now this is a stub.
    return True


@cli.command()
@click.option(
"-c",
Expand Down
32 changes: 29 additions & 3 deletions src/nsidc/metgen/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import logging
import os.path
from pathlib import Path
from returns.maybe import Maybe, Nothing

from nsidc.metgen import aws, constants
from nsidc.metgen import aws, constants, netcdf_reader


class ValidationError(Exception):
Expand All @@ -30,6 +31,11 @@ class Config:
checksum_type: str
number: int
dry_run: bool
filename_regex: Maybe[str] = Maybe.empty
time_coverage_duration: Maybe[str] = Maybe.empty
geospatial_x_resolution: Maybe[str] = Maybe.empty
geospatial_y_resolution: Maybe[str] = Maybe.empty
date_modified: Maybe[str] = Maybe.empty

def show(self):
# TODO: add section headings in the right spot
Expand All @@ -43,8 +49,8 @@ def show(self):
if self.dry_run:
LOGGER.info("")
LOGGER.info(
"""Note: The dry-run option was included, so no files will be\
staged and no CNM messages published."""
"""Note: The dry-run option was included, so no files will be \
staged and no CNM messages published."""
)
LOGGER.info("")

Expand Down Expand Up @@ -106,6 +112,11 @@ def configuration(
"checksum_type": constants.DEFAULT_CHECKSUM_TYPE,
"number": constants.DEFAULT_NUMBER,
"dry_run": constants.DEFAULT_DRY_RUN,
"filename_regex": Nothing,
"time_coverage_duration": Nothing,
"geospatial_x_resolution": Nothing,
"geospatial_y_resolution": Nothing,
"date_modified": Nothing,
}
try:
return Config(
Expand Down Expand Up @@ -174,6 +185,21 @@ def configuration(
_get_configuration_value(
environment, "Settings", "dry_run", bool, config_parser, overrides
),
_get_configuration_value(
environment, "Collection", "filename_regex", str, config_parser, overrides
),
_get_configuration_value(
environment, "Collection", "time_coverage_duration", str, config_parser, overrides
),
_get_configuration_value(
environment, "Collection", "geospatial_x_resolution", str, config_parser, overrides
),
_get_configuration_value(
environment, "Collection", "geospatial_y_resolution", str, config_parser, overrides
),
_get_configuration_value(
environment, "Collection", "date_modified", str, config_parser, overrides
),
)
except Exception as e:
raise Exception("Unable to read the configuration file", e)
Expand Down
50 changes: 17 additions & 33 deletions src/nsidc/metgen/metgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
import os.path
import sys
import uuid
from collections.abc import Callable
from functools import cache
from importlib.resources import open_text
from pathlib import Path
from string import Template
from typing import Callable

import jsonschema
from funcy import all, filter, partial, rcompose, take
Expand Down Expand Up @@ -175,31 +176,6 @@ def init_config(configuration_file):
return configuration_file


def prepare_output_dirs(configuration):
    """
    Build the ummg and cnm output directory paths for this run.

    If the configuration asks for UMM-G files to be overwritten, any JSON
    files already sitting in the ummg directory are scrubbed first.
    TODO: create local_output_dir, ummg_dir, and cnm subdir if they don't exist

    Returns a (ummg_path, cnm_path) tuple of Path objects.
    """
    output_root = configuration.local_output_dir
    ummg_path = Path(output_root, configuration.ummg_dir)
    cnm_path = Path(output_root, "cnm")

    if configuration.overwrite_ummg:
        scrub_json_files(ummg_path)

    return (ummg_path, cnm_path)


def scrub_json_files(path):
    """Remove every ``*.json`` file (or symlink) directly under *path*.

    A failure to delete one file is reported to stdout but does not stop
    the scrub of the remaining files.
    """
    print(f"Removing existing files in {path}")
    for candidate in path.glob("*.json"):
        try:
            # Only unlink regular files and symlinks; leave anything else alone.
            is_removable = os.path.isfile(candidate) or os.path.islink(candidate)
            if is_removable:
                os.unlink(candidate)
        except Exception as err:
            print("Failed to delete %s: %s" % (candidate, err))


# -------------------------------------------------------------------
# Data structures for processing Granules and recording results
# -------------------------------------------------------------------
Expand All @@ -224,6 +200,7 @@ class Granule:
submission_time: Maybe[str] = Maybe.empty
uuid: Maybe[str] = Maybe.empty
cnm_message: Maybe[str] = Maybe.empty
data_reader: Callable[[str], dict] = Maybe.empty


@dataclasses.dataclass
Expand Down Expand Up @@ -281,8 +258,9 @@ def process(configuration: config.Config) -> None:

# Find all of the input granule files, limit the size of the list based
# on the configuration, and execute the pipeline on each of the granules.
# TODO: Nicely manage reader and glob pattern for other file types.
candidate_granules = [
Granule(p.name, data_filenames=[str(p)])
Granule(p.name, data_filenames=[str(p)], data_reader=netcdf_reader.extract_metadata)
for p in Path(configuration.data_dir).glob("*.nc")
]
granules = take(configuration.number, candidate_granules)
Expand Down Expand Up @@ -359,15 +337,21 @@ def null_operation(configuration: config.Config, granule: Granule) -> Granule:
return granule


@cache
def retrieve_collection(auth_id: str, version: int):
    """Return the Collection identified by *auth_id* and *version*.

    Memoized with functools.cache, so the (eventual) CMR lookup will run
    at most once per (auth_id, version) pair in a process.
    """
    # TODO: query CMR and populate the Collection from the UMM-C response
    # instead of echoing back only the auth_id and version:
    # ummc_from_cmr = talk_to_cmr(configuration.auth_id, configuration.version)
    # pull out fields from UMM-C response and use to create collection object
    # with more than just auth_id and version number.
    return Collection(auth_id, version)


def granule_collection(configuration: config.Config, granule: Granule) -> Granule:
juliacollins marked this conversation as resolved.
Show resolved Hide resolved
"""
Find the Granule's Collection and add it to the Granule.
Associate collection information with the Granule.
"""
# TODO: When we start querying CMR, refactor the pipeline to retrieve
# collection information from CMR once, then associate it with each
juliacollins marked this conversation as resolved.
Show resolved Hide resolved
# granule.
return dataclasses.replace(
granule, collection=Collection(configuration.auth_id, configuration.version)
granule,
collection=retrieve_collection(configuration.auth_id, configuration.version),
)


Expand Down Expand Up @@ -418,7 +402,7 @@ def create_ummg(configuration: config.Config, granule: Granule) -> Granule:
# }
metadata_details = {}
for data_file in granule.data_filenames:
metadata_details[data_file] = netcdf_reader.extract_metadata(data_file)
metadata_details[data_file] = granule.data_reader(data_file, configuration)

# Collapse information about (possibly) multiple files into a granule summary.
summary = metadata_summary(metadata_details)
Expand Down
Loading
Loading