Skip to content

Commit

Permalink
Remove couchdb env vars (#1149)
Browse files Browse the repository at this point in the history
* Remove couchdb-related env vars

* Fix the check for whether any collections were successfully validated

* Run the validator as part of the harvest_collection DAG
  • Loading branch information
barbarahui authored Nov 4, 2024
1 parent 4fffa56 commit cb14126
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 26 deletions.
8 changes: 2 additions & 6 deletions dags/shared_tasks/mapping_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,7 @@ def validate_collection_task(
"[3433/vernacular_metadata_v1/mapped_metadata_v1/3.jsonl]"
]
"""
validate = bool(
os.environ.get("UCLDC_SOLR_URL") and
os.environ.get("UCLDC_COUCH_URL")
)
if validate:
if os.environ.get("UCLDC_SOLR_URL"):
mapped_page_batches = [json.loads(batch) for batch in mapped_page_batches]
mapped_pages = list(chain.from_iterable(mapped_page_batches))
mapped_pages = [path for path in mapped_pages if 'children' not in path]
Expand Down Expand Up @@ -266,7 +262,7 @@ def validate_endpoint_task(url, mapped_versions, params=None, **context):
print(f"please validate manually: {list(errored_collections.keys())}")
print("*" * 60)

if not len(errored_collections) == len(validations):
if len(errored_collections) == len(validations):
print("-", file=sys.stderr)
raise ValueError("No collections successfully validated, exiting.")

Expand Down
2 changes: 0 additions & 2 deletions env.example
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ export SKIP_UNDEFINED_ENRICHMENTS=True
# export UCLDC_SOLR_URL="https://harvest-stg.cdlib.org/solr_api" # this is solr stage
export UCLDC_SOLR_URL="https://solr.calisphere.org/solr" # this is solr prod
export UCLDC_SOLR_API_KEY= # ask for a key
# export UCLDC_COUCH_URL="https://harvest-stg.cdlib.org/" # this is couch stage
export UCLDC_COUCH_URL="https://harvest-prd.cdlib.org/" # this is couch prod

# content_harvester when run locally via aws_mwaa_local_runner
# export METADATA_MOUNT=/<path on local host>/rikolti_data # required to run content harvester as docker operator in mwaa-local-runner
Expand Down
3 changes: 0 additions & 3 deletions metadata_mapper/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,3 @@

SOLR_URL = os.environ.get('UCLDC_SOLR_URL', False)
SOLR_API_KEY = os.environ.get('UCLDC_SOLR_API_KEY', False)
COUCH_URL = os.environ.get('UCLDC_COUCH_URL', False)

COUCH_TIMEOUT = int(os.environ.get('UCLDC_COUCH_TIMEOUT', 60))
19 changes: 4 additions & 15 deletions metadata_mapper/validate_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,21 +289,10 @@ def couch_db_request(collection_id: int, field_name: str) -> list[dict[str, str]
Returns: list[dict]
"""
url = f"{settings.COUCH_URL}/" \
"couchdb/ucldc/_design/all_provider_docs/" \
"_list/has_field_value/by_provider_name_wdoc" \
f"?key=\"{collection_id}\"&field={field_name}&limit=100000"

try:
response = requests.get(url, verify=False, timeout=settings.COUCH_TIMEOUT)
return json.loads(response.content)
except requests.exceptions.Timeout as e:
print(e)
print(f"Request to Couchdb has timed out after {settings.COUCH_TIMEOUT} \
seconds. Continuing without isShownAt and isShownBy values, \
which may result in increased/inaccurate validation errors.")
return []

print("Couchdb is no longer running. "
"Continuing without isShownAt and isShownBy values, "
"which may result in increased/inaccurate validation errors.")
return []

def get_couch_db_data(collection_id: int,
harvest_ids: list[str]) -> dict[str, dict[str, str]]:
Expand Down

0 comments on commit cb14126

Please sign in to comment.