Skip to content

Commit

Permalink
[WIP]
Browse files: browse the repository at this point in the history
  • Loading branch information
lthurston committed Jan 29, 2024
1 parent 1357c85 commit 7d18840
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 21 deletions.
8 changes: 7 additions & 1 deletion metadata_fetcher/fetchers/Fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

from requests.adapters import HTTPAdapter, Retry
from rikolti.utils.versions import put_vernacular_page

import time
import os

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -52,6 +53,11 @@ def fetch_page(self):
f"[{self.collection_id}]: fetching page {self.write_page} "
f"at {page.get('url')}"
)

# Added because collection 28011 was failing without this
print(f"Sleeping in {os.path.basename(__file__)}!")
time.sleep(1)
print("Done Sleeping!")
try:
response = requests.get(**page)
response.raise_for_status()
Expand Down
5 changes: 3 additions & 2 deletions metadata_fetcher/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import sys
import time
import os

from .fetchers.Fetcher import Fetcher
from rikolti.utils.versions import create_vernacular_version
Expand All @@ -22,7 +23,7 @@ def import_fetcher(harvest_type):


# AWS Lambda entry point
def fetch_collection(payload, vernacular_version, context, sleep=False) -> list[dict]:
def fetch_collection(payload, vernacular_version, sleep=True) -> list[dict]:
"""
returns a list of dicts with the following keys:
document_count: int
Expand All @@ -41,7 +42,7 @@ def fetch_collection(payload, vernacular_version, context, sleep=False) -> list[
fetch_status = []
try:
if sleep:
print("Sleeping!")
print(f"Sleeping in {os.path.basename(__file__)}!")
time.sleep(1)
print("Done Sleeping!")
fetcher = fetcher_class(payload)
Expand Down
30 changes: 12 additions & 18 deletions metadata_mapper/mappers/marc/ucb_tind_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@ class UcbTindRecord(MarcRecord):
def UCLDC_map(self):
return {
"calisphere-id": self.legacy_couch_db_id.split("--")[1],
"_id": self.get_marc_data_fields(["901"], ["a"]),
"isShownAt": self.get_marc_data_fields(["856"], ["u"]),
"isShownBy": self.get_marc_data_fields(["856"], ["u"]),
"language": self.get_marc_data_fields(["041"], ["a"]),
"date": self.get_marc_data_fields(["260"], ["c"]),
"publisher": self.get_marc_data_fields(["260"], ["a", "b"]),
"format": self.map_format,
"extent": self.map_extent,
"identifier": self.get_marc_data_fields(["020", "022", "035"], ["a"]),
"identifier": self.get_marc_data_fields(["020", "022", "024", "901"],
["a"]),
"creator": self.get_marc_data_fields(["100", "110", "111"]),
"relation": self.map_relation,
"description": self.map_description,
Expand All @@ -38,18 +40,16 @@ def UCLDC_map(self):

def map_type(self):
value = []

for type_mapping in self.get_type_mapping():
value.append(type_mapping[1])
for types in self.get_matching_types():
value.append(types[1])

return value


def map_spec_type(self):
value = []

for type_mapping in self.get_type_mapping():
value.append(type_mapping[0])
for types in self.get_matching_types():
value.append(types[0])

if (self.get_marc_control_field("008", 28)
in ("a", "c", "f", "i", "l", "m", "o", "s") or
Expand All @@ -58,15 +58,14 @@ def map_spec_type(self):

return value

def get_type_mapping(self):
def get_matching_types(self):
type_mapping = []

compare = (self.get_marc_leader("type_of_control") +
self.get_marc_leader("bibliographic_level") +
self.get_marc_control_field("007", 1) +
self.get_marc_control_field("008", 21))

for (key, value) in self.type_mapping["leader"]:
for (key, value) in self.get_types()["leader"].items():
if re.match(f"^{key}", compare):
type_mapping.append(value)

Expand Down Expand Up @@ -223,7 +222,7 @@ def get_marc_control_field(self, field_tag: str, index: int = None) -> list:
def get_marc_data_fields(self, field_tags: list, subfield_codes=[],
**kwargs) -> list:
"""
Get the values of specified subfields from given MARC fields.
Get the values of specified subfields from given MARC fields. This allows control fields too.
Set the `exclude_subfields` kwarg to exclude the specified subfield_codes.
Expand All @@ -236,11 +235,6 @@ def get_marc_data_fields(self, field_tags: list, subfield_codes=[],
all subfields will be included.
:return: A list of values of the specified subfields.
"""

# Don't let any control tags sneak in! They don't have subfields.
data_field_tags = [tag for tag in field_tags
if tag.isnumeric() and int(tag) > 99]

def subfield_matches(check_code: str, subfield_codes: list,
exclude_subfields: bool) -> bool:
"""
Expand Down Expand Up @@ -269,7 +263,7 @@ def subfield_matches(check_code: str, subfield_codes: list,
value_list = [process_value(value, field_tag, subfield[0])
if process_value else value
for (field_tag, matching_fields) in
self.get_marc_tag_value_map(data_field_tags).items()
self.get_marc_tag_value_map(field_tags).items()
for matching_field in matching_fields
for subfield in list(matching_field.subfields_as_dict().items())
for value in subfield[1]
Expand Down Expand Up @@ -311,7 +305,7 @@ def get_marc_leader(self, leader_key: str):

return ""

def get_type_mapping(self):
def get_types(self):
"""
Legacy code verbatim
:return:
Expand Down

0 comments on commit 7d18840

Please sign in to comment.