"""Naive MARC fetcher (metadata_fetcher/fetchers/marc_fetcher.py).

Depends on the third-party ``pymarc`` package, which is listed in
``metadata_fetcher/requirements.txt``.
"""
import json
from typing import Any

import pymarc
import requests

from .Fetcher import Fetcher


class MarcFetcher(Fetcher):
    """Fetcher that reads binary MARC data from a single URL."""

    def __init__(self, params: dict[str, Any]):
        """Initialise the fetcher from its harvest parameters.

        Args:
            params: Fetcher parameters. ``params["harvest_data"]["url"]`` is
                read as the location of the MARC data; the rest is passed
                through to the ``Fetcher`` base class.
        """
        super().__init__(params)
        # NOTE(review): a missing "harvest_data" entry surfaces here as an
        # AttributeError on None -- confirm callers always supply it.
        self.url = params.get("harvest_data").get("url")

    def build_fetch_request(self) -> dict[str, Any]:
        """Return the request description for the MARC file.

        Returns:
            A dict holding only the configured ``url``.
        """
        return {"url": self.url}

    def check_page(self, http_resp: requests.Response) -> int:
        """Count the MARC records contained in a response body.

        Args:
            http_resp: HTTP response whose ``content`` is binary MARC data.

        Returns:
            The number of records pymarc was able to parse.
        """
        reader = pymarc.MARCReader(
            http_resp.content,
            to_unicode=True,
            utf8_handling="replace",
        )
        # pymarc yields None (instead of raising) for a record it cannot
        # parse; those must not be counted as fetched records.
        return sum(1 for record in reader if record is not None)

    def json(self) -> str:
        """Return the fetcher state serialised as a JSON string.

        The state is always ``{"finished": true}``.
        """
        # NOTE(review): presumably tells the caller there are no further
        # pages to fetch -- confirm against the Fetcher base class.
        return json.dumps({"finished": True})