From eacfe06b051ef1f345282f8e540e241e90a01bf6 Mon Sep 17 00:00:00 2001 From: Mohamed Zumair Date: Fri, 6 Dec 2024 06:23:28 +0530 Subject: [PATCH] Improve MediaFusion scraping configs --- src/program/services/scrapers/mediafusion.py | 90 ++++++++++++-------- 1 file changed, 53 insertions(+), 37 deletions(-) diff --git a/src/program/services/scrapers/mediafusion.py b/src/program/services/scrapers/mediafusion.py index b9fb50b0..33b1dc48 100644 --- a/src/program/services/scrapers/mediafusion.py +++ b/src/program/services/scrapers/mediafusion.py @@ -1,6 +1,5 @@ """ Mediafusion scraper module """ -import json -import re + from typing import Dict from loguru import logger @@ -35,7 +34,11 @@ def __init__(self): self.timeout = self.settings.timeout self.encrypted_string = None # https://github.com/elfhosted/infra/blob/ci/mediafusion/middleware-ratelimit-stream.yaml - rate_limit_params = get_rate_limit_params(max_calls=1, period=10) if self.settings.ratelimit else None + rate_limit_params = ( + get_rate_limit_params(max_calls=1, period=10) + if self.settings.ratelimit + else None + ) session = create_service_session(rate_limit_params=rate_limit_params) self.request_handler = ScraperRequestHandler(session) self.initialized = self.validate() @@ -57,39 +60,36 @@ def validate(self) -> bool: logger.error("Mediafusion ratelimit must be a valid boolean.") return False - if self.app_settings.downloaders.real_debrid.enabled: - self.api_key = self.app_settings.downloaders.real_debrid.api_key - self.downloader = "realdebrid" - elif self.app_settings.downloaders.torbox.enabled: - self.api_key = self.app_settings.downloaders.torbox.api_key - self.downloader = "torbox" - elif self.app_settings.downloaders.all_debrid.enabled: - self.api_key = self.app_settings.downloaders.all_debrid.api_key - self.downloader = "alldebrid" - else: - logger.error("No downloader enabled, please enable at least one.") - return False - payload = { - "sp": { - "sv": self.downloader, - "tk": self.api_key, - "ewc": False - }, - "sr": ["4k", "2160p", "1440p", "1080p", "720p", "480p", None], - "ec": False, - "eim": False, - "sftn": True, - "tsp": ["cached"], # sort order, but this doesnt matter as we sort later - "nf": ["Disable"], # nudity filter - "cf": ["Disable"] # certification filter + "selected_resolutions": [ + "4k", + "2160p", + "1440p", + "1080p", + "720p", + "480p", + None, + ], + "max_streams_per_resolution": 100, + "live_search_streams": True, + "show_full_torrent_name": True, + "torrent_sorting_priority": [], # disable sort order, but this doesnt matter as we sort later + "language_sorting": [], + "nudity_filter": ["Disable"], + "certification_filter": ["Disable"], } url = f"{self.settings.url}/encrypt-user-data" headers = {"Content-Type": "application/json"} try: - response = self.request_handler.execute(HttpMethod.POST, url, overriden_response_type=ResponseType.DICT, json=payload, headers=headers) + response = self.request_handler.execute( + HttpMethod.POST, + url, + overriden_response_type=ResponseType.DICT, + json=payload, + headers=headers, + ) if not response.data or response.data["status"] != "success": logger.error(f"Failed to encrypt user data: {response.data['message']}") return False @@ -100,7 +100,9 @@ def validate(self) -> bool: try: url = f"{self.settings.url}/manifest.json" - response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout) + response = self.request_handler.execute( + HttpMethod.GET, url, timeout=self.timeout + ) return response.is_ok except Exception as e: logger.error(f"Mediafusion failed to initialize: {e}") @@ -117,7 +119,9 @@ def run(self, item: MediaItem) -> Dict[str, str]: except RateLimitExceeded: logger.debug(f"Mediafusion ratelimit exceeded for item: {item.log_string}") except ConnectTimeout: - logger.warning(f"Mediafusion connection timeout for item: {item.log_string}") + logger.warning( + f"Mediafusion connection timeout for item: {item.log_string}" + ) except ReadTimeout: logger.warning(f"Mediafusion read timeout for item: {item.log_string}") except RequestException as e: @@ -134,7 +138,9 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: if identifier: url += identifier - response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout) + response = self.request_handler.execute( + HttpMethod.GET, f"{url}.json", timeout=self.timeout + ) if not response.is_ok or len(response.data.streams) <= 0: logger.log("NOT_FOUND", f"No streams found for {item.log_string}") return {} @@ -142,17 +148,27 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: torrents: Dict[str, str] = {} for stream in response.data.streams: - if not hasattr(stream, "description") and hasattr(stream, "title") and "rate-limit exceeded" in stream.title: - raise RateLimitExceeded(f"Mediafusion rate-limit exceeded for item: {item.log_string}") + if ( + not hasattr(stream, "description") + and hasattr(stream, "title") + and "rate-limit exceeded" in stream.title + ): + raise RateLimitExceeded( + f"Mediafusion rate-limit exceeded for item: {item.log_string}" + ) description_split = stream.description.replace("📂 ", "") raw_title = description_split.split("\n")[0] - info_hash = re.search(r"info_hash=([A-Za-z0-9]+)", stream.url).group(1) + if scrape_type == "series": + raw_title = raw_title.split("/")[0] + info_hash = stream.infoHash if info_hash and info_hash not in torrents: torrents[info_hash] = raw_title if torrents: - logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}") + logger.log( + "SCRAPER", f"Found {len(torrents)} streams for {item.log_string}" + ) else: logger.log("NOT_FOUND", f"No streams found for {item.log_string}") - return torrents \ No newline at end of file + return torrents