Improve MediaFusion scraping configs
mhdzumair authored and dreulavelle committed Dec 6, 2024
1 parent 42829a2 commit eacfe06
Showing 1 changed file with 53 additions and 37 deletions.
90 changes: 53 additions & 37 deletions src/program/services/scrapers/mediafusion.py
@@ -1,6 +1,5 @@
""" Mediafusion scraper module """
import json
import re

from typing import Dict

from loguru import logger
@@ -35,7 +34,11 @@ def __init__(self):
         self.timeout = self.settings.timeout
         self.encrypted_string = None
         # https://github.com/elfhosted/infra/blob/ci/mediafusion/middleware-ratelimit-stream.yaml
-        rate_limit_params = get_rate_limit_params(max_calls=1, period=10) if self.settings.ratelimit else None
+        rate_limit_params = (
+            get_rate_limit_params(max_calls=1, period=10)
+            if self.settings.ratelimit
+            else None
+        )
         session = create_service_session(rate_limit_params=rate_limit_params)
         self.request_handler = ScraperRequestHandler(session)
         self.initialized = self.validate()
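Note on the rate limiting above: when `ratelimit` is enabled the scraper throttles itself to one call per ten-second window (the values passed to `get_rate_limit_params`, chosen to respect the elfhosted middleware linked in the comment). A minimal, self-contained sketch of that kind of sliding-window limiter is shown below; `SimpleRateLimiter` is a hypothetical illustration, not part of this codebase, which relies on its own `get_rate_limit_params`/`create_service_session` helpers.

```python
import time


class SimpleRateLimiter:
    """Hypothetical sliding-window limiter: at most max_calls per period seconds."""

    def __init__(self, max_calls: int = 1, period: float = 10.0) -> None:
        self.max_calls = max_calls
        self.period = period
        self._calls: list[float] = []

    def wait(self) -> None:
        """Block until another call is allowed, then record it."""
        now = time.monotonic()
        # Drop timestamps that have left the window.
        self._calls = [t for t in self._calls if now - t < self.period]
        if len(self._calls) >= self.max_calls:
            # Sleep until the oldest recorded call falls out of the window.
            time.sleep(self.period - (now - self._calls[0]))
        self._calls.append(time.monotonic())


limiter = SimpleRateLimiter(max_calls=1, period=10.0)
limiter.wait()  # call once before each outbound scrape request
```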
@@ -57,39 +60,36 @@ def validate(self) -> bool:
logger.error("Mediafusion ratelimit must be a valid boolean.")
return False

if self.app_settings.downloaders.real_debrid.enabled:
self.api_key = self.app_settings.downloaders.real_debrid.api_key
self.downloader = "realdebrid"
elif self.app_settings.downloaders.torbox.enabled:
self.api_key = self.app_settings.downloaders.torbox.api_key
self.downloader = "torbox"
elif self.app_settings.downloaders.all_debrid.enabled:
self.api_key = self.app_settings.downloaders.all_debrid.api_key
self.downloader = "alldebrid"
else:
logger.error("No downloader enabled, please enable at least one.")
return False

payload = {
"sp": {
"sv": self.downloader,
"tk": self.api_key,
"ewc": False
},
"sr": ["4k", "2160p", "1440p", "1080p", "720p", "480p", None],
"ec": False,
"eim": False,
"sftn": True,
"tsp": ["cached"], # sort order, but this doesnt matter as we sort later
"nf": ["Disable"], # nudity filter
"cf": ["Disable"] # certification filter
"selected_resolutions": [
"4k",
"2160p",
"1440p",
"1080p",
"720p",
"480p",
None,
],
"max_streams_per_resolution": 100,
"live_search_streams": True,
"show_full_torrent_name": True,
"torrent_sorting_priority": [], # disable sort order, but this doesnt matter as we sort later
"language_sorting": [],
"nudity_filter": ["Disable"],
"certification_filter": ["Disable"],
}

url = f"{self.settings.url}/encrypt-user-data"
headers = {"Content-Type": "application/json"}

try:
response = self.request_handler.execute(HttpMethod.POST, url, overriden_response_type=ResponseType.DICT, json=payload, headers=headers)
response = self.request_handler.execute(
HttpMethod.POST,
url,
overriden_response_type=ResponseType.DICT,
json=payload,
headers=headers,
)
if not response.data or response.data["status"] != "success":
logger.error(f"Failed to encrypt user data: {response.data['message']}")
return False
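For readers who want to see what this request amounts to outside the project's request wrappers, here is a rough sketch using `requests` directly. The payload keys and the `/encrypt-user-data` endpoint are taken from the diff above; the base URL is a placeholder and the name of the response field carrying the encrypted blob is an assumption, so treat this purely as an illustration.

```python
import requests

MEDIAFUSION_URL = "https://mediafusion.example"  # placeholder base URL

payload = {
    "selected_resolutions": ["4k", "2160p", "1440p", "1080p", "720p", "480p", None],
    "max_streams_per_resolution": 100,
    "live_search_streams": True,
    "show_full_torrent_name": True,
    "torrent_sorting_priority": [],
    "language_sorting": [],
    "nudity_filter": ["Disable"],
    "certification_filter": ["Disable"],
}

resp = requests.post(
    f"{MEDIAFUSION_URL}/encrypt-user-data",
    json=payload,
    headers={"Content-Type": "application/json"},
    timeout=15,
)
data = resp.json()
if data.get("status") != "success":
    raise RuntimeError(f"Failed to encrypt user data: {data.get('message')}")
# The success response carries an encrypted config token used in later stream
# URLs; the exact field name is not shown in this diff, so it is assumed here.
encrypted_string = data.get("encrypted_str")
```

Note that, unlike the previous obfuscated payload (`sp`/`sv`/`tk`), this configuration no longer sends a debrid service name or API key at this stage.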
@@ -100,7 +100,9 @@ def validate(self) -> bool:

         try:
             url = f"{self.settings.url}/manifest.json"
-            response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout)
+            response = self.request_handler.execute(
+                HttpMethod.GET, url, timeout=self.timeout
+            )
             return response.is_ok
         except Exception as e:
             logger.error(f"Mediafusion failed to initialize: {e}")
@@ -117,7 +119,9 @@ def run(self, item: MediaItem) -> Dict[str, str]:
         except RateLimitExceeded:
             logger.debug(f"Mediafusion ratelimit exceeded for item: {item.log_string}")
         except ConnectTimeout:
-            logger.warning(f"Mediafusion connection timeout for item: {item.log_string}")
+            logger.warning(
+                f"Mediafusion connection timeout for item: {item.log_string}"
+            )
         except ReadTimeout:
             logger.warning(f"Mediafusion read timeout for item: {item.log_string}")
         except RequestException as e:
@@ -134,25 +138,37 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]:
         if identifier:
             url += identifier

-        response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout)
+        response = self.request_handler.execute(
+            HttpMethod.GET, f"{url}.json", timeout=self.timeout
+        )
         if not response.is_ok or len(response.data.streams) <= 0:
             logger.log("NOT_FOUND", f"No streams found for {item.log_string}")
             return {}

         torrents: Dict[str, str] = {}

         for stream in response.data.streams:
-            if not hasattr(stream, "description") and hasattr(stream, "title") and "rate-limit exceeded" in stream.title:
-                raise RateLimitExceeded(f"Mediafusion rate-limit exceeded for item: {item.log_string}")
+            if (
+                not hasattr(stream, "description")
+                and hasattr(stream, "title")
+                and "rate-limit exceeded" in stream.title
+            ):
+                raise RateLimitExceeded(
+                    f"Mediafusion rate-limit exceeded for item: {item.log_string}"
+                )
             description_split = stream.description.replace("📂 ", "")
             raw_title = description_split.split("\n")[0]
-            info_hash = re.search(r"info_hash=([A-Za-z0-9]+)", stream.url).group(1)
+            if scrape_type == "series":
+                raw_title = raw_title.split("/")[0]
+            info_hash = stream.infoHash
             if info_hash and info_hash not in torrents:
                 torrents[info_hash] = raw_title

         if torrents:
-            logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}")
+            logger.log(
+                "SCRAPER", f"Found {len(torrents)} streams for {item.log_string}"
+            )
         else:
             logger.log("NOT_FOUND", f"No streams found for {item.log_string}")

-        return torrents
+        return torrents
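The parsing change in this last hunk can be exercised on its own. The sketch below reimplements the new loop over plain dictionaries so it runs without the scraper's request/response classes; the sample stream is invented solely to show the expected shape (an `infoHash` plus a `description` whose first line holds the title, in `folder/file` form for series when full torrent names are enabled).

```python
from typing import Dict, List


def parse_streams(streams: List[dict], scrape_type: str) -> Dict[str, str]:
    """Simplified stand-alone version of the scrape() loop above."""
    torrents: Dict[str, str] = {}
    for stream in streams:
        description = stream.get("description", "").replace("📂 ", "")
        raw_title = description.split("\n")[0]
        if scrape_type == "series":
            # With show_full_torrent_name enabled the first line may read
            # "<torrent name>/<file name>"; keep only the torrent name.
            raw_title = raw_title.split("/")[0]
        info_hash = stream.get("infoHash")
        if info_hash and info_hash not in torrents:
            torrents[info_hash] = raw_title
    return torrents


example = [{
    "infoHash": "0123456789abcdef0123456789abcdef01234567",
    "description": "📂 Some.Show.S01.1080p/Some.Show.S01E01.mkv\n💾 1.4 GB",
}]
print(parse_streams(example, "series"))
# {'0123456789abcdef0123456789abcdef01234567': 'Some.Show.S01.1080p'}
```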
