-
-
Notifications
You must be signed in to change notification settings - Fork 61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: improved mediafusion validation and scrape logic #932
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,5 @@ | ||
""" Mediafusion scraper module """ | ||
import json | ||
import re | ||
|
||
from typing import Dict | ||
|
||
from loguru import logger | ||
|
@@ -35,7 +34,11 @@ def __init__(self): | |
self.timeout = self.settings.timeout | ||
self.encrypted_string = None | ||
# https://github.com/elfhosted/infra/blob/ci/mediafusion/middleware-ratelimit-stream.yaml | ||
rate_limit_params = get_rate_limit_params(max_calls=1, period=10) if self.settings.ratelimit else None | ||
rate_limit_params = ( | ||
get_rate_limit_params(max_calls=1, period=10) | ||
if self.settings.ratelimit | ||
else None | ||
) | ||
session = create_service_session(rate_limit_params=rate_limit_params) | ||
self.request_handler = ScraperRequestHandler(session) | ||
self.initialized = self.validate() | ||
|
@@ -57,39 +60,36 @@ def validate(self) -> bool: | |
logger.error("Mediafusion ratelimit must be a valid boolean.") | ||
return False | ||
|
||
if self.app_settings.downloaders.real_debrid.enabled: | ||
self.api_key = self.app_settings.downloaders.real_debrid.api_key | ||
self.downloader = "realdebrid" | ||
elif self.app_settings.downloaders.torbox.enabled: | ||
self.api_key = self.app_settings.downloaders.torbox.api_key | ||
self.downloader = "torbox" | ||
elif self.app_settings.downloaders.all_debrid.enabled: | ||
self.api_key = self.app_settings.downloaders.all_debrid.api_key | ||
self.downloader = "alldebrid" | ||
else: | ||
logger.error("No downloader enabled, please enable at least one.") | ||
return False | ||
|
||
payload = { | ||
"sp": { | ||
"sv": self.downloader, | ||
"tk": self.api_key, | ||
"ewc": False | ||
}, | ||
"sr": ["4k", "2160p", "1440p", "1080p", "720p", "480p", None], | ||
"ec": False, | ||
"eim": False, | ||
"sftn": True, | ||
"tsp": ["cached"], # sort order, but this doesnt matter as we sort later | ||
"nf": ["Disable"], # nudity filter | ||
"cf": ["Disable"] # certification filter | ||
"selected_resolutions": [ | ||
"4k", | ||
"2160p", | ||
"1440p", | ||
"1080p", | ||
"720p", | ||
"480p", | ||
None, | ||
], | ||
"max_streams_per_resolution": 100, | ||
"live_search_streams": True, | ||
"show_full_torrent_name": True, | ||
"torrent_sorting_priority": [], # disable sort order, but this doesnt matter as we sort later | ||
"language_sorting": [], | ||
"nudity_filter": ["Disable"], | ||
"certification_filter": ["Disable"], | ||
} | ||
|
||
url = f"{self.settings.url}/encrypt-user-data" | ||
headers = {"Content-Type": "application/json"} | ||
|
||
try: | ||
response = self.request_handler.execute(HttpMethod.POST, url, overriden_response_type=ResponseType.DICT, json=payload, headers=headers) | ||
response = self.request_handler.execute( | ||
HttpMethod.POST, | ||
url, | ||
overriden_response_type=ResponseType.DICT, | ||
json=payload, | ||
headers=headers, | ||
) | ||
if not response.data or response.data["status"] != "success": | ||
logger.error(f"Failed to encrypt user data: {response.data['message']}") | ||
return False | ||
|
@@ -100,7 +100,9 @@ def validate(self) -> bool: | |
|
||
try: | ||
url = f"{self.settings.url}/manifest.json" | ||
response = self.request_handler.execute(HttpMethod.GET, url, timeout=self.timeout) | ||
response = self.request_handler.execute( | ||
HttpMethod.GET, url, timeout=self.timeout | ||
) | ||
return response.is_ok | ||
except Exception as e: | ||
logger.error(f"Mediafusion failed to initialize: {e}") | ||
|
@@ -117,7 +119,9 @@ def run(self, item: MediaItem) -> Dict[str, str]: | |
except RateLimitExceeded: | ||
logger.debug(f"Mediafusion ratelimit exceeded for item: {item.log_string}") | ||
except ConnectTimeout: | ||
logger.warning(f"Mediafusion connection timeout for item: {item.log_string}") | ||
logger.warning( | ||
f"Mediafusion connection timeout for item: {item.log_string}" | ||
) | ||
except ReadTimeout: | ||
logger.warning(f"Mediafusion read timeout for item: {item.log_string}") | ||
except RequestException as e: | ||
|
@@ -134,25 +138,37 @@ def scrape(self, item: MediaItem) -> tuple[Dict[str, str], int]: | |
if identifier: | ||
url += identifier | ||
|
||
response = self.request_handler.execute(HttpMethod.GET, f"{url}.json", timeout=self.timeout) | ||
response = self.request_handler.execute( | ||
HttpMethod.GET, f"{url}.json", timeout=self.timeout | ||
) | ||
if not response.is_ok or len(response.data.streams) <= 0: | ||
logger.log("NOT_FOUND", f"No streams found for {item.log_string}") | ||
return {} | ||
|
||
torrents: Dict[str, str] = {} | ||
|
||
for stream in response.data.streams: | ||
if not hasattr(stream, "description") and hasattr(stream, "title") and "rate-limit exceeded" in stream.title: | ||
raise RateLimitExceeded(f"Mediafusion rate-limit exceeded for item: {item.log_string}") | ||
if ( | ||
not hasattr(stream, "description") | ||
and hasattr(stream, "title") | ||
and "rate-limit exceeded" in stream.title | ||
): | ||
raise RateLimitExceeded( | ||
f"Mediafusion rate-limit exceeded for item: {item.log_string}" | ||
) | ||
description_split = stream.description.replace("📂 ", "") | ||
raw_title = description_split.split("\n")[0] | ||
info_hash = re.search(r"info_hash=([A-Za-z0-9]+)", stream.url).group(1) | ||
if scrape_type == "series": | ||
raw_title = raw_title.split("/")[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm.. In previous updates I've been pretty back and forth on this one.. Do we want the torrent or do we want the file in these scenarios.. We would prefer the torrent I think because it would have more episodes that might be needed from that same torrent. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll fix this one. Thanks man! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here it's providing the episode file name. If you preferred to get the episode file name, that's also fine. |
||
info_hash = stream.infoHash | ||
if info_hash and info_hash not in torrents: | ||
torrents[info_hash] = raw_title | ||
|
||
if torrents: | ||
logger.log("SCRAPER", f"Found {len(torrents)} streams for {item.log_string}") | ||
logger.log( | ||
"SCRAPER", f"Found {len(torrents)} streams for {item.log_string}" | ||
) | ||
else: | ||
logger.log("NOT_FOUND", f"No streams found for {item.log_string}") | ||
|
||
return torrents | ||
return torrents |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@mhdzumair what are live search streams?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you enable this, it will fetch the streams from the MediaFusion scraper with a time limit. Also note that it's not going to fetch from scrapers every time. If you disable this, MediaFusion will only respond with existing DB data.