Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: api manual scraping fixes #915

Merged
merged 4 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 18 additions & 9 deletions src/program/services/downloaders/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,27 @@ class DebridFile(BaseModel):
filesize: Optional[int] = Field(default=None)

@classmethod
def create(cls, filename: str, filesize_bytes: int, filetype: Literal["movie", "episode"], file_id: Optional[int] = None) -> Optional["DebridFile"]:
def create(
cls,
filename: str,
filesize_bytes: int,
filetype: Literal["movie", "show", "season", "episode"],
file_id: Optional[int] = None,
limit_filesize: bool = True

) -> Optional["DebridFile"]:
"""Factory method to validate and create a DebridFile"""
if not any(filename.endswith(ext) for ext in VIDEO_EXTENSIONS) and not "sample" in filename.lower():
return None

filesize_mb = filesize_bytes / 1_000_000
if filetype == "movie":
if not (FILESIZE_MOVIE_CONSTRAINT[0] <= filesize_mb <= FILESIZE_MOVIE_CONSTRAINT[1]):
return None
elif filetype == "episode":
if not (FILESIZE_EPISODE_CONSTRAINT[0] <= filesize_mb <= FILESIZE_EPISODE_CONSTRAINT[1]):
return None

if limit_filesize:
filesize_mb = filesize_bytes / 1_000_000
if filetype == "movie":
if not (FILESIZE_MOVIE_CONSTRAINT[0] <= filesize_mb <= FILESIZE_MOVIE_CONSTRAINT[1]):
return None
elif filetype in ["show", "season", "episode"]:
if not (FILESIZE_EPISODE_CONSTRAINT[0] <= filesize_mb <= FILESIZE_EPISODE_CONSTRAINT[1]):
return None

return cls(filename=filename, filesize=filesize_bytes, file_id=file_id)

Expand Down
18 changes: 11 additions & 7 deletions src/program/services/downloaders/shared.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC, abstractmethod
from datetime import datetime
from typing import Optional
from typing import Optional, Union

from RTN import ParsedData, parse

Expand Down Expand Up @@ -41,30 +41,34 @@ def get_instant_availability(self, infohash: str, item_type: str) -> Optional[To
pass

@abstractmethod
def add_torrent(self, infohash: str) -> int:
def add_torrent(self, infohash: str) -> Union[int, str]:
"""
Add a torrent and return its information

Args:
infohash: The hash of the torrent to add

Returns:
str: The ID of the added torrent
Union[int, str]: The ID of the added torrent

Notes:
The return type changes depending on the downloader
"""
pass

@abstractmethod
def select_files(self, request: list[int]) -> None:
def select_files(self, torrent_id: Union[int, str], file_ids: list[int]) -> None:
"""
Select which files to download from the torrent

Args:
request: File selection details including torrent ID and file IDs
torrent_id: ID of the torrent to select files for
file_ids: IDs of the files to select
"""
pass

@abstractmethod
def get_torrent_info(self, torrent_id: str) -> TorrentInfo:
def get_torrent_info(self, torrent_id: Union[int, str]) -> TorrentInfo:
"""
Get information about a specific torrent using its ID

Expand All @@ -77,7 +81,7 @@ def get_torrent_info(self, torrent_id: str) -> TorrentInfo:
pass

@abstractmethod
def delete_torrent(self, torrent_id: str) -> None:
def delete_torrent(self, torrent_id: Union[int, str]) -> None:
"""
Delete a torrent from the service

Expand Down
3 changes: 3 additions & 0 deletions src/program/services/scrapers/mediafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ def validate(self) -> bool:
elif self.app_settings.downloaders.torbox.enabled:
self.api_key = self.app_settings.downloaders.torbox.api_key
self.downloader = "torbox"
elif self.app_settings.downloaders.all_debrid.enabled:
self.api_key = self.app_settings.downloaders.all_debrid.api_key
self.downloader = "alldebrid"
else:
logger.error("No downloader enabled, please enable at least one.")
return False
Expand Down
50 changes: 25 additions & 25 deletions src/routers/secure/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from program.services.scrapers import Scraping
from program.services.scrapers.shared import rtn
from program.types import Event
from program.services.downloaders.models import TorrentContainer, TorrentInfo


class Stream(BaseModel):
Expand All @@ -38,8 +39,8 @@ class StartSessionResponse(BaseModel):
message: str
session_id: str
torrent_id: str
torrent_info: dict
containers: Optional[List[dict]]
torrent_info: TorrentInfo
containers: Optional[List[TorrentContainer]]
expires_at: str

class SelectFilesResponse(BaseModel):
Expand Down Expand Up @@ -102,10 +103,10 @@ def __init__(self, id: str, item_id: str, magnet: str):
self.id = id
self.item_id = item_id
self.magnet = magnet
self.torrent_id: Optional[str] = None
self.torrent_info: Optional[dict] = None
self.containers: Optional[list] = None
self.selected_files: Optional[dict] = None
self.torrent_id: Optional[Union[int, str]] = None
self.torrent_info: Optional[TorrentInfo] = None
self.containers: Optional[TorrentContainer] = None
self.selected_files: Optional[Dict[str, Dict[str, Union[str, int]]]] = None
self.created_at: datetime = datetime.now()
self.expires_at: datetime = datetime.now() + timedelta(minutes=5)

Expand Down Expand Up @@ -218,13 +219,10 @@ def scrape_item(request: Request, id: str) -> ScrapeItemResponse:
.unique()
.scalar_one_or_none()
)
streams = scraper.scrape(item)
stream_containers = downloader.get_instant_availability([stream for stream in streams.keys()])
for stream in streams.keys():
if len(stream_containers.get(stream, [])) > 0:
streams[stream].is_cached = True
else:
streams[stream].is_cached = False
streams: Dict[str, Stream] = scraper.scrape(item)
for stream in streams.values():
container = downloader.get_instant_availability(stream.infohash, item.type)
stream.is_cached = bool(container and container.cached)
log_string = item.log_string

return {
Expand Down Expand Up @@ -278,23 +276,25 @@ def get_info_hash(magnet: str) -> str:
session = session_manager.create_session(item_id or imdb_id, info_hash)

try:
torrent_id = downloader.add_torrent(info_hash)
torrent_info = downloader.get_torrent_info(torrent_id)
containers = downloader.get_instant_availability([session.magnet]).get(session.magnet, None)
session_manager.update_session(session.id, torrent_id=torrent_id, torrent_info=torrent_info, containers=containers)
torrent_id: str = downloader.add_torrent(info_hash)
torrent_info: TorrentInfo = downloader.get_torrent_info(torrent_id)
container: Optional[TorrentContainer] = downloader.get_instant_availability(info_hash, item.type)
session_manager.update_session(session.id, torrent_id=torrent_id, torrent_info=torrent_info, containers=container)
except Exception as e:
background_tasks.add_task(session_manager.abort_session, session.id)
raise HTTPException(status_code=500, detail=str(e))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Use exception chaining to preserve traceback

When raising a new exception inside an except block, use exception chaining (raise ... from e) so that the original exception is preserved as the cause and its traceback information is not lost.

Apply this fix:

- raise HTTPException(status_code=500, detail=str(e))
+ raise HTTPException(status_code=500, detail=str(e)) from e
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
raise HTTPException(status_code=500, detail=str(e))
raise HTTPException(status_code=500, detail=str(e)) from e
🧰 Tools
🪛 Ruff (0.8.0)

285-285: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)


return {
data = {
"message": "Started manual scraping session",
"session_id": session.id,
"torrent_id": torrent_id,
"torrent_info": torrent_info,
"containers": containers,
"containers": [container] if container else None,
"expires_at": session.expires_at.isoformat()
}

return StartSessionResponse(**data)

@router.post(
"/scrape/select_files/{session_id}",
summary="Select files for torrent id, for this to be instant it requires files to be one of /manual/instant_availability response containers",
Expand All @@ -307,7 +307,7 @@ def manual_select_files(request: Request, session_id, files: Container) -> Selec
raise HTTPException(status_code=404, detail="Session not found or expired")
if not session.torrent_id:
session_manager.abort_session(session_id)
raise HTTPException(status_code=500, detail="")
raise HTTPException(status_code=500, detail="No torrent ID found")

download_type = "uncached"
if files.model_dump() in session.containers:
Expand Down Expand Up @@ -336,7 +336,7 @@ async def manual_update_attributes(request: Request, session_id, data: Union[Con
raise HTTPException(status_code=404, detail="Session not found or expired")
if not session.item_id:
session_manager.abort_session(session_id)
raise HTTPException(status_code=500, detail="")
raise HTTPException(status_code=500, detail="No item ID found")

with db.Session() as db_session:
if str(session.item_id).startswith("tt") and not db_functions.get_item_by_external_id(imdb_id=session.item_id) and not db_functions.get_item_by_id(session.item_id):
Expand All @@ -357,8 +357,8 @@ async def manual_update_attributes(request: Request, session_id, data: Union[Con
item.reset()
item.file = data.filename
item.folder = data.filename
item.alternative_folder = session.torrent_info["original_filename"]
item.active_stream = {"infohash": session.magnet, "id": session.torrent_info["id"]}
item.alternative_folder = session.torrent_info.alternative_filename
item.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
Comment on lines +360 to +361
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Ensure session.torrent_info is not None before accessing attributes

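Accessing session.torrent_info.alternative_filename assumes that session.torrent_info is not None, but the session declares it as Optional[TorrentInfo] and initialises it to None. Add a check before these accesses to prevent a potential AttributeError.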

Apply this fix:

+ if session.torrent_info:
+     item.alternative_folder = session.torrent_info.alternative_filename
+     item.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
+ else:
+     raise HTTPException(status_code=500, detail="Torrent info is missing in the session")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
item.alternative_folder = session.torrent_info.alternative_filename
item.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
if session.torrent_info:
item.alternative_folder = session.torrent_info.alternative_filename
item.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
else:
raise HTTPException(status_code=500, detail="Torrent info is missing in the session")

torrent = rtn.rank(session.magnet, session.magnet)
item.streams.append(ItemStream(torrent))
item_ids_to_submit.append(item.id)
Expand All @@ -377,8 +377,8 @@ async def manual_update_attributes(request: Request, session_id, data: Union[Con
item_episode.reset()
item_episode.file = episode_data.filename
item_episode.folder = episode_data.filename
item_episode.alternative_folder = session.torrent_info["original_filename"]
item_episode.active_stream = {"infohash": session.magnet, "id": session.torrent_info["id"]}
item_episode.alternative_folder = session.torrent_info.alternative_filename
item_episode.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
Comment on lines +380 to +381
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add null check for session.torrent_info before accessing

As in the previous comment, ensure that session.torrent_info is not None before accessing its attributes inside the loop over episodes.

Apply this fix:

+ if session.torrent_info:
+     item_episode.alternative_folder = session.torrent_info.alternative_filename
+     item_episode.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
+ else:
+     raise HTTPException(status_code=500, detail="Torrent info is missing in the session")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
item_episode.alternative_folder = session.torrent_info.alternative_filename
item_episode.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
if session.torrent_info:
item_episode.alternative_folder = session.torrent_info.alternative_filename
item_episode.active_stream = {"infohash": session.magnet, "id": session.torrent_info.id}
else:
raise HTTPException(status_code=500, detail="Torrent info is missing in the session")

torrent = rtn.rank(session.magnet, session.magnet)
item_episode.streams.append(ItemStream(torrent))
item_ids_to_submit.append(item_episode.id)
Expand Down
Loading