Files
decluttarr/src/jobs/remove_bad_files.py
NaruZosa 2e6973bea4 Added sigterm handling to exit cleanly when running in Docker.
Fixed typos and various linting issues such as PEP violations.
Added ruff and fixed common issues and linting issues.
2025-05-25 16:54:51 +10:00

205 lines
8.4 KiB
Python

from pathlib import Path
from src.jobs.removal_job import RemovalJob
from src.utils.log_setup import logger
good_extensions = [
# Movies, TV Shows - Radarr, Sonarr, Whisparr
".webm", ".m4v", ".3gp", ".nsv", ".ty", ".strm", ".rm", ".rmvb", ".m3u", ".ifo", ".mov", ".qt", ".divx", ".xvid", ".bivx", ".nrg", ".pva", ".wmv", ".asf", ".asx", ".ogm", ".ogv", ".m2v", ".avi", ".bin", ".dat", ".dvr-ms", ".mpg", ".mpeg", ".mp4", ".avc", ".vp3", ".svq3", ".nuv", ".viv", ".dv", ".fli", ".flv", ".wpl", ".img", ".iso", ".vob", ".mkv", ".mk3d", ".ts", ".wtv", ".m2ts",
# Subs - Radarr, Sonarr, Whisparr
".sub", ".srt", ".idx",
# Audio - Lidarr, Readarr
".aac", ".aif", ".aiff", ".aifc", ".ape", ".flac", ".mp2", ".mp3", ".m4a", ".m4b", ".m4p", ".mp4a", ".oga", ".ogg", ".opus", ".vorbis", ".wma", ".wav", ".wv", "wavepack",
# Text - Readarr
".epub", ".kepub", ".mobi", ".azw3", ".pdf",
]
bad_keywords = ["Sample", "Trailer"]
class RemoveBadFiles(RemovalJob):
queue_scope = "normal"
blocklist = True
# fmt: off
bad_keyword_limit = 500 # Megabyte; do not remove items larger than that
# fmt: on
async def _find_affected_items(self):
queue = await self.queue_manager.get_queue_items(queue_scope="normal")
# Get in-scope download IDs
result = self._group_download_ids_by_client(queue)
affected_items = []
for download_client, info in result.items():
download_client_type = info["download_client_type"]
download_ids = info["download_ids"]
if download_client_type == "qbittorrent":
client_items = await self._handle_qbit(download_client, download_ids, queue)
affected_items.extend(client_items)
return affected_items
def _group_download_ids_by_client(self, queue):
"""
Group all relevant download IDs by download client.
Limited to qbittorrent currently, as no other download clients implemented
"""
result = {}
for item in queue:
download_client_name = item.get("downloadClient")
if not download_client_name:
continue
download_client, download_client_type = self.settings.download_clients.get_download_client_by_name(download_client_name)
if not download_client or not download_client_type:
continue
# Skip non-qBittorrent clients for now
if download_client_type != "qbittorrent":
continue
result.setdefault(download_client, {
"download_client_type": download_client_type,
"download_ids": set(),
})["download_ids"].add(item["downloadId"])
return result
async def _handle_qbit(self, qbit_client, hashes, queue):
"""Handle qBittorrent-specific logic for marking files as 'Do Not Download'."""
affected_items = []
qbit_items = await qbit_client.get_qbit_items(hashes=hashes)
for qbit_item in self._get_items_to_process(qbit_items):
self.arr.tracker.extension_checked.append(qbit_item["hash"])
torrent_files = await self._get_active_files(qbit_client, qbit_item["hash"])
stoppable_files = self._get_stoppable_files(torrent_files)
if not stoppable_files:
continue
await self._mark_files_as_stopped(qbit_client, qbit_item["hash"], stoppable_files)
self._log_stopped_files(stoppable_files, qbit_item["name"])
if self._all_files_stopped(torrent_files, stoppable_files):
logger.verbose(">>> All files in this torrent have been marked as 'Do not Download'. Removing torrent.")
affected_items.extend(self._match_queue_items(queue, qbit_item["hash"]))
return affected_items
# -- Helper functions for qbit handling --
def _get_items_to_process(self, qbit_items):
"""
Return only downloads that have metadata, are supposedly downloading.
This is to prevent the case where a download has metadata but is not actually downloading.
Additionally, each download should be checked at least once (for bad extensions), and thereafter only if availability drops to less than 100%
"""
return [
item for item in qbit_items
if (
item.get("has_metadata")
and item["state"] in {"downloading", "forcedDL", "stalledDL"}
and (
item["hash"] not in self.arr.tracker.extension_checked
or item["availability"] < 1
)
)
]
@staticmethod
async def _get_active_files(qbit_client, torrent_hash) -> list[dict]:
"""Return only files from the torrent that are still set to download, with file extension and name."""
files = await qbit_client.get_torrent_files(torrent_hash) # Await the async method
return [
{
**f, # Include all original file properties
"file_name": Path(f["name"]).name, # Add proper filename (without folder)
"file_extension": Path(f["name"]).suffix, # Add file_extension (e.g., .mp3)
}
for f in files if f["priority"] > 0
]
@staticmethod
def _log_stopped_files(stopped_files, torrent_name) -> None:
logger.verbose(
f">>> Stopped downloading {len(stopped_files)} file{'s' if len(stopped_files) != 1 else ''} in: {torrent_name}",
)
for file, reasons in stopped_files:
logger.verbose(f">>> - {file['file_name']} ({' & '.join(reasons)})")
def _get_stoppable_files(self, torrent_files):
"""Return files that can be marked as 'Do not Download' based on specific conditions."""
stoppable_files = []
for file in torrent_files:
# If the file has metadata and its priority is greater than 0, we can check it
if file["priority"] > 0:
reasons = []
# Check for bad extension
if self._is_bad_extension(file):
reasons.append(f"Bad extension: {file['file_extension']}")
# Check for bad keywords
if self._contains_bad_keyword(file):
reasons.append("Contains bad keyword in path")
# Check if the file has low availability
if self._is_complete_partial(file):
reasons.append(f"Low availability: {file['availability'] * 100:.1f}%")
# Only add to stoppable_files if there are reasons to stop the file
if reasons:
stoppable_files.append((file, reasons))
return stoppable_files
@staticmethod
def _is_bad_extension(file) -> bool:
"""Check if the file has a bad extension."""
return file["file_extension"].lower() not in good_extensions
def _contains_bad_keyword(self, file):
"""Check if the file path contains a bad keyword and is smaller than the limit."""
file_path = file.get("name", "").lower()
file_size_mb = file.get("size", 0) / 1024 / 1024
return (
any(keyword.lower() in file_path for keyword in bad_keywords)
and file_size_mb <= self.bad_keyword_limit
)
@staticmethod
def _is_complete_partial(file) -> bool:
"""Check if the availability is less than 100% and the file is not fully downloaded."""
return bool(file["availability"] < 1 and file["progress"] != 1)
async def _mark_files_as_stopped(self, qbit_client, torrent_hash, stoppable_files):
"""Mark specific files as 'Do Not Download' in qBittorrent."""
for file, _ in stoppable_files:
if not self.settings.general.test_run:
await qbit_client.set_torrent_file_priority(torrent_hash, file["index"], 0)
@staticmethod
def _all_files_stopped(torrent_files, stoppable_files) -> bool:
"""Check if all files are either stopped (priority 0) or in the stoppable files list."""
stoppable_file_indexes = {file[0]["index"] for file in stoppable_files}
return all(f["priority"] == 0 or f["index"] in stoppable_file_indexes for f in torrent_files)
@staticmethod
def _match_queue_items(queue, download_hash) -> list:
"""Find matching queue item(s) by downloadId (uppercase)."""
return [
item for item in queue
if item["downloadId"].upper() == download_hash.upper()
]