diff --git a/Writerside/topics/Indexer-Settings.md b/Writerside/topics/Indexer-Settings.md index de3f56a..47712e0 100644 --- a/Writerside/topics/Indexer-Settings.md +++ b/Writerside/topics/Indexer-Settings.md @@ -1,6 +1,7 @@ # Indexers -Indexer settings are configured in the `[indexers]` section of your `config.toml` file. MediaManager supports both Prowlarr and Jackett as indexer providers. +Indexer settings are configured in the `[indexers]` section of your `config.toml` file. MediaManager supports both +Prowlarr and Jackett as indexer providers. ## Prowlarr (`[indexers.prowlarr]`) @@ -16,6 +17,12 @@ Base URL of your Prowlarr instance. API key for Prowlarr. You can find this in Prowlarr's settings under General. +- `reject_torrents_on_url_error` + +Set to `true` to reject torrents if there is a URL error when fetching from Prowlarr. Until MediaManager v1.9.0 the +default behavior was `false`, but from v1.9.0 onwards the default is `true`. It's recommended to set this to `true` to +avoid adding possibly invalid torrents. 
+ ## Jackett (`[indexers.jackett]`) - `enabled` @@ -40,14 +47,14 @@ Here's a complete example of the indexers section in your `config.toml`: ```toml [indexers] - [indexers.prowlarr] - enabled = true - url = "http://prowlarr:9696" - api_key = "your_prowlarr_api_key" +[indexers.prowlarr] +enabled = true +url = "http://prowlarr:9696" +api_key = "your_prowlarr_api_key" - [indexers.jackett] - enabled = false - url = "http://jackett:9117" - api_key = "your_jackett_api_key" - indexers = ["1337x", "rarbg"] +[indexers.jackett] +enabled = false +url = "http://jackett:9117" +api_key = "your_jackett_api_key" +indexers = ["1337x", "rarbg"] ``` diff --git a/config.dev.toml b/config.dev.toml index e3e5bc8..50af4de 100644 --- a/config.dev.toml +++ b/config.dev.toml @@ -119,6 +119,7 @@ base_path = "/api" enabled = false url = "http://localhost:9696" api_key = "" +reject_torrents_on_url_error = true # Jackett settings [indexers.jackett] diff --git a/config.example.toml b/config.example.toml index d828b72..4498289 100644 --- a/config.example.toml +++ b/config.example.toml @@ -119,6 +119,7 @@ base_path = "/api" enabled = false url = "http://localhost:9696" api_key = "" +reject_torrents_on_url_error = true # Jackett settings [indexers.jackett] diff --git a/media_manager/indexer/config.py b/media_manager/indexer/config.py index db16423..04e8063 100644 --- a/media_manager/indexer/config.py +++ b/media_manager/indexer/config.py @@ -5,6 +5,7 @@ class ProwlarrConfig(BaseSettings): enabled: bool = False api_key: str = "" url: str = "http://localhost:9696" + reject_torrents_on_url_error: bool = True class JackettConfig(BaseSettings): diff --git a/media_manager/indexer/indexers/jackett.py b/media_manager/indexer/indexers/jackett.py index f495145..a03d212 100644 --- a/media_manager/indexer/indexers/jackett.py +++ b/media_manager/indexer/indexers/jackett.py @@ -1,5 +1,7 @@ +import concurrent import logging import xml.etree.ElementTree as ET +from concurrent.futures.thread import 
ThreadPoolExecutor from xml.etree.ElementTree import Element import requests @@ -26,67 +28,84 @@ class Jackett(GenericIndexer): # NOTE: this could be done in parallel, but if there aren't more than a dozen indexers, it shouldn't matter def search(self, query: str, is_tv: bool) -> list[IndexerQueryResult]: + log.debug("Searching for " + query) + + futures = [] + with ThreadPoolExecutor() as executor, requests.Session() as session: + for indexer in self.indexers: + future = executor.submit( + self.get_torrents_by_indexer, indexer, query, is_tv, session + ) + futures.append(future) + + responses = [] + + for future in concurrent.futures.as_completed(futures): + responses.extend(future.result()) + + return responses + + def get_torrents_by_indexer( + self, indexer: str, query: str, is_tv: bool, session: requests.Session + ) -> list[IndexerQueryResult]: download_volume_factor = 1.0 # Default value upload_volume_factor = 1 # Default value seeders = 0 # Default value - log.debug("Searching for " + query) - responses = [] - for indexer in self.indexers: - log.debug(f"Searching in indexer: {indexer}") - url = ( - self.url - + f"/api/v2.0/indexers/{indexer}/results/torznab/api?apikey={self.api_key}&t={'tvsearch' if is_tv else 'movie'}&q={query}" + url = ( + self.url + + f"/api/v2.0/indexers/{indexer}/results/torznab/api?apikey={self.api_key}&t={'tvsearch' if is_tv else 'movie'}&q={query}" + ) + response = session.get(url) + + if response.status_code != 200: + log.error( + f"Jackett error with indexer {indexer}, error: {response.status_code}" ) - response = requests.get(url) - responses.append(response) + return [] + result_list: list[IndexerQueryResult] = [] + xml_tree = ET.fromstring(response.content) xmlns = { "torznab": "http://torznab.com/schemas/2015/feed", "atom": "http://www.w3.org/2005/Atom", } - result_list: list[IndexerQueryResult] = [] - for response in responses: - if response.status_code == 200: - xml_tree = ET.fromstring(response.content) - for item in 
xml_tree.findall("channel/item"): - attributes: list[Element] = [ - x for x in item.findall("torznab:attr", xmlns) - ] - for attribute in attributes: - if attribute.attrib["name"] == "seeders": - seeders = int(attribute.attrib["value"]) - if attribute.attrib["name"] == "downloadvolumefactor": - download_volume_factor = float(attribute.attrib["value"]) - if attribute.attrib["name"] == "uploadvolumefactor": - upload_volume_factor = int(attribute.attrib["value"]) - flags = [] - if download_volume_factor == 0: - flags.append("freeleech") - if download_volume_factor == 0.5: - flags.append("halfleech") - if download_volume_factor == 0.75: - flags.append("freeleech75") - if download_volume_factor == 0.25: - flags.append("freeleech25") - if upload_volume_factor == 2: - flags.append("doubleupload") + for item in xml_tree.findall("channel/item"): + attributes: list[Element] = [x for x in item.findall("torznab:attr", xmlns)] + for attribute in attributes: + if attribute.attrib["name"] == "seeders": + seeders = int(attribute.attrib["value"]) + if attribute.attrib["name"] == "downloadvolumefactor": + download_volume_factor = float(attribute.attrib["value"]) + if attribute.attrib["name"] == "uploadvolumefactor": + upload_volume_factor = int(attribute.attrib["value"]) + flags = [] + if download_volume_factor == 0: + flags.append("freeleech") + if download_volume_factor == 0.5: + flags.append("halfleech") + if download_volume_factor == 0.75: + flags.append("freeleech75") + if download_volume_factor == 0.25: + flags.append("freeleech25") + if upload_volume_factor == 2: + flags.append("doubleupload") - result = IndexerQueryResult( - title=item.find("title").text, - download_url=str(item.find("enclosure").attrib["url"]), - seeders=seeders, - flags=flags, - size=int(item.find("size").text), - usenet=False, # always False, because Jackett doesn't support usenet - age=0, # always 0 for torrents, as Jackett does not provide age information in a convenient format - 
indexer=item.find("jackettindexer").text - if item.find("jackettindexer") is not None - else None, - ) - result_list.append(result) - log.debug(f"Raw result: {result.model_dump()}") - else: - log.error(f"Jacket Error: {response.status_code}") - return [] + result = IndexerQueryResult( + title=item.find("title").text, + download_url=str(item.find("enclosure").attrib["url"]), + seeders=seeders, + flags=flags, + size=int(item.find("size").text), + usenet=False, # always False, because Jackett doesn't support usenet + age=0, # always 0 for torrents, as Jackett does not provide age information in a convenient format + indexer=item.find("jackettindexer").text + if item.find("jackettindexer") is not None + else None, + ) + result_list.append(result) + + log.info( + f"found {len(result_list)} results for query '{query}' from indexer '{indexer}'" + ) return result_list diff --git a/media_manager/indexer/indexers/prowlarr.py b/media_manager/indexer/indexers/prowlarr.py index 2311c7a..a22375d 100644 --- a/media_manager/indexer/indexers/prowlarr.py +++ b/media_manager/indexer/indexers/prowlarr.py @@ -1,6 +1,9 @@ +import concurrent import logging +from concurrent.futures import ThreadPoolExecutor import requests +from requests.adapters import HTTPAdapter from media_manager.indexer.indexers.generic import GenericIndexer from media_manager.config import AllEncompassingConfig @@ -22,6 +25,7 @@ class Prowlarr(GenericIndexer): config = AllEncompassingConfig().indexers.prowlarr self.api_key = config.api_key self.url = config.url + self.reject_torrents_on_url_error = config.reject_torrents_on_url_error log.debug("Registering Prowlarr as Indexer") def search(self, query: str, is_tv: bool) -> list[IndexerQueryResult]: @@ -34,70 +38,89 @@ class Prowlarr(GenericIndexer): "categories": "5000" if is_tv else "2000", # TV: 5000, Movies: 2000 "limit": 10000, } + with requests.Session() as session: + adapter = HTTPAdapter(pool_connections=100, pool_maxsize=100) + session.mount("http://", 
adapter) + session.mount("https://", adapter) - response = requests.get(url, params=params) - if response.status_code == 200: + response = session.get(url, params=params) + log.debug(f"Prowlarr response time for query '{query}': {response.elapsed}") + + if response.status_code != 200: + log.error(f"Prowlarr Error: {response.status_code}") + return [] + + futures = [] result_list: list[IndexerQueryResult] = [] - for result in response.json(): - if result["protocol"] == "torrent": - initial_url = None - if "downloadUrl" in result: - log.info(f"Using download URL: {result['downloadUrl']}") - initial_url = result["downloadUrl"] - elif "magnetUrl" in result: - log.info( - f"Using magnet URL as fallback for download URL: {result['magnetUrl']}" - ) - initial_url = result["magnetUrl"] - elif "guid" in result: - log.warning( - f"Using guid as fallback for download URL: {result['guid']}" - ) - initial_url = result["guid"] - else: - log.error(f"No valid download URL found for result: {result}") - continue - if not initial_url.startswith("magnet:"): - try: - final_download_url = follow_redirects_to_final_torrent_url( - initial_url=initial_url - ) - except RuntimeError as e: - log.error( - f"Failed to follow redirects for {initial_url}, falling back to the initial url as download url, error: {e}" - ) - final_download_url = initial_url - else: - final_download_url = initial_url - result_list.append( - IndexerQueryResult( - download_url=final_download_url, - title=result["sortTitle"], - seeders=result["seeders"], - flags=result["indexerFlags"], - size=result["size"], - usenet=False, - age=0, # Torrent results do not need age information - indexer=result["indexer"] if "indexer" in result else None, - ) - ) - else: - result_list.append( - IndexerQueryResult( - download_url=result["downloadUrl"], - title=result["sortTitle"], - seeders=0, # Usenet results do not have seeders - flags=result["indexerFlags"], - size=result["size"], - usenet=True, - age=int(result["ageMinutes"]) * 60, - 
indexer=result["indexer"] if "indexer" in result else None, - ) - ) - log.debug("torrent result: " + result.__str__()) + with ThreadPoolExecutor() as executor: + for item in response.json(): + future = executor.submit(self.process_result, item, session) + futures.append(future) + + for future in concurrent.futures.as_completed(futures): + result = future.result() + if result is not None: + result_list.append(result) return result_list + + def process_result( + self, result, session: requests.Session + ) -> IndexerQueryResult | None: + if result["protocol"] == "torrent": + initial_url = None + if "downloadUrl" in result: + log.info(f"Using download URL: {result['downloadUrl']}") + initial_url = result["downloadUrl"] + elif "magnetUrl" in result: + log.info( + f"Using magnet URL as fallback for download URL: {result['magnetUrl']}" + ) + initial_url = result["magnetUrl"] + elif "guid" in result: + log.warning( + f"Using guid as fallback for download URL: {result['guid']}" + ) + initial_url = result["guid"] + else: + log.error(f"No valid download URL found for result: {result}") + return None + + if not initial_url.startswith("magnet:"): + try: + final_download_url = follow_redirects_to_final_torrent_url( + initial_url=initial_url, + session=session, + ) + except RuntimeError as e: + log.debug( + f"Failed to follow redirects for {initial_url}, falling back to the initial url as download url, error: {e}" + ) + if self.reject_torrents_on_url_error: + return None + else: + final_download_url = initial_url + else: + final_download_url = initial_url + return IndexerQueryResult( + download_url=final_download_url, + title=result["sortTitle"], + seeders=result["seeders"], + flags=result["indexerFlags"], + size=result["size"], + usenet=False, + age=0, # Torrent results do not need age information + indexer=result["indexer"] if "indexer" in result else None, + ) else: - log.error(f"Prowlarr Error: {response.status_code}") - return [] + return IndexerQueryResult( + 
download_url=result["downloadUrl"], + title=result["sortTitle"], + seeders=0, # Usenet results do not have seeders + flags=result["indexerFlags"], + size=result["size"], + usenet=True, + age=int(result["ageMinutes"]) * 60, + indexer=result["indexer"] if "indexer" in result else None, + ) diff --git a/media_manager/indexer/utils.py b/media_manager/indexer/utils.py index f933be7..a2c6dd2 100644 --- a/media_manager/indexer/utils.py +++ b/media_manager/indexer/utils.py @@ -1,4 +1,5 @@ import logging +from urllib.parse import urljoin import requests @@ -111,57 +112,41 @@ def evaluate_indexer_query_results( return query_results -def follow_redirects_to_final_torrent_url(initial_url: str) -> str | None: +def follow_redirects_to_final_torrent_url( + initial_url: str, session: requests.Session, timeout: float = 10 +) -> str: """ Follows redirects to get the final torrent URL. :param initial_url: The initial URL to follow. - :return: The final torrent URL or None if it fails. + :param session: A requests session to use for the requests. + :param timeout: Timeout in seconds for each redirect request. + :return: The final torrent URL. + :raises: RuntimeError if it fails. 
""" current_url = initial_url - final_url = None try: - while True: - response = requests.get(current_url, allow_redirects=False) + for _ in range(10): # Limit redirects to prevent infinite loops + response = session.get(current_url, allow_redirects=False, timeout=timeout) if 300 <= response.status_code < 400: redirect_url = response.headers.get("Location") - if redirect_url.startswith("http://") or redirect_url.startswith( - "https://" - ): - # It's an HTTP/HTTPS redirect, continue following - current_url = redirect_url - log.info(f"Following HTTP/HTTPS redirect to: {current_url}") - elif redirect_url.startswith("magnet:"): - # It's a Magnet URL, this is our final destination - final_url = redirect_url - log.info(f"Reached Magnet URL: {final_url}") - break - else: - log.error( - f"Reached unexpected non-HTTP/HTTPS/magnet URL: {redirect_url}" - ) - raise RuntimeError( - f"Reached unexpected non-HTTP/HTTPS/magnet URL: {redirect_url}" - ) - else: - # Not a redirect, so the current URL is the final one - final_url = current_url - log.info(f"Reached final (non-redirect) URL: {final_url}") - break - except requests.exceptions.RequestException as e: - log.error(f"An error occurred during the request: {e}") - raise RuntimeError(f"An error occurred during the request: {e}") - if not final_url: - log.error("Final URL could not be determined.") - raise RuntimeError("Final URL could not be determined.") - if final_url.startswith("http://") or final_url.startswith("https://"): - log.info("Final URL protocol: HTTP/HTTPS") - elif final_url.startswith("magnet:"): - log.info("Final URL protocol: Magnet") - else: - log.error(f"Final URL is not a valid HTTP/HTTPS or Magnet URL: {final_url}") - raise RuntimeError( - f"Final URL is not a valid HTTP/HTTPS or Magnet URL: {final_url}" - ) + if not redirect_url: + raise RuntimeError("Redirect response without Location header") - return final_url + # Resolve relative redirects against the last URL + current_url = urljoin(current_url, 
redirect_url) + log.debug(f"Following redirect to: {current_url}") + + if current_url.startswith("magnet:"): + return current_url + else: + response.raise_for_status() # Raise an exception for bad status codes + return current_url + else: + raise RuntimeError("Exceeded maximum number of redirects") + + except requests.exceptions.RequestException as e: + log.debug(f"An error occurred during the request for {initial_url}: {e}") + raise RuntimeError(f"An error occurred during the request: {e}") from e + + return current_url diff --git a/media_manager/main.py b/media_manager/main.py index 287a4bb..4e4d941 100644 --- a/media_manager/main.py +++ b/media_manager/main.py @@ -44,6 +44,13 @@ logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(funcName)s(): %(message)s", stream=sys.stdout, ) + +logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("requests").setLevel(logging.WARNING) +logging.getLogger("transmission_rpc").setLevel(logging.WARNING) +logging.getLogger("qbittorrentapi").setLevel(logging.WARNING) +logging.getLogger("sabnzbd_api").setLevel(logging.WARNING) + log = logging.getLogger(__name__) from psycopg.errors import UniqueViolation # noqa: E402 diff --git a/media_manager/torrent/download_clients/transmission.py b/media_manager/torrent/download_clients/transmission.py index 03b89cf..48ac85b 100644 --- a/media_manager/torrent/download_clients/transmission.py +++ b/media_manager/torrent/download_clients/transmission.py @@ -36,7 +36,6 @@ class TransmissionDownloadClient(AbstractDownloadClient): password=self.config.password, protocol="https" if self.config.https_enabled else "http", path=self.config.path, - logger=log, ) # Test connection self._client.session_stats()