diff --git a/src/holt59/scans/__main__.py b/src/holt59/scans/__main__.py index 9fa97ec..a40a32f 100644 --- a/src/holt59/scans/__main__.py +++ b/src/holt59/scans/__main__.py @@ -9,13 +9,6 @@ from .scans import Chapter, Manga, ScanFetcher _MangaT = TypeVar("_MangaT", bound=Manga) _ChapterT = TypeVar("_ChapterT", bound=Chapter) -# List of scan numbers to ignore -IGNORE_NUMBERS: dict[str, set[str]] = { - "One Piece": {str(i) for i in range(910 + 1)}, - "Solo Leveling": set(), - "Hunter X Hunter": {str(i) for i in range(390 + 1)} | {"357.1"}, -} - def list_mangas(fetcher: ScanFetcher[_MangaT, _ChapterT]): for manga in fetcher.list_mangas(): @@ -27,11 +20,16 @@ def list_chapters(fetcher: ScanFetcher[_MangaT, _ChapterT], name: str): assert manga is not None chapters = fetcher.list_chapters(manga) - for chapter in chapters: - print(chapter) + print( + "Found {} chapters for {}: {}".format( + len(chapters), name, ", ".join(chapter.number for chapter in chapters) + ) + ) -def fetch_chapters(fetcher: ScanFetcher[_MangaT, _ChapterT], name: str, folder: Path): +def fetch_chapters( + fetcher: ScanFetcher[_MangaT, _ChapterT], name: str, folder: Path, ignore: set[str] +): manga = fetcher.find_manga(name) assert manga is not None @@ -47,7 +45,7 @@ def fetch_chapters(fetcher: ScanFetcher[_MangaT, _ChapterT], name: str, folder: number = chapter.number # ignore the number - if number in IGNORE_NUMBERS.get(name, set()): + if number in ignore: continue pdf = folder.joinpath("ops_{}.pdf".format(number)) @@ -62,6 +60,21 @@ def fetch_chapters(fetcher: ScanFetcher[_MangaT, _ChapterT], name: str, folder: ) +def parse_ignore(value: str) -> set[str]: + ignore: set[str] = set() + + for part in value.split(","): + part = part.strip() + try: + start, end = (int(p) for p in part.split("-")) + for number in range(start, end + 1): + ignore.add(str(number)) + except ValueError: + ignore.add(part) + + return ignore + + def main() -> None: parser = argparse.ArgumentParser("Manga Scan Fetcher") parser.add_argument( @@ -70,6 +83,7 @@ def main() -> None: sub_parsers = parser.add_subparsers(dest="command") fetch_parser = sub_parsers.add_parser("fetch") + fetch_parser.add_argument("-i", "--ignore", type=str, required=False, default="") fetch_parser.add_argument("-o", "--output", type=Path, required=False, default=None) fetch_parser.add_argument("manga", type=str) @@ -99,11 +113,12 @@ def main() -> None: case "fetch": manga = args.manga folder: Path | None = args.output + ignore: str = args.ignore if folder is None: folder = Path("scans", manga) folder.mkdir(exist_ok=True) - fetch_chapters(fetcher, manga, folder) + fetch_chapters(fetcher, manga, folder, parse_ignore(ignore)) if __name__ == "__main__": diff --git a/src/holt59/scans/lelscans.py b/src/holt59/scans/lelscans.py index cf2f946..04c7b7d 100644 --- a/src/holt59/scans/lelscans.py +++ b/src/holt59/scans/lelscans.py @@ -2,6 +2,7 @@ import logging import tempfile +import urllib.error import urllib.request from dataclasses import dataclass from multiprocessing.pool import ThreadPool @@ -41,7 +42,6 @@ class LelScansChapter(Chapter): class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]): def _request(self, url: str) -> bytes | str: request = urllib.request.Request(url=url, headers=_REQUEST_HEADERS) - return cast(bytes | str, urllib.request.urlopen(request).read()) def list_mangas(self) -> list[LelScansManga]: @@ -110,7 +110,15 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]): .attrs["src"] .strip() ) - data = self._request(url) + try: + data = self._request(url) + except urllib.error.HTTPError: + LOGGER.warning( + " Failed to retrieve page {:02}/{:02d} from {}.".format( + number, len(pages), url + ) + ) + return None filepath = folder.joinpath(url.split("/")[-1].split("?")[0]) with open(filepath, "wb") as fp: