# -*- encoding: utf-8 -*-

import logging
from pathlib import Path

from scans.lelscans import LelScansFetcher

# Folder containing the scans
SCAN_FOLDER = Path("scans")

# Chapter numbers (as strings) to skip,
# e.g. [str(i) for i in range(1, 910 + 1)]
IGNORE_NUMBERS: list[str] = []


def main():
    """
    Fetch every chapter of the configured manga that has no PDF yet.

    The manga is looked up by name through LelScansFetcher, its chapters are
    listed, and each chapter that is neither in IGNORE_NUMBERS nor already
    present as a PDF under SCAN_FOLDER/<manga>/ is passed to
    fetcher.fetch_chapter() together with its working folder and target PDF.

    Raises:
        SystemExit: If the fetcher does not find the manga.
    """
    logging.basicConfig(level=logging.INFO)

    manga = "One Punch Man"  # "One Piece"
    manga_folder = SCAN_FOLDER.joinpath(manga)

    # parents=True: SCAN_FOLDER itself may not exist yet on a first run, in
    # which case a plain mkdir() raises FileNotFoundError.
    manga_folder.mkdir(parents=True, exist_ok=True)

    fetcher = LelScansFetcher()

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    found = fetcher.find_manga(manga)
    if found is None:
        raise SystemExit("Manga {!r} not found.".format(manga))

    chapters = fetcher.list_chapters(found)
    if not chapters:
        # Nothing to index with chapters[0] / chapters[-1], and nothing to do.
        print("No scans found for {}.".format(manga))
        return

    print(
        "Found {} scans from {} to {}... \n".format(
            len(chapters), chapters[0].number, chapters[-1].number
        )
    )

    # check the scans that need to be downloaded
    for chapter in chapters:
        number = chapter.number

        # ignore the number
        if number in IGNORE_NUMBERS:
            continue

        # Dots in the chapter number are replaced for the working folder name
        # only; the PDF keeps the number as-is.
        folder = manga_folder.joinpath(number.replace(".", "_"))
        pdf = manga_folder.joinpath("ops_{}.pdf".format(number))

        # the scan already exists, nothing to fetch
        if pdf.exists():
            continue

        fetcher.fetch_chapter(chapter, folder, pdf)


if __name__ == "__main__":
    main()
def find_manga(self, name_regex: re.Pattern | str) -> _MangaT | None:
    """
    Look up a manga by name among the ones returned by list_mangas().

    Args:
        name_regex: Pattern searched for in each manga name. A plain string
            is compiled to a regular expression first.

    Returns:
        The first manga whose name matches the pattern, or None when no
        name matches.
    """
    pattern = re.compile(name_regex) if isinstance(name_regex, str) else name_regex

    # next() with a default stops at the first match and falls back to None.
    return next(
        (
            candidate
            for candidate in self.list_mangas()
            if pattern.search(candidate.name)
        ),
        None,
    )