Add main code.

2023-07-19 19:11:42 +02:00
parent 1273fefe12
commit 9ed044859b
4 changed files with 85 additions and 10 deletions
@@ -0,0 +1,53 @@
 # -*- encoding: utf-8 -*-
 import logging
 from pathlib import Path
 from scans.lelscans import LelScansFetcher
 # Root folder (relative to the current working directory) under which the
 # scans are stored; main() creates one sub-folder per manga inside it.
 SCAN_FOLDER = Path("scans")
 # Chapter numbers that main() must skip. Entries are compared against
 # `chapter.number`, which main() handles as a string, so use strings here.
 IGNORE_NUMBERS = []  # e.g. [str(i) for i in range(1, 910 + 1)]
 def main():
    """
    Download every chapter of the configured manga that is not on disk yet.

    The manga is looked up with a LelScansFetcher, its chapters are listed,
    and each chapter that is neither in IGNORE_NUMBERS nor already present
    as a PDF under SCAN_FOLDER/<manga>/ is fetched and merged into a PDF.

    Raises:
        SystemExit: If the fetcher cannot find the configured manga.
    """
    logging.basicConfig(level=logging.INFO)
    manga = "One Punch Man"  # "One Piece"
    # parents=True so that a missing SCAN_FOLDER is created as well instead
    # of failing with FileNotFoundError.
    manga_folder = SCAN_FOLDER.joinpath(manga)
    manga_folder.mkdir(parents=True, exist_ok=True)
    fetcher = LelScansFetcher()
    found = fetcher.find_manga(manga)
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if found is None:
        raise SystemExit("Manga {!r} not found.".format(manga))
    chapters = fetcher.list_chapters(found)
    # Guard the summary below: indexing an empty list would raise IndexError.
    if not chapters:
        print("Found 0 scans.")
        return
    print(
        "Found {} scans from {} to {}... ".format(
            len(chapters), chapters[0].number, chapters[-1].number
        )
    )
    # download the scans that are not on disk yet
    for chapter in chapters:
        number = chapter.number
        # ignore the number
        if number in IGNORE_NUMBERS:
            continue
        # NOTE(review): the "ops_" prefix is used whatever the manga is;
        # kept as-is so PDFs that already exist are still recognised.
        pdf = manga_folder.joinpath("ops_{}.pdf".format(number))
        # the chapter was already downloaded and merged
        if pdf.exists():
            continue
        # dots are replaced so the folder name does not look like "name.ext"
        folder = manga_folder.joinpath(number.replace(".", "_"))
        fetcher.fetch_chapter(chapter, folder, pdf)
 # Run the download only when this file is executed as a script, not when
 # it is imported.
 if __name__ == "__main__":
    main()
@@ -52,9 +52,3 @@ warn_unused_configs = true
 [[tool.mypy.overrides]]
 module = "img2pdf.*"
 ignore_missing_imports = true
 [tool.pyright]
 # reportUnknownVariableType = false
 # reportMissingTypeStubs = false
 # reportUnknownMemberType = false
 # reportUnknownArgumentType = false
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 import logging
 import tempfile
 import urllib.request
 from dataclasses import dataclass
@@ -8,7 +9,7 @@ from pathlib import Path
 from typing import cast
 from bs4 import BeautifulSoup
-from PIL import Image
+from PIL import Image, ImageFile
 from .scans import Chapter, Manga, ScanFetcher
@@ -21,6 +22,11 @@ _REQUEST_HEADERS = {
    )
 }
 # Let Pillow load truncated image files instead of raising an error.
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 LOGGER = logging.getLogger(__package__)
@dataclass(frozen=True)
 class LelScansManga(Manga):
@@ -72,7 +78,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
        chapter: LelScansChapter,
        folder: Path,
    ) -> list[Path]:
-        print("Retrieving scan {}... ".format(chapter.number))
+        LOGGER.info("Retrieving scan {}... ".format(chapter.number))
        folder.mkdir(exist_ok=True)
        # retrieve the main page
@@ -92,7 +98,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
        # Download each page of the scan
        def retrieve_page(page: tuple[int, str]) -> Path | None:
            number, url = page
-            print("  Retrieving page {:02}/{:02d}".format(number, len(pages)))
+            LOGGER.info("  Retrieving page {:02}/{:02d}".format(number, len(pages)))
            soup = BeautifulSoup(
                self._request(url),
                "html.parser",
@@ -114,7 +120,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
            try:
                Image.open(filepath).convert("RGB").save(filepath)
            except (OSError, KeyError):
-                print(
+                LOGGER.warning(
                    "    Failed to convert page {:02}/{:02d}".format(number, len(pages))
                )
@@ -136,6 +142,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
            images = self._fetch_chapter(chapter, folder)
        if pdf is not None:
            LOGGER.info("Merging scan {}... ".format(chapter.number))
            self._merge_scan(pdf, images)
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 import re
 from abc import abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
@@ -34,6 +35,26 @@ class ScanFetcher(Generic[_MangaT, _ChapterT]):
        """
        pass
    def find_manga(self, name_regex: re.Pattern | str) -> _MangaT | None:
        """
        Look up a manga by name among the ones returned by list_mangas().

        Args:
            name_regex: Pattern searched for in each manga name (a match
                anywhere in the name counts). A string is compiled first.

        Returns:
            The first manga whose name matches, in list_mangas() order, or
            None when no name matches.
        """
        pattern = (
            re.compile(name_regex) if isinstance(name_regex, str) else name_regex
        )
        candidates = self.list_mangas()
        return next(
            (entry for entry in candidates if pattern.search(entry.name)),
            None,
        )
    @abstractmethod
    def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
        """