From 9ed044859bb76c00482e968a24c4aed6285a6276 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mika=C3=ABl=20Capelle?= <capelle.mikael@gmail.com>
Date: Wed, 19 Jul 2023 19:11:42 +0200
Subject: [PATCH] Add main code.

---
 fetch_scans.py        | 53 +++++++++++++++++++++++++++++++++++++++++++
 pyproject.toml        |  6 -----
 src/scans/lelscans.py | 15 ++++++++----
 src/scans/scans.py    | 21 +++++++++++++++++
 4 files changed, 85 insertions(+), 10 deletions(-)
 create mode 100644 fetch_scans.py

diff --git a/fetch_scans.py b/fetch_scans.py
new file mode 100644
index 0000000..89de34e
--- /dev/null
+++ b/fetch_scans.py
@@ -0,0 +1,53 @@
+# -*- encoding: utf-8 -*-
+
+import logging
+from pathlib import Path
+
+from scans.lelscans import LelScansFetcher
+
+# Root folder under which each manga gets its own sub-folder of scans
+SCAN_FOLDER = Path("scans")
+
+# Chapter numbers to skip, as strings (they are compared to chapter.number)
+IGNORE_NUMBERS = []  # [str(i) for i in range(1, 910 + 1)]
+
+
+def main():
+    """Download every missing chapter of the configured manga as a PDF."""
+    logging.basicConfig(level=logging.INFO)
+
+    manga = "One Punch Man"  # "One Piece"
+    # parents=True: SCAN_FOLDER itself may not exist on a first run
+    SCAN_FOLDER.joinpath(manga).mkdir(parents=True, exist_ok=True)
+
+    fetcher = LelScansFetcher()
+
+    # explicit check instead of assert, which is stripped under `python -O`
+    found = fetcher.find_manga(manga)
+    if found is None:
+        raise SystemExit("Manga {!r} not found.".format(manga))
+
+    chapters = fetcher.list_chapters(found)
+    if not chapters:  # chapters[0] below would raise IndexError
+        raise SystemExit("No scans found for {!r}.".format(manga))
+    print(
+        "Found {} scans from {} to {}... ".format(
+            len(chapters), chapters[0].number, chapters[-1].number
+        )
+    )
+
+    # download the scans whose PDF does not exist yet
+    for chapter in chapters:
+        number = chapter.number
+        if number in IGNORE_NUMBERS:
+            continue
+
+        folder = SCAN_FOLDER.joinpath(manga, number.replace(".", "_"))
+        pdf = SCAN_FOLDER.joinpath(manga, "ops_{}.pdf".format(number))
+
+        if not pdf.exists():
+            fetcher.fetch_chapter(chapter, folder, pdf)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index a19973f..c4f9a8c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -52,9 +52,3 @@ warn_unused_configs = true
 [[tool.mypy.overrides]]
 module = "img2pdf.*"
 ignore_missing_imports = true
-
-[tool.pyright]
-# reportUnknownVariableType = false
-# reportMissingTypeStubs = false
-# reportUnknownMemberType = false
-# reportUnknownArgumentType = false
diff --git a/src/scans/lelscans.py b/src/scans/lelscans.py
index 844a5f8..0db00fd 100644
--- a/src/scans/lelscans.py
+++ b/src/scans/lelscans.py
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 
+import logging
 import tempfile
 import urllib.request
 from dataclasses import dataclass
@@ -8,7 +9,7 @@ from pathlib import Path
 from typing import cast
 
 from bs4 import BeautifulSoup
-from PIL import Image
+from PIL import Image, ImageFile
 
 from .scans import Chapter, Manga, ScanFetcher
 
@@ -21,6 +22,11 @@ _REQUEST_HEADERS = {
     )
 }
 
+# let PIL load truncated image files instead of raising an error
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+LOGGER = logging.getLogger(__package__)
+
 
 @dataclass(frozen=True)
 class LelScansManga(Manga):
@@ -72,7 +78,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
         chapter: LelScansChapter,
         folder: Path,
     ) -> list[Path]:
-        print("Retrieving scan {}... ".format(chapter.number))
+        LOGGER.info("Retrieving scan {}... ".format(chapter.number))
         folder.mkdir(exist_ok=True)
 
         # retrieve the main page
@@ -92,7 +98,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
         # Download each page of the scan
         def retrieve_page(page: tuple[int, str]) -> Path | None:
             number, url = page
-            print("  Retrieving page {:02}/{:02d}".format(number, len(pages)))
+            LOGGER.info("  Retrieving page {:02}/{:02d}".format(number, len(pages)))
             soup = BeautifulSoup(
                 self._request(url),
                 "html.parser",
@@ -114,7 +120,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
             try:
                 Image.open(filepath).convert("RGB").save(filepath)
             except (OSError, KeyError):
-                print(
+                LOGGER.warning(
                     "    Failed to convert page {:02}/{:02d}".format(number, len(pages))
                 )
 
@@ -136,6 +142,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
             images = self._fetch_chapter(chapter, folder)
 
         if pdf is not None:
+            LOGGER.info("Merging scan {}... ".format(chapter.number))
             self._merge_scan(pdf, images)
 
 
diff --git a/src/scans/scans.py b/src/scans/scans.py
index 20e250c..1bc6af1 100644
--- a/src/scans/scans.py
+++ b/src/scans/scans.py
@@ -1,5 +1,6 @@
 # -*- encoding: utf-8 -*-
 
+import re
 from abc import abstractmethod
 from dataclasses import dataclass
 from pathlib import Path
@@ -34,6 +35,26 @@ class ScanFetcher(Generic[_MangaT, _ChapterT]):
         """
         pass
 
+    def find_manga(self, name_regex: re.Pattern | str) -> _MangaT | None:
+        """
+        Look up a manga by name among the ones returned by list_mangas().
+
+        Args:
+            name_regex: Pattern searched (re.search semantics) in each manga
+                name; a plain string is compiled to a pattern first.
+
+        Returns:
+            The first manga whose name matches, or None if there is none.
+        """
+        pattern = (
+            re.compile(name_regex) if isinstance(name_regex, str) else name_regex
+        )
+
+        return next(
+            (entry for entry in self.list_mangas() if pattern.search(entry.name)),
+            None,
+        )
+
     @abstractmethod
     def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
         """