Add main code.

This commit is contained in:
Mikaël Capelle 2023-07-19 19:11:42 +02:00
parent 1273fefe12
commit 9ed044859b
4 changed files with 85 additions and 10 deletions

53
fetch_scans.py Normal file
View File

@ -0,0 +1,53 @@
# -*- encoding: utf-8 -*-
import logging
from pathlib import Path
from scans.lelscans import LelScansFetcher
# Root directory under which downloaded scans are stored
SCAN_FOLDER: Path = Path("scans")

# Chapter numbers (as strings) that must be skipped when downloading,
# e.g. [str(i) for i in range(1, 910 + 1)]
IGNORE_NUMBERS: list[str] = []
def main():
    """
    Download every chapter of the configured manga that is not on disk yet.

    The manga is looked up through a LelScansFetcher, its chapters are listed
    and each chapter that is neither in IGNORE_NUMBERS nor already present as
    a PDF under SCAN_FOLDER is fetched.

    Raises:
        LookupError: If no manga matching the configured name is found.
    """
    logging.basicConfig(level=logging.INFO)

    manga_name = "One Punch Man"  # "One Piece"

    # parents=True because SCAN_FOLDER itself may not exist on a first run
    manga_folder = SCAN_FOLDER.joinpath(manga_name)
    manga_folder.mkdir(parents=True, exist_ok=True)

    fetcher = LelScansFetcher()

    # the name is used as a regex by find_manga()
    manga = fetcher.find_manga(manga_name)
    if manga is None:
        # explicit raise instead of assert, which is stripped under -O
        raise LookupError(f"No manga found matching {manga_name!r}")

    chapters = fetcher.list_chapters(manga)
    if not chapters:
        # nothing to index into and nothing to download
        print(f"Found no scans for {manga_name}.")
        return

    print(
        f"Found {len(chapters)} scans "
        f"from {chapters[0].number} to {chapters[-1].number}... "
    )

    # build the lookup set once rather than scanning the list per chapter
    ignored = set(IGNORE_NUMBERS)

    # download the chapters that are missing
    for chapter in chapters:
        number = chapter.number

        if number in ignored:
            continue

        pdf = manga_folder.joinpath(f"ops_{number}.pdf")

        # a chapter whose PDF already exists is considered downloaded
        if pdf.exists():
            continue

        # dots are replaced so that e.g. chapter "10.5" maps to folder "10_5"
        folder = manga_folder.joinpath(number.replace(".", "_"))
        fetcher.fetch_chapter(chapter, folder, pdf)
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -52,9 +52,3 @@ warn_unused_configs = true
[[tool.mypy.overrides]] [[tool.mypy.overrides]]
module = "img2pdf.*" module = "img2pdf.*"
ignore_missing_imports = true ignore_missing_imports = true
[tool.pyright]
# reportUnknownVariableType = false
# reportMissingTypeStubs = false
# reportUnknownMemberType = false
# reportUnknownArgumentType = false

View File

@ -1,5 +1,6 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
import logging
import tempfile import tempfile
import urllib.request import urllib.request
from dataclasses import dataclass from dataclasses import dataclass
@ -8,7 +9,7 @@ from pathlib import Path
from typing import cast from typing import cast
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from PIL import Image from PIL import Image, ImageFile
from .scans import Chapter, Manga, ScanFetcher from .scans import Chapter, Manga, ScanFetcher
@ -21,6 +22,11 @@ _REQUEST_HEADERS = {
) )
} }
# Let Pillow load image files whose data is truncated instead of raising an
# error (presumably some scan pages are served incomplete; TODO confirm)
ImageFile.LOAD_TRUNCATED_IMAGES = True
# Logger named after the enclosing package
LOGGER = logging.getLogger(__package__)
@dataclass(frozen=True) @dataclass(frozen=True)
class LelScansManga(Manga): class LelScansManga(Manga):
@ -72,7 +78,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
chapter: LelScansChapter, chapter: LelScansChapter,
folder: Path, folder: Path,
) -> list[Path]: ) -> list[Path]:
print("Retrieving scan {}... ".format(chapter.number)) LOGGER.info("Retrieving scan {}... ".format(chapter.number))
folder.mkdir(exist_ok=True) folder.mkdir(exist_ok=True)
# retrieve the main page # retrieve the main page
@ -92,7 +98,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
# Download each page of the scan # Download each page of the scan
def retrieve_page(page: tuple[int, str]) -> Path | None: def retrieve_page(page: tuple[int, str]) -> Path | None:
number, url = page number, url = page
print(" Retrieving page {:02}/{:02d}".format(number, len(pages))) LOGGER.info(" Retrieving page {:02}/{:02d}".format(number, len(pages)))
soup = BeautifulSoup( soup = BeautifulSoup(
self._request(url), self._request(url),
"html.parser", "html.parser",
@ -114,7 +120,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
try: try:
Image.open(filepath).convert("RGB").save(filepath) Image.open(filepath).convert("RGB").save(filepath)
except (OSError, KeyError): except (OSError, KeyError):
print( LOGGER.warning(
" Failed to convert page {:02}/{:02d}".format(number, len(pages)) " Failed to convert page {:02}/{:02d}".format(number, len(pages))
) )
@ -136,6 +142,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
images = self._fetch_chapter(chapter, folder) images = self._fetch_chapter(chapter, folder)
if pdf is not None: if pdf is not None:
LOGGER.info("Merging scan {}... ".format(chapter.number))
self._merge_scan(pdf, images) self._merge_scan(pdf, images)

View File

@ -1,5 +1,6 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
import re
from abc import abstractmethod from abc import abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -34,6 +35,26 @@ class ScanFetcher(Generic[_MangaT, _ChapterT]):
""" """
pass pass
def find_manga(self, name_regex: re.Pattern | str) -> _MangaT | None:
    """
    Look up a manga by name among the ones returned by list_mangas().

    Args:
        name_regex: Pattern searched for in each manga name, either compiled
            or given as a string that is compiled first.

    Returns:
        The first manga whose name matches, or None if none matches.
    """
    pattern = (
        re.compile(name_regex) if isinstance(name_regex, str) else name_regex
    )

    # next() stops at the first match, in listing order
    return next(
        (
            candidate
            for candidate in self.list_mangas()
            if pattern.search(candidate.name)
        ),
        None,
    )
@abstractmethod @abstractmethod
def list_chapters(self, manga: _MangaT) -> list[_ChapterT]: def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
""" """