Add main code.
This commit is contained in:
parent
1273fefe12
commit
9ed044859b
53
fetch_scans.py
Normal file
53
fetch_scans.py
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from scans.lelscans import LelScansFetcher
|
||||||
|
|
||||||
|
# Folder containing the scans
|
||||||
|
SCAN_FOLDER = Path("scans")
|
||||||
|
|
||||||
|
# Chapter numbers (as strings, matching Chapter.number) to skip when downloading
|
||||||
|
IGNORE_NUMBERS = [] # [str(i) for i in range(1, 910 + 1)]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Download every chapter of the configured manga that is not on disk yet.

    The manga is looked up through a LelScansFetcher, its chapters are
    listed, and each chapter is fetched unless its number appears in
    IGNORE_NUMBERS or its PDF already exists under SCAN_FOLDER/<manga>.

    Raises:
        SystemExit: If the fetcher cannot find the requested manga.
    """
    logging.basicConfig(level=logging.INFO)

    manga = "One Punch Man"  # "One Piece"
    manga_folder = SCAN_FOLDER.joinpath(manga)

    # parents=True: SCAN_FOLDER itself may not exist yet on a first run,
    # in which case a plain mkdir() raises FileNotFoundError.
    manga_folder.mkdir(parents=True, exist_ok=True)

    fetcher = LelScansFetcher()

    found = fetcher.find_manga(manga)
    if found is None:
        # Explicit check instead of `assert`, which is stripped under -O.
        raise SystemExit("Manga {!r} not found.".format(manga))

    chapters = fetcher.list_chapters(found)
    if not chapters:
        # Nothing to index below; avoid IndexError on chapters[0].
        print("No scans found for {}.".format(manga))
        return

    print(
        "Found {} scans from {} to {}... ".format(
            len(chapters), chapters[0].number, chapters[-1].number
        )
    )

    # Build the lookup once; IGNORE_NUMBERS may hold hundreds of entries.
    ignored = set(IGNORE_NUMBERS)

    # check the scans that need to be downloaded
    for chapter in chapters:
        number = chapter.number

        # ignore the number
        if number in ignored:
            continue

        # skip chapters whose PDF has already been produced
        pdf = manga_folder.joinpath("ops_{}.pdf".format(number))
        if pdf.exists():
            continue

        # dots in chapter numbers (e.g. "12.5") are replaced for the folder name
        folder = manga_folder.joinpath(number.replace(".", "_"))
        fetcher.fetch_chapter(chapter, folder, pdf)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -52,9 +52,3 @@ warn_unused_configs = true
|
|||||||
[[tool.mypy.overrides]]
|
[[tool.mypy.overrides]]
|
||||||
module = "img2pdf.*"
|
module = "img2pdf.*"
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
|
|
||||||
[tool.pyright]
|
|
||||||
# reportUnknownVariableType = false
|
|
||||||
# reportMissingTypeStubs = false
|
|
||||||
# reportUnknownMemberType = false
|
|
||||||
# reportUnknownArgumentType = false
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# -*- encoding: utf-8 -*-
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
@ -8,7 +9,7 @@ from pathlib import Path
|
|||||||
from typing import cast
|
from typing import cast
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from PIL import Image
|
from PIL import Image, ImageFile
|
||||||
|
|
||||||
from .scans import Chapter, Manga, ScanFetcher
|
from .scans import Chapter, Manga, ScanFetcher
|
||||||
|
|
||||||
@ -21,6 +22,11 @@ _REQUEST_HEADERS = {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# fix for some images
|
||||||
|
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
||||||
|
|
||||||
|
LOGGER = logging.getLogger(__package__)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class LelScansManga(Manga):
|
class LelScansManga(Manga):
|
||||||
@ -72,7 +78,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
|
|||||||
chapter: LelScansChapter,
|
chapter: LelScansChapter,
|
||||||
folder: Path,
|
folder: Path,
|
||||||
) -> list[Path]:
|
) -> list[Path]:
|
||||||
print("Retrieving scan {}... ".format(chapter.number))
|
LOGGER.info("Retrieving scan {}... ".format(chapter.number))
|
||||||
folder.mkdir(exist_ok=True)
|
folder.mkdir(exist_ok=True)
|
||||||
|
|
||||||
# retrieve the main page
|
# retrieve the main page
|
||||||
@ -92,7 +98,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
|
|||||||
# Download each page of the scan
|
# Download each page of the scan
|
||||||
def retrieve_page(page: tuple[int, str]) -> Path | None:
|
def retrieve_page(page: tuple[int, str]) -> Path | None:
|
||||||
number, url = page
|
number, url = page
|
||||||
print(" Retrieving page {:02}/{:02d}".format(number, len(pages)))
|
LOGGER.info(" Retrieving page {:02}/{:02d}".format(number, len(pages)))
|
||||||
soup = BeautifulSoup(
|
soup = BeautifulSoup(
|
||||||
self._request(url),
|
self._request(url),
|
||||||
"html.parser",
|
"html.parser",
|
||||||
@ -114,7 +120,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
|
|||||||
try:
|
try:
|
||||||
Image.open(filepath).convert("RGB").save(filepath)
|
Image.open(filepath).convert("RGB").save(filepath)
|
||||||
except (OSError, KeyError):
|
except (OSError, KeyError):
|
||||||
print(
|
LOGGER.warning(
|
||||||
" Failed to convert page {:02}/{:02d}".format(number, len(pages))
|
" Failed to convert page {:02}/{:02d}".format(number, len(pages))
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -136,6 +142,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
|
|||||||
images = self._fetch_chapter(chapter, folder)
|
images = self._fetch_chapter(chapter, folder)
|
||||||
|
|
||||||
if pdf is not None:
|
if pdf is not None:
|
||||||
|
LOGGER.info("Merging scan {}... ".format(chapter.number))
|
||||||
self._merge_scan(pdf, images)
|
self._merge_scan(pdf, images)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
# -*- encoding: utf-8 -*-
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -34,6 +35,26 @@ class ScanFetcher(Generic[_MangaT, _ChapterT]):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def find_manga(self, name_regex: re.Pattern | str) -> _MangaT | None:
    """
    Look up a manga by name among those returned by list_mangas().

    Args:
        name_regex: Pattern searched for in each manga name. A plain
            string is compiled into a regex before use.

    Returns:
        The first manga whose name matches, or None when none does.
    """
    pattern = (
        re.compile(name_regex) if isinstance(name_regex, str) else name_regex
    )

    # next() with a default yields the first match, or None if exhausted.
    return next(
        (
            candidate
            for candidate in self.list_mangas()
            if pattern.search(candidate.name)
        ),
        None,
    )
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
|
def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
|
||||||
"""
|
"""
|
||||||
|
Loading…
Reference in New Issue
Block a user