Initial commit.
This commit is contained in:
commit
1273fefe12
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# python
*.egg-info
__pycache__
venv
build

# others
/scans
|
60
pyproject.toml
Normal file
60
pyproject.toml
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
[build-system]
|
||||||
|
requires = ["setuptools", "setuptools-scm"]
|
||||||
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "manga-scan-fetcher"
|
||||||
|
version = "0.0.1"
|
||||||
|
authors = [
|
||||||
|
{ name = "Mikaël Capelle", email = "capelle.mikael@gmail.com" },
|
||||||
|
]
|
||||||
|
description = ""
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
license = { text = "MIT" }
|
||||||
|
classifiers = [
|
||||||
|
"Programming Language :: Python",
|
||||||
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"License :: OSI Approved :: MIT License",
|
||||||
|
]
|
||||||
|
dependencies = [
|
||||||
|
"img2pdf",
|
||||||
|
"Pillow",
|
||||||
|
"beautifulsoup4"
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.optional-dependencies]
|
||||||
|
dev = [
|
||||||
|
"black",
|
||||||
|
"flake8",
|
||||||
|
"flake8-black",
|
||||||
|
"flake8-pyproject",
|
||||||
|
"mypy",
|
||||||
|
"pytest",
|
||||||
|
"isort",
|
||||||
|
"types-beautifulsoup4",
|
||||||
|
"types-Pillow"
|
||||||
|
]
|
||||||
|
|
||||||
|
[tool.flake8]
|
||||||
|
max-line-length = 88
|
||||||
|
# See https://github.com/PyCQA/pycodestyle/issues/373
|
||||||
|
extend-ignore = ['E203', 'E231']
|
||||||
|
|
||||||
|
[tool.isort]
|
||||||
|
profile = "black"
|
||||||
|
multi_line_output = 3
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
warn_return_any = true
|
||||||
|
warn_unused_configs = true
|
||||||
|
|
||||||
|
[[tool.mypy.overrides]]
|
||||||
|
module = "img2pdf.*"
|
||||||
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
[tool.pyright]
|
||||||
|
# reportUnknownVariableType = false
|
||||||
|
# reportMissingTypeStubs = false
|
||||||
|
# reportUnknownMemberType = false
|
||||||
|
# reportUnknownArgumentType = false
|
0
src/scans/__init__.py
Normal file
0
src/scans/__init__.py
Normal file
144
src/scans/lelscans.py
Normal file
144
src/scans/lelscans.py
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from multiprocessing.pool import ThreadPool
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
|
from .scans import Chapter, Manga, ScanFetcher
|
||||||
|
|
||||||
|
_BASE_URL = "https://lelscan.net"
|
||||||
|
|
||||||
|
_REQUEST_HEADERS = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 "
|
||||||
|
"(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class LelScansManga(Manga):
    """A manga listed on lelscan, located by its listing page."""

    # Absolute URL of the page listing the chapters of this manga.
    url: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class LelScansChapter(Chapter):
    """A chapter of a manga hosted on lelscan."""

    # Absolute URL of the first page of this chapter.
    url: str
|
||||||
|
|
||||||
|
|
||||||
|
class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
    """ScanFetcher implementation for https://lelscan.net."""

    def _request(self, url: str) -> bytes | str:
        """Fetch the given URL, spoofing a browser user-agent.

        Args:
            url: Absolute URL to fetch.

        Returns:
            The raw response body.
        """
        request = urllib.request.Request(url=url, headers=_REQUEST_HEADERS)

        return cast(bytes | str, urllib.request.urlopen(request).read())

    def list_mangas(self) -> list[LelScansManga]:
        """Return the mangas listed on the site, sorted by name."""
        soup = BeautifulSoup(self._request(_BASE_URL), "html.parser")
        assert soup.body is not None

        # The second <select> of the page header lists the mangas.
        select = soup.body.select("#header-image > h2 > form > select")[1]
        return sorted(
            (
                LelScansManga(name=option.text, url=option.attrs["value"])
                for option in select.select("option")
            ),
            key=lambda m: m.name,
        )

    def list_chapters(self, manga: LelScansManga) -> list[LelScansChapter]:
        """Return the chapters of the given manga, sorted by number."""
        soup = BeautifulSoup(self._request(manga.url), "html.parser")
        assert soup.body is not None

        # The first <select> of the page header lists the chapters.
        select = soup.body.select("#header-image > h2 > form > select")[0]

        return sorted(
            (
                LelScansChapter(manga, option.text, url=option.attrs["value"])
                for option in select.select("option")
            ),
            key=lambda c: float(c.number),
        )

    def _fetch_chapter(
        self,
        chapter: LelScansChapter,
        folder: Path,
    ) -> list[Path]:
        """Download every page of the given chapter into the given folder.

        Args:
            chapter: The chapter to download.
            folder: Existing-or-creatable folder receiving the page images.

        Returns:
            The paths of the downloaded images.
        """
        print("Retrieving scan {}... ".format(chapter.number))
        folder.mkdir(exist_ok=True)

        # retrieve the main page
        soup = BeautifulSoup(self._request(chapter.url), "html.parser")

        # retrieve the pages -- anchors whose text is not an integer are
        # navigation links, not page links, and are skipped
        anchors = soup.select("#navigation a")
        pages: list[tuple[int, str]] = []
        for anchor in anchors:
            try:
                # skip non-page chapter
                pages.append((int(anchor.text), anchor.attrs["href"]))
            except ValueError:
                pass
        pages = sorted(pages)

        # Download each page of the scan
        def retrieve_page(page: tuple[int, str]) -> Path | None:
            number, url = page
            print(" Retrieving page {:02}/{:02d}".format(number, len(pages)))
            soup = BeautifulSoup(
                self._request(url),
                "html.parser",
            )

            url = (
                _BASE_URL
                + soup.select("#image > table > tr > td > a > img")[0]
                .attrs["src"]
                .strip()
            )
            data = self._request(url)

            # Name the file after the last URL segment, dropping any query.
            filepath = folder.joinpath(url.split("/")[-1].split("?")[0])
            with open(filepath, "wb") as fp:
                fp.write(data)  # type: ignore

            # Remove alpha channel, if any:
            try:
                Image.open(filepath).convert("RGB").save(filepath)
            except (OSError, KeyError):
                print(
                    " Failed to convert page {:02}/{:02d}".format(number, len(pages))
                )

            return filepath

        with ThreadPool() as pool:
            return [image for image in pool.map(retrieve_page, pages) if image]

    def fetch_chapter(
        self,
        chapter: LelScansChapter,
        folder: Path | None = None,
        pdf: Path | None = None,
    ):
        """Fetch the chapter images and optionally merge them into a PDF.

        Args:
            chapter: The chapter to fetch.
            folder: Folder where images are stored; when omitted, a
                temporary folder (deleted afterwards) is used.
            pdf: Optional path of a PDF file to build from the pages.
        """
        if folder is None:
            # BUGFIX: the PDF must be generated *inside* the context
            # manager -- previously the temporary directory (and the
            # images in it) was deleted before _merge_scan ran.
            with tempfile.TemporaryDirectory() as t:
                images = self._fetch_chapter(chapter, Path(t))
                if pdf is not None:
                    self._merge_scan(pdf, images)
        else:
            images = self._fetch_chapter(chapter, folder)
            if pdf is not None:
                self._merge_scan(pdf, images)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Smoke test: print every manga the fetcher can discover.
    fetcher = LelScansFetcher()
    for manga in fetcher.list_mangas():
        print(manga)
|
74
src/scans/scans.py
Normal file
74
src/scans/scans.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
# -*- encoding: utf-8 -*-
|
||||||
|
|
||||||
|
from abc import abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Generic, TypeVar
|
||||||
|
|
||||||
|
import img2pdf
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Manga:
    """Base class for a manga exposed by a ScanFetcher."""

    # Name of the manga.
    name: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Chapter:
    """Base class for a single chapter of a manga."""

    # The manga this chapter belongs to.
    manga: Manga
    # Chapter number, kept as a string (fetchers may parse it, e.g. to
    # a float, for numeric sorting).
    number: str
|
||||||
|
|
||||||
|
|
||||||
|
# Type variables binding a ScanFetcher subclass to its concrete Manga
# and Chapter types.
_MangaT = TypeVar("_MangaT", bound=Manga)
_ChapterT = TypeVar("_ChapterT", bound=Chapter)
|
||||||
|
|
||||||
|
|
||||||
|
class ScanFetcher(Generic[_MangaT, _ChapterT]):
    """Base class for scan fetchers.

    Concrete fetchers implement manga/chapter discovery and chapter
    retrieval for a given website.
    """

    @abstractmethod
    def list_mangas(self) -> list[_MangaT]:
        """
        Retrieve the list of mangas available from this fetcher.

        The exact type of the items in the returned list is not defined
        but the type must inherit from the Manga class.
        """
        pass

    @abstractmethod
    def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
        """
        Return the list of chapters available for the given manga.

        Args:
            manga: An object of type Manga corresponding to the manga
                for which chapters should be retrieved.

        Returns:
            A list of manga chapter.
        """
        pass

    def _merge_scan(self, pdf: Path, images: list[Path]) -> None:
        """
        Create a PDF using the given images.

        On failure, the partially-written PDF (if any) is removed and
        the original exception is re-raised.
        """
        try:
            # Convert before opening the output file so that a conversion
            # failure does not leave an empty PDF behind.
            data = img2pdf.convert([image.as_posix() for image in images])
            assert data is not None
            with open(pdf, "wb") as fp:
                fp.write(data)
        except Exception:
            # missing_ok: the file may never have been created; a bare
            # `raise` preserves the original traceback instead of
            # re-wrapping it (the old `raise e` / plain unlink() could
            # mask the real error with a FileNotFoundError).
            pdf.unlink(missing_ok=True)
            raise

    @abstractmethod
    def fetch_chapter(
        self,
        chapter: _ChapterT,
        folder: Path | None = None,
        pdf: Path | None = None,
    ):
        """
        Retrieve the given chapter and store it in the specified folder.

        Args:
            chapter: The chapter to retrieve.
            folder: Folder where the scan images should be stored.
            pdf: Optional path of a PDF file to generate from the images.
        """
        pass
|
Loading…
Reference in New Issue
Block a user