Initial commit.
This commit is contained in:
commit
1273fefe12
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
# python
|
||||
*.egg-info
|
||||
__pycache__
|
||||
venv
|
||||
build
|
||||
|
||||
# others
|
||||
/scans
|
60
pyproject.toml
Normal file
60
pyproject.toml
Normal file
@ -0,0 +1,60 @@
|
||||
[build-system]
|
||||
requires = ["setuptools", "setuptools-scm"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
name = "manga-scan-fetcher"
version = "0.0.1"
authors = [
    { name = "Mikaël Capelle", email = "capelle.mikael@gmail.com" },
]
description = ""
requires-python = ">=3.10"
license = { text = "MIT" }
classifiers = [
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    # "License :: MIT" is not a registered trove classifier; this is the
    # registered one for the MIT license.
    "License :: OSI Approved :: MIT License",
]
dependencies = [
    "img2pdf",
    "Pillow",
    # "beautifulsoup4" is the actual distribution providing the "bs4" module
    # (the "bs4" distribution on PyPI is only a placeholder for it).
    "beautifulsoup4",
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
dev = [
|
||||
"black",
|
||||
"flake8",
|
||||
"flake8-black",
|
||||
"flake8-pyproject",
|
||||
"mypy",
|
||||
"pytest",
|
||||
"isort",
|
||||
"types-beautifulsoup4",
|
||||
"types-Pillow"
|
||||
]
|
||||
|
||||
[tool.flake8]
|
||||
max-line-length = 88
|
||||
# See https://github.com/PyCQA/pycodestyle/issues/373
|
||||
extend-ignore = ['E203', 'E231']
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
multi_line_output = 3
|
||||
|
||||
[tool.mypy]
|
||||
warn_return_any = true
|
||||
warn_unused_configs = true
|
||||
|
||||
[[tool.mypy.overrides]]
|
||||
module = "img2pdf.*"
|
||||
ignore_missing_imports = true
|
||||
|
||||
[tool.pyright]
|
||||
# reportUnknownVariableType = false
|
||||
# reportMissingTypeStubs = false
|
||||
# reportUnknownMemberType = false
|
||||
# reportUnknownArgumentType = false
|
0
src/scans/__init__.py
Normal file
0
src/scans/__init__.py
Normal file
144
src/scans/lelscans.py
Normal file
144
src/scans/lelscans.py
Normal file
@ -0,0 +1,144 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
import tempfile
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from PIL import Image
|
||||
|
||||
from .scans import Chapter, Manga, ScanFetcher
|
||||
|
||||
# Root URL of the website, requested to list mangas and prepended to the
# image paths found in chapter pages.
_BASE_URL = "https://lelscan.net"

# HTTP headers sent with every request (a desktop Chrome User-Agent).
_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/39.0.2171.95 Safari/537.36"
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LelScansManga(Manga):
    """Manga entry extended with the URL it was listed with."""

    # Value of the <option> the manga was read from; requested as-is when
    # listing the chapters of the manga.
    url: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LelScansChapter(Chapter):
    """Chapter entry extended with the URL it was listed with."""

    # Value of the <option> the chapter was read from; requested as-is when
    # fetching the chapter.
    url: str
|
||||
|
||||
|
||||
class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
    """
    Scan fetcher backed by the lelscan.net website.

    Mangas and chapters are discovered by parsing the <select> elements of
    the page header, and chapter pages are downloaded as image files.
    """

    def _request(self, url: str) -> bytes | str:
        """
        Retrieve the content at the given URL, sending the module-level
        request headers.

        Args:
            url: The URL to retrieve.

        Returns:
            The body of the response, as returned by read().
        """
        request = urllib.request.Request(url=url, headers=_REQUEST_HEADERS)

        # use the response as a context manager so that it is closed as soon
        # as the body has been read instead of being left to the garbage
        # collector
        with urllib.request.urlopen(request) as response:
            return cast(bytes | str, response.read())

    def list_mangas(self) -> list[LelScansManga]:
        """
        Retrieve the list of mangas available on the website.

        Returns:
            The mangas found in the second <select> of the header form of
            the home page, sorted by name.
        """
        soup = BeautifulSoup(self._request(_BASE_URL), "html.parser")
        assert soup.body is not None

        # find the select (the second one of the header form lists the mangas)
        select = soup.body.select("#header-image > h2 > form > select")[1]

        return sorted(
            (
                LelScansManga(name=option.text, url=option.attrs["value"])
                for option in select.select("option")
            ),
            key=lambda m: m.name,
        )

    def list_chapters(self, manga: LelScansManga) -> list[LelScansChapter]:
        """
        Return the list of chapters available for the given manga.

        Args:
            manga: The manga for which chapters should be retrieved.

        Returns:
            The chapters found in the first <select> of the header form of
            the manga page, sorted by numeric value of their number.

        Raises:
            ValueError: If the text of an option cannot be parsed as a float.
        """
        soup = BeautifulSoup(self._request(manga.url), "html.parser")
        assert soup.body is not None

        # find the select (the first one of the header form lists the chapters)
        select = soup.body.select("#header-image > h2 > form > select")[0]

        return sorted(
            (
                LelScansChapter(manga, option.text, url=option.attrs["value"])
                for option in select.select("option")
            ),
            key=lambda c: float(c.number),
        )

    def _fetch_chapter(
        self,
        chapter: LelScansChapter,
        folder: Path,
    ) -> list[Path]:
        """
        Download all the pages of the given chapter into the given folder.

        Args:
            chapter: The chapter to download.
            folder: Folder in which images are stored, created if missing
                (including its parents).

        Returns:
            The paths of the downloaded images, in page order.
        """
        print(f"Retrieving scan {chapter.number}... ")
        folder.mkdir(parents=True, exist_ok=True)

        # retrieve the main page
        soup = BeautifulSoup(self._request(chapter.url), "html.parser")

        # retrieve the pages
        pages: list[tuple[int, str]] = []
        for anchor in soup.select("#navigation a"):
            try:
                pages.append((int(anchor.text), anchor.attrs["href"]))
            except ValueError:
                # skip anchors whose text is not a page number
                pass
        pages.sort()

        # Download each page of the scan
        def retrieve_page(page: tuple[int, str]) -> Path:
            number, page_url = page
            print(f" Retrieving page {number:02}/{len(pages):02d}")

            page_soup = BeautifulSoup(self._request(page_url), "html.parser")

            image_url = (
                _BASE_URL
                + page_soup.select("#image > table > tr > td > a > img")[0]
                .attrs["src"]
                .strip()
            )
            data = self._request(image_url)

            # name the file after the last URL segment, without query string
            filepath = folder.joinpath(image_url.split("/")[-1].split("?")[0])
            with open(filepath, "wb") as fp:
                fp.write(data)  # type: ignore

            # Remove alpha channel, if any:
            try:
                # close the source image before overwriting its file
                with Image.open(filepath) as image:
                    converted = image.convert("RGB")
                converted.save(filepath)
            except (OSError, KeyError):
                # best effort: the file is kept as downloaded
                print(f" Failed to convert page {number:02}/{len(pages):02d}")

            return filepath

        with ThreadPool() as pool:
            return pool.map(retrieve_page, pages)

    def _fetch_and_merge(
        self,
        chapter: LelScansChapter,
        folder: Path,
        pdf: Path | None,
    ) -> None:
        """
        Download the chapter into the folder, then create the PDF if requested.
        """
        images = self._fetch_chapter(chapter, folder)

        if pdf is not None:
            self._merge_scan(pdf, images)

    def fetch_chapter(
        self,
        chapter: LelScansChapter,
        folder: Path | None = None,
        pdf: Path | None = None,
    ) -> None:
        """
        Retrieve the given chapter and store it in the specified folder.

        Args:
            chapter: The chapter to retrieve.
            folder: Folder in which images are stored. If None, a temporary
                folder is used and removed before returning.
            pdf: Path to a PDF to create from the images, if not None.
        """
        if folder is not None:
            self._fetch_and_merge(chapter, folder, pdf)
            return

        # the PDF must be created while the temporary folder still exists,
        # since the images are deleted as soon as the context exits
        with tempfile.TemporaryDirectory() as tmp:
            self._fetch_and_merge(chapter, Path(tmp), pdf)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # manual check: print every manga listed by the website
    fetcher = LelScansFetcher()
    for entry in fetcher.list_mangas():
        print(entry)
|
74
src/scans/scans.py
Normal file
74
src/scans/scans.py
Normal file
@ -0,0 +1,74 @@
|
||||
# -*- encoding: utf-8 -*-
|
||||
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Generic, TypeVar
|
||||
|
||||
import img2pdf
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Manga:
    """Immutable description of a manga."""

    # Name of the manga.
    name: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Chapter:
    """Immutable description of a chapter of a manga."""

    # Manga the chapter belongs to.
    manga: Manga

    # Number of the chapter, kept as a string.
    number: str
|
||||
|
||||
|
||||
# Type variables used to parametrize fetchers with their own manga and
# chapter types.
_MangaT = TypeVar("_MangaT", bound=Manga)
_ChapterT = TypeVar("_ChapterT", bound=Chapter)
|
||||
|
||||
|
||||
class ScanFetcher(Generic[_MangaT, _ChapterT]):
    """
    Base class for fetchers able to list mangas and chapters, and to
    retrieve chapters as images and PDF.

    NOTE(review): this class does not derive from abc.ABC, so the
    abstractmethod markers below are not enforced on instantiation.
    """

    @abstractmethod
    def list_mangas(self) -> list[_MangaT]:
        """
        Retrieve the list of mangas available from this fetcher.

        The exact type of the items in the returned list is not defined
        but the type must inherit from the Manga class.
        """
        pass

    @abstractmethod
    def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
        """
        Return the list of chapters available for the given manga.

        Args:
            manga: An object of type Manga corresponding to the manga
                for which chapters should be retrieved.

        Returns:
            A list of manga chapters.
        """
        pass

    def _merge_scan(self, pdf: Path, images: list[Path]) -> None:
        """
        Create a PDF using the given images.

        Args:
            pdf: Path to the PDF to create.
            images: Paths to the images to include, in order.

        Raises:
            Exception: Any error raised while creating the PDF is propagated
                after the partially written file has been removed.
        """
        try:
            with open(pdf, "wb") as fp:
                data = img2pdf.convert([image.as_posix() for image in images])
                assert data is not None
                fp.write(data)
        except Exception:
            # the file may not exist if open() itself failed, in which case
            # the cleanup must not hide the original error
            pdf.unlink(missing_ok=True)
            raise

    @abstractmethod
    def fetch_chapter(
        self,
        chapter: _ChapterT,
        folder: Path | None = None,
        pdf: Path | None = None,
    ) -> None:
        """
        Retrieve the given chapter and store it in the specified folder.

        Args:
            chapter: The chapter to retrieve.
            folder: Folder in which the chapter should be stored, if any.
            pdf: Path to a PDF to create from the chapter, if any.
        """
        pass
|
Loading…
Reference in New Issue
Block a user