Initial commit.
This commit is contained in:
		
							
								
								
									
										0
									
								
								src/scans/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/scans/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										144
									
								
								src/scans/lelscans.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								src/scans/lelscans.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,144 @@
 | 
			
		||||
# -*- encoding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
import tempfile
 | 
			
		||||
import urllib.request
 | 
			
		||||
from dataclasses import dataclass
 | 
			
		||||
from multiprocessing.pool import ThreadPool
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import cast
 | 
			
		||||
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
from PIL import Image
 | 
			
		||||
 | 
			
		||||
from .scans import Chapter, Manga, ScanFetcher
 | 
			
		||||
 | 
			
		||||
# Root URL of the LelScan website: the manga list is read from this page and
# image paths found in chapter pages are appended to it.
_BASE_URL = "https://lelscan.net"

# HTTP headers attached to every request built by LelScansFetcher._request.
_REQUEST_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
    )
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass(frozen=True)
class LelScansManga(Manga):
    """
    Manga listed on the LelScan website.
    """

    # Value of the <option> element the manga was read from; it is requested
    # as-is to list the chapters of the manga.
    url: str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass(frozen=True)
class LelScansChapter(Chapter):
    """
    Chapter of a manga listed on the LelScan website.
    """

    # Value of the <option> element the chapter was read from; it is requested
    # as-is to find the pages of the chapter.
    url: str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
    """
    Scan fetcher for the LelScan website (see _BASE_URL).

    Mangas and chapters are discovered by parsing the <select> elements found
    in the header of the site; the pages of a chapter are downloaded
    concurrently using a thread pool.
    """

    def _request(self, url: str) -> bytes:
        """
        Retrieve the content at the given URL using the module headers.

        Args:
            url: The URL to request.

        Returns:
            The raw body of the response.
        """
        request = urllib.request.Request(url=url, headers=_REQUEST_HEADERS)

        # close the response explicitly rather than leaving the connection
        # open until the object is garbage collected
        with urllib.request.urlopen(request) as response:
            return cast(bytes, response.read())

    def list_mangas(self) -> list[LelScansManga]:
        """
        Retrieve the list of mangas available on the website.

        Returns:
            The mangas found on the home page, sorted by name.
        """
        soup = BeautifulSoup(self._request(_BASE_URL), "html.parser")
        assert soup.body is not None

        # the manga list is read from the second select of the header form
        select = soup.body.select("#header-image > h2 > form > select")[1]
        return sorted(
            (
                LelScansManga(name=option.text, url=option.attrs["value"])
                for option in select.select("option")
            ),
            key=lambda m: m.name,
        )

    def list_chapters(self, manga: LelScansManga) -> list[LelScansChapter]:
        """
        Return the list of chapters available for the given manga.

        Args:
            manga: The manga for which chapters should be retrieved.

        Returns:
            The chapters found on the page of the manga, sorted by their
            number interpreted as a float.
        """
        soup = BeautifulSoup(self._request(manga.url), "html.parser")
        assert soup.body is not None

        # the chapter list is read from the first select of the header form
        select = soup.body.select("#header-image > h2 > form > select")[0]

        return sorted(
            (
                LelScansChapter(manga, option.text, url=option.attrs["value"])
                for option in select.select("option")
            ),
            key=lambda c: float(c.number),
        )

    def _fetch_chapter(
        self,
        chapter: LelScansChapter,
        folder: Path,
    ) -> list[Path]:
        """
        Download every page of the given chapter into the given folder.

        Args:
            chapter: The chapter to download.
            folder: Folder in which images are stored, created (including
              missing parents) if it does not exist.

        Returns:
            The paths of the downloaded images, ordered by page number.
        """
        print(f"Retrieving scan {chapter.number}... ")
        folder.mkdir(parents=True, exist_ok=True)

        # retrieve the main page
        soup = BeautifulSoup(self._request(chapter.url), "html.parser")

        # retrieve the pages: only navigation links whose text is an integer
        # are pages, other links are skipped
        pages: list[tuple[int, str]] = []
        for anchor in soup.select("#navigation a"):
            try:
                number = int(anchor.text)
            except ValueError:
                continue
            pages.append((number, anchor.attrs["href"]))
        pages.sort()

        # Download each page of the scan
        def retrieve_page(page: tuple[int, str]) -> Path:
            number, page_url = page
            print(f"  Retrieving page {number:02}/{len(pages):02d}")
            page_soup = BeautifulSoup(self._request(page_url), "html.parser")

            # the src of the image is appended to the base URL of the site
            source = page_soup.select("#image > table > tr > td > a > img")[0].attrs[
                "src"
            ]
            image_url = _BASE_URL + source.strip()
            data = self._request(image_url)

            # name the file after the last path component, without query string
            filepath = folder.joinpath(image_url.split("/")[-1].split("?")[0])
            filepath.write_bytes(data)

            # Remove alpha channel, if any:
            try:
                # release the file handle before overwriting the same file
                with Image.open(filepath) as image:
                    converted = image.convert("RGB")
                converted.save(filepath)
            except (OSError, KeyError):
                # best-effort: the original download is kept when conversion fails
                print(f"    Failed to convert page {number:02}/{len(pages):02d}")

            return filepath

        # map() preserves the order of pages, so the result is sorted by number
        with ThreadPool() as pool:
            return pool.map(retrieve_page, pages)

    def fetch_chapter(
        self,
        chapter: LelScansChapter,
        folder: Path | None = None,
        pdf: Path | None = None,
    ) -> None:
        """
        Retrieve the given chapter and optionally merge its pages into a PDF.

        Args:
            chapter: The chapter to retrieve.
            folder: Folder in which images are stored. If None, images are
              stored in a temporary directory removed before returning.
            pdf: If not None, path of the PDF to create from the images.
        """
        if folder is not None:
            images = self._fetch_chapter(chapter, folder)
            if pdf is not None:
                self._merge_scan(pdf, images)
            return

        # The images only exist while the temporary directory does, so the
        # PDF must be created before leaving the context manager.
        with tempfile.TemporaryDirectory() as tmp:
            images = self._fetch_chapter(chapter, Path(tmp))
            if pdf is not None:
                self._merge_scan(pdf, images)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    for manga in LelScansFetcher().list_mangas():
 | 
			
		||||
        print(manga)
 | 
			
		||||
							
								
								
									
										74
									
								
								src/scans/scans.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/scans/scans.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,74 @@
 | 
			
		||||
# -*- encoding: utf-8 -*-
 | 
			
		||||
 | 
			
		||||
from abc import abstractmethod
 | 
			
		||||
from dataclasses import dataclass
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import Generic, TypeVar
 | 
			
		||||
 | 
			
		||||
import img2pdf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass(frozen=True)
class Manga:
    """
    Immutable description of a manga, used as the base type of the mangas
    returned by scan fetchers.
    """

    name: str
    """ Name of the manga. """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@dataclass(frozen=True)
class Chapter:
    """
    Immutable description of a chapter, used as the base type of the chapters
    returned by scan fetchers.
    """

    # Manga the chapter belongs to.
    manga: Manga

    # Number of the chapter, stored as a string.
    number: str
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Type variables used to parametrize ScanFetcher with the concrete Manga and
# Chapter subclasses a fetcher works with.
_MangaT = TypeVar("_MangaT", bound=Manga)
_ChapterT = TypeVar("_ChapterT", bound=Chapter)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ScanFetcher(Generic[_MangaT, _ChapterT]):
    """
    Base class of scan fetchers, parametrized by the types of manga and
    chapter the fetcher works with.

    NOTE(review): methods are decorated with @abstractmethod but the class
    does not derive from abc.ABC, so subclasses missing an implementation can
    still be instantiated - confirm whether this is intended.
    """

    @abstractmethod
    def list_mangas(self) -> list[_MangaT]:
        """
        Retrieve the list of mangas available from this fetcher.

        The exact type of the items in the returned list is not defined
        but the type must inherit from the Manga class.
        """
        pass

    @abstractmethod
    def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
        """
        Return the list of chapters available for the given manga.

        Args:
            manga: An object of type Manga corresponding to the manga
              for which chapters should be retrieved.

        Returns:
            A list of manga chapters.
        """
        pass

    def _merge_scan(self, pdf: Path, images: list[Path]) -> None:
        """
        Create a PDF using the given images.

        Args:
            pdf: Path of the PDF file to create.
            images: Paths of the images to convert, in the order they are
              passed to img2pdf.

        Raises:
            Exception: Any error raised while converting or writing is
              re-raised after the PDF file, if it exists, has been removed.
        """
        try:
            with open(pdf, "wb") as fp:
                data = img2pdf.convert([image.as_posix() for image in images])
                assert data is not None
                fp.write(data)
        except Exception:
            # do not leave a partial file behind; open() itself may have
            # failed, in which case there is no file and the original error
            # must not be masked by a FileNotFoundError
            pdf.unlink(missing_ok=True)
            raise

    @abstractmethod
    def fetch_chapter(
        self,
        chapter: _ChapterT,
        folder: Path | None = None,
        pdf: Path | None = None,
    ) -> None:
        """
        Retrieve the given chapter and store it in the specified folder.

        Args:
            chapter: The chapter to retrieve.
            folder: Folder in which the chapter should be stored, or None.
            pdf: Path of a PDF file, or None; presumably the file to create
              from the pages of the chapter (see _merge_scan) - the exact
              handling is left to implementations.
        """
        pass
 | 
			
		||||
		Reference in New Issue
	
	Block a user