Add main code.
This commit is contained in:
		
							
								
								
									
										53
									
								
								fetch_scans.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								fetch_scans.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,53 @@
 | 
				
			|||||||
 | 
					# -*- encoding: utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import logging
 | 
				
			||||||
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from scans.lelscans import LelScansFetcher
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Folder containing the scans
 | 
				
			||||||
 | 
					SCAN_FOLDER = Path("scans")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Chapter numbers (as strings, e.g. "910") to skip when downloading
 | 
				
			||||||
 | 
					IGNORE_NUMBERS = []  # [str(i) for i in range(1, 910 + 1)]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main():
					    """Download every missing chapter of the configured manga as a PDF.

					    Looks the manga up through ``LelScansFetcher``, lists its chapters and,
					    for each chapter that is neither listed in ``IGNORE_NUMBERS`` nor
					    already present as a PDF under ``SCAN_FOLDER/<manga>``, calls
					    ``fetch_chapter`` to retrieve it.

					    Raises:
					        RuntimeError: If no manga matches the configured name.
					    """
					    logging.basicConfig(level=logging.INFO)

					    manga = "One Punch Man"  # "One Piece"

					    # parents=True: do not fail when SCAN_FOLDER itself is missing from the
					    # current working directory.
					    manga_folder = SCAN_FOLDER.joinpath(manga)
					    manga_folder.mkdir(parents=True, exist_ok=True)

					    fetcher = LelScansFetcher()

					    # Explicit check instead of ``assert``: assertions are stripped when
					    # Python runs with -O, which would pass None to list_chapters().
					    found = fetcher.find_manga(manga)
					    if found is None:
					        raise RuntimeError("Manga {!r} not found".format(manga))

					    chapters = fetcher.list_chapters(found)
					    if not chapters:
					        # Nothing to index with chapters[0] / chapters[-1]; nothing to fetch.
					        print("Found 0 scans.")
					        return

					    print(
					        "Found {} scans from {} to {}... ".format(
					            len(chapters), chapters[0].number, chapters[-1].number
					        )
					    )

					    # check the scans that need to be downloaded
					    for chapter in chapters:
					        number = chapter.number

					        # ignore the number
					        if number in IGNORE_NUMBERS:
					            continue

					        # "." is replaced in the folder name only; the PDF keeps the raw number
					        folder = manga_folder.joinpath(number.replace(".", "_"))
					        pdf = manga_folder.joinpath("ops_{}.pdf".format(number))

					        # skip chapters whose PDF is already on disk
					        if pdf.exists():
					            continue

					        fetcher.fetch_chapter(chapter, folder, pdf)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == "__main__":
 | 
				
			||||||
 | 
					    main()
 | 
				
			||||||
@@ -52,9 +52,3 @@ warn_unused_configs = true
 | 
				
			|||||||
[[tool.mypy.overrides]]
 | 
					[[tool.mypy.overrides]]
 | 
				
			||||||
module = "img2pdf.*"
 | 
					module = "img2pdf.*"
 | 
				
			||||||
ignore_missing_imports = true
 | 
					ignore_missing_imports = true
 | 
				
			||||||
 | 
					 | 
				
			||||||
[tool.pyright]
 | 
					 | 
				
			||||||
# reportUnknownVariableType = false
 | 
					 | 
				
			||||||
# reportMissingTypeStubs = false
 | 
					 | 
				
			||||||
# reportUnknownMemberType = false
 | 
					 | 
				
			||||||
# reportUnknownArgumentType = false
 | 
					 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,6 @@
 | 
				
			|||||||
# -*- encoding: utf-8 -*-
 | 
					# -*- encoding: utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import logging
 | 
				
			||||||
import tempfile
 | 
					import tempfile
 | 
				
			||||||
import urllib.request
 | 
					import urllib.request
 | 
				
			||||||
from dataclasses import dataclass
 | 
					from dataclasses import dataclass
 | 
				
			||||||
@@ -8,7 +9,7 @@ from pathlib import Path
 | 
				
			|||||||
from typing import cast
 | 
					from typing import cast
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from bs4 import BeautifulSoup
 | 
					from bs4 import BeautifulSoup
 | 
				
			||||||
from PIL import Image
 | 
					from PIL import Image, ImageFile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .scans import Chapter, Manga, ScanFetcher
 | 
					from .scans import Chapter, Manga, ScanFetcher
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -21,6 +22,11 @@ _REQUEST_HEADERS = {
 | 
				
			|||||||
    )
 | 
					    )
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# fix for some images
 | 
				
			||||||
 | 
					ImageFile.LOAD_TRUNCATED_IMAGES = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					LOGGER = logging.getLogger(__package__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@dataclass(frozen=True)
 | 
					@dataclass(frozen=True)
 | 
				
			||||||
class LelScansManga(Manga):
 | 
					class LelScansManga(Manga):
 | 
				
			||||||
@@ -72,7 +78,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
 | 
				
			|||||||
        chapter: LelScansChapter,
 | 
					        chapter: LelScansChapter,
 | 
				
			||||||
        folder: Path,
 | 
					        folder: Path,
 | 
				
			||||||
    ) -> list[Path]:
 | 
					    ) -> list[Path]:
 | 
				
			||||||
        print("Retrieving scan {}... ".format(chapter.number))
 | 
					        LOGGER.info("Retrieving scan {}... ".format(chapter.number))
 | 
				
			||||||
        folder.mkdir(exist_ok=True)
 | 
					        folder.mkdir(exist_ok=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # retrieve the main page
 | 
					        # retrieve the main page
 | 
				
			||||||
@@ -92,7 +98,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
 | 
				
			|||||||
        # Download each page of the scan
 | 
					        # Download each page of the scan
 | 
				
			||||||
        def retrieve_page(page: tuple[int, str]) -> Path | None:
 | 
					        def retrieve_page(page: tuple[int, str]) -> Path | None:
 | 
				
			||||||
            number, url = page
 | 
					            number, url = page
 | 
				
			||||||
            print("  Retrieving page {:02}/{:02d}".format(number, len(pages)))
 | 
					            LOGGER.info("  Retrieving page {:02}/{:02d}".format(number, len(pages)))
 | 
				
			||||||
            soup = BeautifulSoup(
 | 
					            soup = BeautifulSoup(
 | 
				
			||||||
                self._request(url),
 | 
					                self._request(url),
 | 
				
			||||||
                "html.parser",
 | 
					                "html.parser",
 | 
				
			||||||
@@ -114,7 +120,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
 | 
				
			|||||||
            try:
 | 
					            try:
 | 
				
			||||||
                Image.open(filepath).convert("RGB").save(filepath)
 | 
					                Image.open(filepath).convert("RGB").save(filepath)
 | 
				
			||||||
            except (OSError, KeyError):
 | 
					            except (OSError, KeyError):
 | 
				
			||||||
                print(
 | 
					                LOGGER.warning(
 | 
				
			||||||
                    "    Failed to convert page {:02}/{:02d}".format(number, len(pages))
 | 
					                    "    Failed to convert page {:02}/{:02d}".format(number, len(pages))
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -136,6 +142,7 @@ class LelScansFetcher(ScanFetcher[LelScansManga, LelScansChapter]):
 | 
				
			|||||||
            images = self._fetch_chapter(chapter, folder)
 | 
					            images = self._fetch_chapter(chapter, folder)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if pdf is not None:
 | 
					        if pdf is not None:
 | 
				
			||||||
 | 
					            LOGGER.info("Merging scan {}... ".format(chapter.number))
 | 
				
			||||||
            self._merge_scan(pdf, images)
 | 
					            self._merge_scan(pdf, images)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,5 +1,6 @@
 | 
				
			|||||||
# -*- encoding: utf-8 -*-
 | 
					# -*- encoding: utf-8 -*-
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
from abc import abstractmethod
 | 
					from abc import abstractmethod
 | 
				
			||||||
from dataclasses import dataclass
 | 
					from dataclasses import dataclass
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
@@ -34,6 +35,26 @@ class ScanFetcher(Generic[_MangaT, _ChapterT]):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def find_manga(self, name_regex: re.Pattern | str) -> _MangaT | None:
					        """
					        Look up a manga by name among those returned by ``list_mangas``.

					        Args:
					            name_regex: Pattern searched (``re.search`` semantics) in each
					                manga name. A plain string is compiled with ``re.compile``.

					        Returns:
					            The first manga whose name matches, or None if none matches.
					        """
					        pattern = (
					            re.compile(name_regex) if isinstance(name_regex, str) else name_regex
					        )

					        # next() with a default yields the first match, or None when the
					        # generator is exhausted without finding one.
					        return next(
					            (
					                candidate
					                for candidate in self.list_mangas()
					                if pattern.search(candidate.name)
					            ),
					            None,
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @abstractmethod
 | 
					    @abstractmethod
 | 
				
			||||||
    def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
 | 
					    def list_chapters(self, manga: _MangaT) -> list[_ChapterT]:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user