commit 77342f3ff1ffa930946d6e0487a9277486dc62d4
Author: Kurbanov Bulat
Date:   Sat Nov 20 17:36:48 2021 +0300

    Init

diff --git a/.github/workflows/build_docker_image.yml b/.github/workflows/build_docker_image.yml
new file mode 100644
index 0000000..41be1da
--- /dev/null
+++ b/.github/workflows/build_docker_image.yml
@@ -0,0 +1,49 @@
+name: Build docker image
+
+on:
+  push:
+    branches:
+      - 'main'
+
+jobs:
+  Build-Docker-Image:
+    runs-on: ubuntu-latest
+    steps:
+      -
+        name: Checkout
+        uses: actions/checkout@v2
+
+      -
+        name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+
+      - id: repository_name
+        uses: ASzc/change-string-case-action@v1
+        with:
+          string: ${{ github.repository }}
+
+      -
+        name: Login to ghcr.io
+        uses: docker/login-action@v1
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      -
+        name: Build and push
+        id: docker_build
+        uses: docker/build-push-action@v2
+        env:
+          IMAGE: ${{ steps.repository_name.outputs.lowercase }}
+        with:
+          push: true
+          tags: ghcr.io/${{ env.IMAGE }}:latest
+          context: .
+          file: ./docker/build.dockerfile
+
+      -
+        name: Invoke deployment hook
+        uses: joelwmale/webhook-action@master
+        with:
+          url: ${{ secrets.WEBHOOK_URL }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2667036
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.mypy_cache
+.vscode
+
+__pycache__
+
+venv
diff --git a/docker/build.dockerfile b/docker/build.dockerfile
new file mode 100644
index 0000000..2100c50
--- /dev/null
+++ b/docker/build.dockerfile
@@ -0,0 +1,32 @@
+FROM python:3.10-slim as build-image
+
+# RUN apt-get update \
+#     && apt-get install --no-install-recommends -y gcc build-essential python3-dev libpq-dev libffi-dev \
+#     && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /
+COPY ./requirements.txt ./
+
+ENV VENV_PATH=/opt/venv
+RUN python -m venv $VENV_PATH \
+    && . "${VENV_PATH}/bin/activate" \
+    && pip install -r requirements.txt --no-cache-dir
+
+
+FROM python:3.10-slim as runtime-image
+
+# RUN apt-get update \
+#     && apt-get install --no-install-recommends -y wget python3-dev libpq-dev libffi-dev default-mysql-client-core \
+#     && rm -rf /var/lib/apt/lists/*
+
+COPY ./src/ /app/
+
+ENV VENV_PATH=/opt/venv
+COPY --from=build-image $VENV_PATH $VENV_PATH
+ENV PATH="$VENV_PATH/bin:$PATH"
+
+EXPOSE 8080
+
+WORKDIR /app/
+
+CMD uvicorn main:app --host="0.0.0.0" --port="8080"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..a398275
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+fastapi
+pydantic
+httpx
+transliterate
+uvicorn[standard]
diff --git a/src/app/depends.py b/src/app/depends.py
new file mode 100644
index 0000000..b99768e
--- /dev/null
+++ b/src/app/depends.py
@@ -0,0 +1,9 @@
+from fastapi import Security, HTTPException, status
+
+from core.auth import default_security
+from core.config import env_config
+
+
+async def check_token(api_key: str = Security(default_security)):
+    if api_key != env_config.API_KEY:
+        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Wrong api key!")
diff --git a/src/app/services/__init__.py b/src/app/services/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/app/services/base.py b/src/app/services/base.py
new file mode 100644
index 0000000..8947bac
--- /dev/null
+++ b/src/app/services/base.py
@@ -0,0 +1,7 @@
+from typing import Protocol
+
+
+class BaseDownloader(Protocol):
+    @classmethod
+    async def download(cls, book_id: int, file_type: str, source_id: int) -> tuple[bytes, str]:
+        ...
diff --git a/src/app/services/book_library.py b/src/app/services/book_library.py new file mode 100644 index 0000000..62f8a8b --- /dev/null +++ b/src/app/services/book_library.py @@ -0,0 +1,70 @@ +from typing import Generic, TypeVar +import json + +import httpx + +from datetime import date +from pydantic import BaseModel + +from core.config import env_config + + +T = TypeVar('T') + + +class Page(BaseModel, Generic[T]): + items: list[T] + total: int + page: int + size: int + + +class Source(BaseModel): + id: int + name: str + + +class BookAuthor(BaseModel): + id: int + first_name: str + last_name: str + middle_name: str + + +class Book(BaseModel): + id: int + title: str + lang: str + file_type: str + uploaded: date + authors: list[BookAuthor] + + +class BookLibraryClient: + API_KEY = env_config.BOOK_LIBRARY_API_KEY + BASE_URL = env_config.BOOK_LIBRARY_URL + + @classmethod + @property + def auth_headers(cls): + return {'Authorization': cls.API_KEY} + + @classmethod + async def _make_request(cls, url) -> dict: + async with httpx.AsyncClient() as client: + response = await client.get(url, headers=cls.auth_headers) + return response.json() + + @classmethod + async def get_sources(cls) -> list[Source]: + data = await cls._make_request(f"{cls.BASE_URL}/api/v1/sources") + + page = Page[Source].parse_obj(data) + + return [Source.parse_obj(item) for item in page.items] + + @classmethod + async def get_remote_book(cls, source_id: int, book_id: int) -> Book: + data = await cls._make_request(f"{cls.BASE_URL}/api/v1/books/{source_id}/{book_id}") + + return Book.parse_obj(data) diff --git a/src/app/services/dowloaders_manager.py b/src/app/services/dowloaders_manager.py new file mode 100644 index 0000000..232b61d --- /dev/null +++ b/src/app/services/dowloaders_manager.py @@ -0,0 +1,29 @@ +from app.services.base import BaseDownloader +from app.services.fl_downloader import FLDownloader + +from app.services.book_library import BookLibraryClient + + +class DownloadersManager: + 
SOURCES_TABLE: dict[int, str] = {}
+    DOWNLOADERS_TABLE: dict[str, type[BaseDownloader]] = {
+        'flibusta': FLDownloader,
+    }
+
+    PREPARED = False
+
+    @classmethod
+    async def _prepare(cls):
+        sources = await BookLibraryClient.get_sources()
+        for source in sources:
+            cls.SOURCES_TABLE[source.id] = source.name
+        cls.PREPARED = True  # the flag was never set, so every request re-fetched the source list
+
+    @classmethod
+    async def get_downloader(cls, source_id: int):
+        if not cls.PREPARED:
+            await cls._prepare()
+
+        name = cls.SOURCES_TABLE[source_id]
+
+        return cls.DOWNLOADERS_TABLE[name]
diff --git a/src/app/services/fl_downloader.py b/src/app/services/fl_downloader.py
new file mode 100644
index 0000000..0c92459
--- /dev/null
+++ b/src/app/services/fl_downloader.py
@@ -0,0 +1,200 @@
+from typing import Optional
+
+import asyncio
+
+import httpx
+
+from app.services.base import BaseDownloader
+from app.services.utils import zip, unzip, get_filename, process_pool_executor
+from app.services.book_library import BookLibraryClient, Book
+
+from core.config import env_config, SourceConfig
+
+
+class NotSuccess(Exception):
+    pass
+
+
+class ReceivedHTML(Exception):
+    pass
+
+
+class FLDownloader(BaseDownloader):
+    def __init__(self, book_id: int, file_type: str, source_id: int):
+        self.book_id = book_id
+        self.original_file_type = file_type
+        self.source_id = source_id
+
+        self.book: Optional[Book] = None
+
+    @property
+    def file_type(self):
+        return self.original_file_type.replace("+zip", "")
+
+    @property
+    def need_zip(self):
+        return "+zip" in self.original_file_type
+
+    async def get_filename(self) -> str:
+        if not self.get_book_data_task.done():
+            await asyncio.wait_for(self.get_book_data_task, None)
+
+        if self.book is None:
+            raise ValueError('Book is None!')
+
+        return get_filename(self.book, self.file_type)
+
+    async def get_final_filename(self) -> str:
+        if self.need_zip:
+            return (await self.get_filename()) + '.zip'
+
+        return await self.get_filename()
+
+    async def _download_from_source(self, source_config: SourceConfig, file_type: Optional[str] = None) ->
tuple[bytes, bool]:
+        basic_url: str = source_config.URL
+        proxy: Optional[str] = source_config.PROXY
+
+        file_type_ = file_type or self.file_type
+
+        if self.file_type in ("fb2", "epub", "mobi"):
+            url = basic_url + f"/b/{self.book_id}/{file_type_}"
+        else:
+            url = basic_url + f"/b/{self.book_id}/download"
+
+        httpx_proxy = None
+        if proxy is not None:
+            httpx_proxy = httpx.Proxy(
+                url=proxy
+            )
+
+        async with httpx.AsyncClient(proxies=httpx_proxy) as client:
+            response = await client.get(url, follow_redirects=True, timeout=10 * 60)  # timeout is a request option
+            content_type = response.headers.get("Content-Type", "")  # "" keeps the `in` checks safe if the header is absent
+
+            if response.status_code != 200:
+                raise NotSuccess(f'Status code is {response.status_code}!')
+
+            if "text/html" in content_type:
+                raise ReceivedHTML()
+
+            if "application/zip" in content_type:
+                return response.content, True
+
+            return response.content, False
+
+    async def _download_with_converting(self) -> tuple[bytes, bool]:
+        tasks = set()
+
+        for source in env_config.FL_SOURCES:
+            tasks.add(
+                asyncio.create_task(
+                    self._download_from_source(source, file_type='fb2')
+                )
+            )
+
+        content: Optional[bytes] = None
+        is_zip: Optional[bool] = None
+
+        while tasks and content is None:  # stop racing as soon as one mirror succeeds
+            done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+            for task in done:
+                try:
+                    content, is_zip = task.result()
+                    break
+                except (NotSuccess, ReceivedHTML):
+                    continue
+
+        for p_task in tasks:  # the remaining mirrors are no longer needed
+            p_task.cancel()
+
+        if content is None or is_zip is None:
+            raise ValueError
+
+        if is_zip:
+            content = await asyncio.get_event_loop().run_in_executor(
+                process_pool_executor, unzip, content, 'fb2'
+            )
+
+        async with httpx.AsyncClient() as client:
+            form = {'format': self.file_type}
+            files = {'file': content}
+            response = await client.post(env_config.CONVERTER_URL, data=form, files=files, timeout=2 * 60)
+
+            if response.status_code != 200:
+                raise ValueError
+
+            return response.content, False  # return the converter output, not the fb2 input
+
+    async def _get_book_data(self):
+        self.book = await BookLibraryClient.get_remote_book(
+            self.source_id, self.book_id
+        )
+
+
async def _get_content(self) -> tuple[bytes, str]:
+        tasks = set()
+
+        if self.file_type in ['epub', 'mobi']:
+            tasks.add(
+                asyncio.create_task(
+                    self._download_with_converting()
+                )
+            )
+
+        for source in env_config.FL_SOURCES:
+            tasks.add(
+                asyncio.create_task(
+                    self._download_from_source(source)
+                )
+            )
+
+        content: Optional[bytes] = None
+        is_zip: Optional[bool] = None
+
+        while tasks and content is None:
+            done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
+
+            for task in done:
+                try:
+                    content, is_zip = task.result()
+                    break
+                except (NotSuccess, ReceivedHTML, ValueError):
+                    continue
+
+        # Cancel the losers only once the loop is over. Waiting on cancelled
+        # tasks again makes task.result() raise CancelledError, which is a
+        # BaseException and therefore escapes the except clause above.
+        for p_task in tasks:
+            p_task.cancel()
+
+
+        if content is None or is_zip is None:
+            raise ValueError
+
+        if is_zip:
+            content = await asyncio.get_event_loop().run_in_executor(
+                process_pool_executor, unzip, content, self.file_type
+            )
+
+        if self.need_zip:
+            content = await asyncio.get_event_loop().run_in_executor(
+                process_pool_executor, zip, await self.get_filename(), content
+            )
+
+        return content, await self.get_final_filename()
+
+    async def _download(self):
+        self.get_book_data_task = asyncio.create_task(self._get_book_data())
+
+        tasks = [
+            asyncio.create_task(self._get_content()),
+            self.get_book_data_task,
+        ]
+
+        await asyncio.wait(tasks)
+
+        return tasks[0].result()
+
+    @classmethod
+    async def download(cls, book_id: int, file_type: str, source_id: int) -> tuple[bytes, str]:
+        downloader = cls(book_id, file_type, source_id)
+        return await downloader._download()
diff --git a/src/app/services/utils.py b/src/app/services/utils.py
new file mode 100644
index 0000000..9929e7a
--- /dev/null
+++ b/src/app/services/utils.py
@@ -0,0 +1,88 @@
+import io
+import zipfile
+
+from concurrent.futures.process import ProcessPoolExecutor
+
+import transliterate
+
+from app.services.book_library import Book, BookAuthor
+
+
+process_pool_executor = ProcessPoolExecutor(2)
+
+
+def unzip(file_bytes: bytes, file_type: str):
+    zip_file =
zipfile.ZipFile(io.BytesIO(file_bytes)) + for name in zip_file.namelist(): # type: str + if file_type in name.lower(): + return zip_file.read(name) + raise FileNotFoundError + + +def zip(filename, content): + buffer = io.BytesIO() + zip_file = zipfile.ZipFile( + file=buffer, + mode='w', + compression=zipfile.ZIP_DEFLATED, + allowZip64=False, + compresslevel=9 + ) + zip_file.writestr(filename, content) + + for zfile in zip_file.filelist: + zfile.create_system = 0 + + zip_file.close() + + buffer.seek(0) + + return buffer.read() + + +def get_short_name(author: BookAuthor) -> str: + name_parts = [] + + if author.last_name: + name_parts.append(author.last_name) + + if author.first_name: + name_parts.append(author.first_name[:1]) + + if author.middle_name: + name_parts.append(author.middle_name[:1]) + + return " ".join(name_parts) + + +def get_filename(book: Book, file_type: str) -> str: + filename_parts = [] + + if book.authors: + filename_parts.append( + '_'.join([get_short_name(a) for a in book.authors]) + '_-_' + ) + + if book.title.startswith(" "): + filename_parts.append( + book.title[1:] + ) + else: + filename_parts.append( + book.title + ) + + filename = "".join(filename_parts) + + if book.lang in ['ru']: + filename = transliterate.translit(filename, 'ru', reversed=True) + + for c in "(),….’!\"?»«':": + filename = filename.replace(c, '') + + for c, r in (('—', '-'), ('/', '_'), ('№', 'N'), (' ', '_'), ('–', '-'), ('á', 'a'), (' ', '_')): + filename = filename.replace(c, r) + + right_part = f'.{book.id}.{file_type}' + + return filename[:64 - len(right_part)] + right_part diff --git a/src/app/views.py b/src/app/views.py new file mode 100644 index 0000000..0f84a1e --- /dev/null +++ b/src/app/views.py @@ -0,0 +1,26 @@ +from fastapi import APIRouter, Depends +from fastapi.responses import Response + +from app.services.dowloaders_manager import DownloadersManager + +from app.depends import check_token + + +router = APIRouter( + tags=["downloader"], + 
dependencies=[Depends(check_token)],
+)
+
+
+@router.get("/download/{source_id}/{book_id}/{file_type}")
+async def download(source_id: int, book_id: int, file_type: str):
+    downloader = await DownloadersManager.get_downloader(source_id)
+
+    content, filename = await downloader.download(book_id, file_type, source_id)
+
+    return Response(
+        content,
+        headers={
+            "Content-Disposition": f"attachment; filename={filename}"  # was misspelled, so clients ignored the filename
+        }
+    )
diff --git a/src/core/app.py b/src/core/app.py
new file mode 100644
index 0000000..4f37812
--- /dev/null
+++ b/src/core/app.py
@@ -0,0 +1,11 @@
+from fastapi import FastAPI
+
+from app.views import router
+
+
+def start_app() -> FastAPI:
+    app = FastAPI()
+
+    app.include_router(router)
+
+    return app
diff --git a/src/core/auth.py b/src/core/auth.py
new file mode 100644
index 0000000..7cc07b5
--- /dev/null
+++ b/src/core/auth.py
@@ -0,0 +1,4 @@
+from fastapi.security import APIKeyHeader
+
+
+default_security = APIKeyHeader(name="Authorization")
diff --git a/src/core/config.py b/src/core/config.py
new file mode 100644
index 0000000..c01452d
--- /dev/null
+++ b/src/core/config.py
@@ -0,0 +1,22 @@
+from typing import Optional
+
+from pydantic import BaseSettings, BaseModel
+
+
+class SourceConfig(BaseModel):
+    URL: str
+    PROXY: Optional[str]
+
+
+class EnvConfig(BaseSettings):
+    API_KEY: str
+
+    FL_SOURCES: list[SourceConfig]
+
+    BOOK_LIBRARY_API_KEY: str
+    BOOK_LIBRARY_URL: str
+
+    CONVERTER_URL: str
+
+
+env_config = EnvConfig()
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..2739482
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,3 @@
+from core.app import start_app
+
+app = start_app()