mirror of
https://github.com/flibusta-apps/books_downloader.git
synced 2025-12-06 06:55:37 +01:00
Init
This commit is contained in:
49
.github/workflows/build_docker_image.yml
vendored
Normal file
49
.github/workflows/build_docker_image.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
# Builds the service image and pushes it to GitHub Container Registry
# whenever the `main` branch receives a push, then calls a deployment webhook.
name: Build docker image

on:
  push:
    branches:
      - 'main'

jobs:
  Build-Docker-Image:
    runs-on: ubuntu-latest
    steps:
      -
        name: Checkout
        uses: actions/checkout@v2

      -
        name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # The `lowercase` output of this step is used below to build the image tag
      # (presumably because registry image names must be lower-case).
      - id: repository_name
        uses: ASzc/change-string-case-action@v1
        with:
          string: ${{ github.repository }}

      -
        name: Login to ghcr.io
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      -
        name: Build and push
        id: docker_build
        uses: docker/build-push-action@v2
        env:
          IMAGE: ${{ steps.repository_name.outputs.lowercase }}
        with:
          push: true
          tags: ghcr.io/${{ env.IMAGE }}:latest
          context: .
          file: ./docker/build.dockerfile

      # NOTE(review): this action is referenced by the moving `master` ref rather
      # than a tag or commit SHA - consider pinning it.
      -
        name: Invoke deployment hook
        uses: joelwmale/webhook-action@master
        with:
          url: ${{ secrets.WEBHOOK_URL }}
|
||||
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
.mypy_cache
|
||||
.vscode
|
||||
|
||||
__pycache__
|
||||
|
||||
venv
|
||||
32
docker/build.dockerfile
Normal file
32
docker/build.dockerfile
Normal file
@@ -0,0 +1,32 @@
|
||||
# ---- Stage 1: build a virtualenv containing all Python dependencies ----
FROM python:3.10-slim AS build-image

# RUN apt-get update \
#     && apt-get install --no-install-recommends -y gcc build-essential python3-dev libpq-dev libffi-dev \
#     && rm -rf /var/lib/apt/lists/*

WORKDIR /
COPY ./requirements.txt ./

ENV VENV_PATH=/opt/venv
RUN python -m venv $VENV_PATH \
    && . "${VENV_PATH}/bin/activate" \
    && pip install -r requirements.txt --no-cache-dir


# ---- Stage 2: runtime image with the application sources and the prebuilt venv ----
FROM python:3.10-slim AS runtime-image

# RUN apt-get update \
#     && apt-get install --no-install-recommends -y wget python3-dev libpq-dev libffi-dev default-mysql-client-core \
#     && rm -rf /var/lib/apt/lists/*

COPY ./src/ /app/

ENV VENV_PATH=/opt/venv
COPY --from=build-image $VENV_PATH $VENV_PATH
# Put the venv first on PATH so `uvicorn` resolves to the one installed in stage 1.
ENV PATH="$VENV_PATH/bin:$PATH"

EXPOSE 8080

WORKDIR /app/

# Exec form: uvicorn is started directly instead of through `/bin/sh -c`,
# so it receives stop signals (SIGTERM) from the container runtime itself.
CMD ["uvicorn", "main:app", "--host=0.0.0.0", "--port=8080"]
|
||||
5
requirements.txt
Normal file
5
requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
fastapi
|
||||
pydantic
|
||||
httpx
|
||||
transliterate
|
||||
uvicorn[standard]
|
||||
9
src/app/depends.py
Normal file
9
src/app/depends.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from fastapi import Security, HTTPException, status
|
||||
|
||||
from core.auth import default_security
|
||||
from core.config import env_config
|
||||
|
||||
|
||||
async def check_token(api_key: str = Security(default_security)):
    """Reject the request unless the supplied API key matches the configured one.

    Args:
        api_key: Value extracted from the request by ``default_security``.

    Raises:
        HTTPException: 403 when the key differs from ``env_config.API_KEY``.
    """
    import secrets

    # Compare as bytes in constant time: a plain ``!=`` short-circuits on the
    # first differing character, and ``compare_digest`` rejects non-ASCII ``str``.
    supplied = api_key.encode("utf-8")
    expected = env_config.API_KEY.encode("utf-8")

    if not secrets.compare_digest(supplied, expected):
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Wrong api key!")
|
||||
0
src/app/services/__init__.py
Normal file
0
src/app/services/__init__.py
Normal file
7
src/app/services/base.py
Normal file
7
src/app/services/base.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from typing import Protocol
|
||||
|
||||
|
||||
class BaseDownloader(Protocol):
    """Structural interface that every source-specific downloader provides."""

    @classmethod
    async def download(cls, book_id: int, file_type: str, source_id: int) -> tuple[bytes, str]:
        """Fetch a book and return a ``(content, filename)`` pair."""
|
||||
70
src/app/services/book_library.py
Normal file
70
src/app/services/book_library.py
Normal file
@@ -0,0 +1,70 @@
|
||||
from typing import Generic, TypeVar
|
||||
import json
|
||||
|
||||
import httpx
|
||||
|
||||
from datetime import date
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.config import env_config
|
||||
|
||||
|
||||
# Type parameter for the element type carried by ``Page``.
T = TypeVar('T')


class Page(BaseModel, Generic[T]):
    """One page of a paginated response from the book library API.

    NOTE(review): ``get_sources`` re-parses every item with ``Source.parse_obj``
    after parsing the page, which suggests ``items`` are not coerced to ``T``
    here - confirm against the installed pydantic version.
    """

    # Elements contained in this page.
    items: list[T]
    # Pagination counters as reported by the API.
    total: int
    page: int
    size: int
|
||||
|
||||
|
||||
class Source(BaseModel):
    """A book source as returned by ``/api/v1/sources``."""

    id: int
    # Used by DownloadersManager as the key for selecting a downloader class.
    name: str
|
||||
|
||||
|
||||
class BookAuthor(BaseModel):
    """Author record embedded in a ``Book``."""

    id: int
    first_name: str
    last_name: str
    middle_name: str
|
||||
|
||||
|
||||
class Book(BaseModel):
    """Book metadata as returned by ``/api/v1/books/{source_id}/{book_id}``."""

    id: int
    title: str
    # Language code; ``get_filename`` transliterates the name when it is 'ru'.
    lang: str
    file_type: str
    uploaded: date
    authors: list[BookAuthor]
|
||||
|
||||
|
||||
class BookLibraryClient:
    """Minimal async client for the book library HTTP API.

    All methods are classmethods; credentials and the base URL are read once
    from ``env_config`` when the class is created.
    """

    API_KEY = env_config.BOOK_LIBRARY_API_KEY
    BASE_URL = env_config.BOOK_LIBRARY_URL

    @classmethod
    def _build_auth_headers(cls) -> dict:
        """Return the headers that authenticate a request to the library."""
        return {'Authorization': cls.API_KEY}

    # NOTE: stacking ``classmethod`` on ``property`` is deprecated since
    # Python 3.11. The attribute is kept for backward compatibility only;
    # internal code uses ``_build_auth_headers`` instead.
    @classmethod
    @property
    def auth_headers(cls):
        return cls._build_auth_headers()

    @classmethod
    async def _make_request(cls, url) -> dict:
        """GET ``url`` with auth headers and return the decoded JSON body.

        Raises:
            httpx.HTTPStatusError: when the server answers with a 4xx/5xx status.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(url, headers=cls._build_auth_headers())

        # Fail with a clear HTTP error instead of a confusing JSON/validation
        # error further down when the library rejects the request.
        response.raise_for_status()

        return response.json()

    @classmethod
    async def get_sources(cls) -> list[Source]:
        """Return the sources listed by ``/api/v1/sources``."""
        data = await cls._make_request(f"{cls.BASE_URL}/api/v1/sources")

        page = Page[Source].parse_obj(data)

        # Items are parsed again explicitly so the result is ``Source`` objects
        # regardless of how the generic page model handled them.
        return [Source.parse_obj(item) for item in page.items]

    @classmethod
    async def get_remote_book(cls, source_id: int, book_id: int) -> Book:
        """Return metadata of book ``book_id`` within source ``source_id``."""
        data = await cls._make_request(f"{cls.BASE_URL}/api/v1/books/{source_id}/{book_id}")

        return Book.parse_obj(data)
|
||||
29
src/app/services/dowloaders_manager.py
Normal file
29
src/app/services/dowloaders_manager.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from app.services.base import BaseDownloader
|
||||
from app.services.fl_downloader import FLDownloader
|
||||
|
||||
from app.services.book_library import BookLibraryClient
|
||||
|
||||
|
||||
class DownloadersManager:
    """Resolves a source id to the downloader class that serves it."""

    # source id -> source name, filled from the book library on first use.
    SOURCES_TABLE: dict[int, str] = {}
    # source name -> downloader implementation.
    DOWNLOADERS_TABLE: dict[str, type[BaseDownloader]] = {
        'flibusta': FLDownloader,
    }

    PREPARED = False

    @classmethod
    async def _prepare(cls):
        """Load the sources list from the book library into ``SOURCES_TABLE``."""
        sources = await BookLibraryClient.get_sources()

        for source in sources:
            cls.SOURCES_TABLE[source.id] = source.name

        # Mark as loaded only after a successful fetch; without this the
        # sources list was requested again on every single download.
        cls.PREPARED = True

    @classmethod
    async def get_downloader(cls, source_id: int) -> type[BaseDownloader]:
        """Return the downloader class registered for ``source_id``.

        Raises:
            KeyError: when the source id is unknown or no downloader is
                registered for its name.
        """
        if not cls.PREPARED:
            await cls._prepare()

        name = cls.SOURCES_TABLE[source_id]

        return cls.DOWNLOADERS_TABLE[name]
|
||||
200
src/app/services/fl_downloader.py
Normal file
200
src/app/services/fl_downloader.py
Normal file
@@ -0,0 +1,200 @@
|
||||
from typing import Optional
|
||||
|
||||
import asyncio
|
||||
|
||||
import httpx
|
||||
|
||||
from app.services.base import BaseDownloader
|
||||
from app.services.utils import zip, unzip, get_filename, process_pool_executor
|
||||
from app.services.book_library import BookLibraryClient, Book
|
||||
|
||||
from core.config import env_config, SourceConfig
|
||||
|
||||
|
||||
class NotSuccess(Exception):
    """Raised when a source answers with a status code other than 200."""
|
||||
|
||||
|
||||
class ReceivedHTML(Exception):
    """Raised when a source returns an HTML page instead of a book file."""
|
||||
|
||||
|
||||
class FLDownloader(BaseDownloader):
    """Downloads a book from the configured ``FL_SOURCES`` mirrors.

    All mirrors are queried concurrently and the first usable answer wins.
    For ``epub``/``mobi`` an additional attempt downloads ``fb2`` and sends it
    to the converter service. A ``+zip`` suffix on the requested file type
    means the final content is returned zipped.
    """

    # Per-mirror failures that must not abort the whole download.
    _SOURCE_FAILURES = (NotSuccess, ReceivedHTML, httpx.HTTPError)

    def __init__(self, book_id: int, file_type: str, source_id: int):
        self.book_id = book_id
        self.original_file_type = file_type
        self.source_id = source_id

        self.book: Optional[Book] = None
        # Created in ``_download``; fetches ``self.book`` in the background.
        self.get_book_data_task: Optional[asyncio.Task] = None

    @property
    def file_type(self):
        """Requested file type without the ``+zip`` suffix."""
        return self.original_file_type.replace("+zip", "")

    @property
    def need_zip(self):
        """Whether the caller asked for the result to be zipped."""
        return "+zip" in self.original_file_type

    async def get_filename(self) -> str:
        """Return the file name of the (unzipped) book.

        Waits for the book metadata request if it is still running.

        Raises:
            ValueError: when book metadata is not available.
        """
        task = self.get_book_data_task
        if task is not None and not task.done():
            await task

        if self.book is None:
            raise ValueError('Book is None!')

        return get_filename(self.book, self.file_type)

    async def get_final_filename(self) -> str:
        """Return the file name of the delivered file (``.zip`` added if zipped)."""
        filename = await self.get_filename()
        return filename + '.zip' if self.need_zip else filename

    @staticmethod
    async def _first_successful(tasks: set, failures: tuple) -> tuple[bytes, bool]:
        """Return the result of the first task that finishes without a tolerated failure.

        Tasks still running when the helper exits (success, error or
        cancellation) are cancelled, so they are never awaited afterwards.

        Raises:
            ValueError: when every task failed with one of ``failures``.
        """
        pending = set(tasks)

        try:
            while pending:
                done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)

                result = None
                # Inspect every finished task so that tolerated exceptions are
                # always retrieved, even when another task already succeeded.
                for task in done:
                    try:
                        outcome = task.result()
                    except failures:
                        continue

                    if result is None:
                        result = outcome

                if result is not None:
                    return result
        finally:
            for task in pending:
                task.cancel()

        raise ValueError('All download attempts failed!')

    async def _download_from_source(
        self, source_config: SourceConfig, file_type: Optional[str] = None
    ) -> tuple[bytes, bool]:
        """Download the book from one mirror.

        Args:
            source_config: Mirror base URL and optional proxy.
            file_type: Overrides ``self.file_type`` for this request.

        Returns:
            ``(content, is_zip)`` where ``is_zip`` tells whether the response
            content type contains ``application/zip``.

        Raises:
            NotSuccess: when the status code is not 200.
            ReceivedHTML: when the mirror answered with an HTML page.
        """
        basic_url: str = source_config.URL
        proxy: Optional[str] = source_config.PROXY

        file_type_ = file_type or self.file_type

        # Decide on the effective type: the override, when given, is what is
        # actually requested from the mirror.
        if file_type_ in ("fb2", "epub", "mobi"):
            url = basic_url + f"/b/{self.book_id}/{file_type_}"
        else:
            url = basic_url + f"/b/{self.book_id}/download"

        httpx_proxy = None
        if proxy is not None:
            httpx_proxy = httpx.Proxy(url=proxy)

        async with httpx.AsyncClient(proxies=httpx_proxy) as client:
            # The timeout belongs to the request; passing it to ``headers.get``
            # raised TypeError on every download.
            response = await client.get(url, follow_redirects=True, timeout=10 * 60)

        # Default to an empty string so a missing header cannot break the checks below.
        content_type = response.headers.get("Content-Type", "")

        if response.status_code != 200:
            raise NotSuccess(f'Status code is {response.status_code}!')

        if "text/html" in content_type:
            raise ReceivedHTML()

        if "application/zip" in content_type:
            return response.content, True

        return response.content, False

    async def _download_with_converting(self) -> tuple[bytes, bool]:
        """Download the book as ``fb2`` and convert it to ``self.file_type``.

        Returns:
            ``(converted_content, False)``.

        Raises:
            ValueError: when no mirror delivered the fb2 file or the converter
                did not answer with status 200.
        """
        tasks = {
            asyncio.create_task(self._download_from_source(source, file_type='fb2'))
            for source in env_config.FL_SOURCES
        }

        content, is_zip = await self._first_successful(tasks, self._SOURCE_FAILURES)

        if is_zip:
            content = await asyncio.get_running_loop().run_in_executor(
                process_pool_executor, unzip, content, 'fb2'
            )

        async with httpx.AsyncClient() as client:
            form = {'format': self.file_type}
            files = {'file': content}
            response = await client.post(env_config.CONVERTER_URL, data=form, files=files, timeout=2 * 60)

        if response.status_code != 200:
            raise ValueError

        # Return the converter output; previously the unconverted fb2 bytes
        # were returned and the response was discarded.
        return response.content, False

    async def _get_book_data(self):
        """Fetch book metadata from the book library into ``self.book``."""
        self.book = await BookLibraryClient.get_remote_book(
            self.source_id, self.book_id
        )

    async def _get_content(self) -> tuple[bytes, str]:
        """Obtain the book content and its final file name.

        Returns:
            ``(content, filename)``; content is zipped when ``need_zip`` is set.

        Raises:
            ValueError: when no attempt produced the file.
        """
        tasks = set()

        if self.file_type in ['epub', 'mobi']:
            tasks.add(
                asyncio.create_task(self._download_with_converting())
            )

        for source in env_config.FL_SOURCES:
            tasks.add(
                asyncio.create_task(self._download_from_source(source))
            )

        # ValueError is how the converting attempt reports its own failure.
        content, is_zip = await self._first_successful(
            tasks, self._SOURCE_FAILURES + (ValueError,)
        )

        loop = asyncio.get_running_loop()

        if is_zip:
            content = await loop.run_in_executor(
                process_pool_executor, unzip, content, self.file_type
            )

        if self.need_zip:
            content = await loop.run_in_executor(
                process_pool_executor, zip, await self.get_filename(), content
            )

        return content, await self.get_final_filename()

    async def _download(self):
        """Run the metadata request and the content download concurrently."""
        self.get_book_data_task = asyncio.create_task(self._get_book_data())
        content_task = asyncio.create_task(self._get_content())

        await asyncio.wait([content_task, self.get_book_data_task])

        return content_task.result()

    @classmethod
    async def download(cls, book_id: int, file_type: str, source_id: int) -> tuple[bytes, str]:
        """Download a book and return ``(content, filename)``."""
        downloader = cls(book_id, file_type, source_id)
        return await downloader._download()
|
||||
88
src/app/services/utils.py
Normal file
88
src/app/services/utils.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
from concurrent.futures.process import ProcessPoolExecutor
|
||||
|
||||
import transliterate
|
||||
|
||||
from app.services.book_library import Book, BookAuthor
|
||||
|
||||
|
||||
# Shared pool of two worker processes; the downloader runs ``zip``/``unzip``
# in it through ``run_in_executor``.
process_pool_executor = ProcessPoolExecutor(2)
|
||||
|
||||
|
||||
def unzip(file_bytes: bytes, file_type: str) -> bytes:
    """Extract the first archive member whose name contains ``file_type``.

    Args:
        file_bytes: Raw bytes of a ZIP archive.
        file_type: Substring looked up in each lower-cased member name.

    Returns:
        The uncompressed content of the first matching member.

    Raises:
        FileNotFoundError: when no member name contains ``file_type``.
    """
    # The context manager closes the archive on every exit path.
    with zipfile.ZipFile(io.BytesIO(file_bytes)) as zip_file:
        for name in zip_file.namelist():
            if file_type in name.lower():
                return zip_file.read(name)

    raise FileNotFoundError(f"No '{file_type}' entry found in the archive")
|
||||
|
||||
|
||||
def zip(filename, content):
    """Pack ``content`` into an in-memory ZIP archive as a single member.

    The member is stored under ``filename`` with maximum DEFLATE compression,
    and its ``create_system`` field is set to 0 before the archive is finalised.

    Returns:
        The bytes of the finished archive.
    """
    sink = io.BytesIO()

    with zipfile.ZipFile(
        file=sink,
        mode='w',
        compression=zipfile.ZIP_DEFLATED,
        allowZip64=False,
        compresslevel=9,
    ) as archive:
        archive.writestr(filename, content)

        # Must happen before the archive is closed: the central directory,
        # which records this field, is written on close.
        for entry in archive.filelist:
            entry.create_system = 0

    return sink.getvalue()
|
||||
|
||||
|
||||
def get_short_name(author: BookAuthor) -> str:
    """Build a short author label: last name plus first/middle initials.

    Empty name parts are skipped; the remaining parts are joined by spaces.
    """
    pieces = (
        author.last_name,
        author.first_name[:1] if author.first_name else "",
        author.middle_name[:1] if author.middle_name else "",
    )

    return " ".join(piece for piece in pieces if piece)
|
||||
|
||||
|
||||
def get_filename(book: Book, file_type: str) -> str:
    """Build a sanitised download file name for ``book``.

    The name has the form ``<authors>_-_<title>.<book id>.<file_type>``.
    Russian names are transliterated, punctuation is removed, separators are
    normalised, and the part before the suffix is truncated so that the whole
    name is at most 64 characters long.

    Args:
        book: Book metadata (``authors``, ``title``, ``lang`` and ``id`` are read).
        file_type: Extension appended to the name.

    Returns:
        The resulting file name.
    """
    filename_parts = []

    if book.authors:
        filename_parts.append(
            '_'.join([get_short_name(a) for a in book.authors]) + '_-_'
        )

    # Only a single leading space is dropped from the title.
    filename_parts.append(
        book.title[1:] if book.title.startswith(" ") else book.title
    )

    filename = "".join(filename_parts)

    if book.lang in ['ru']:
        filename = transliterate.translit(filename, 'ru', reversed=True)

    removed = "(),….’!\"?»«':"
    replaced = {
        '—': '-',
        '/': '_',
        '№': 'N',
        ' ': '_',
        '–': '-',
        'á': 'a',
        # The original list repeated the plain-space entry, which had no effect;
        # presumably the second one was meant to be a non-breaking space.
        '\u00a0': '_',
    }
    # One pass is equivalent to the chained replaces: no replacement output
    # is itself a removed or replaced character.
    filename = filename.translate(
        str.maketrans({**dict.fromkeys(removed), **replaced})
    )

    right_part = f'.{book.id}.{file_type}'

    # Guard the bound: a negative slice end would cut from the right instead.
    return filename[:max(64 - len(right_part), 0)] + right_part
|
||||
26
src/app/views.py
Normal file
26
src/app/views.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from fastapi import APIRouter, Depends
|
||||
from fastapi.responses import Response
|
||||
|
||||
from app.services.dowloaders_manager import DownloadersManager
|
||||
|
||||
from app.depends import check_token
|
||||
|
||||
|
||||
# Router for the download endpoints; every route registered on it runs the
# ``check_token`` dependency first.
router = APIRouter(
    tags=["downloader"],
    dependencies=[Depends(check_token)],
)
|
||||
|
||||
|
||||
@router.get("/download/{source_id}/{book_id}/{file_type}")
async def download(source_id: int, book_id: int, file_type: str):
    """Download a book and return it as an attachment.

    Args:
        source_id: Id of the source the book belongs to.
        book_id: Id of the book within that source.
        file_type: Requested file type, passed through to the downloader.

    Returns:
        A response carrying the file content and a ``Content-Disposition``
        header with the file name.
    """
    downloader = await DownloadersManager.get_downloader(source_id)

    content, filename = await downloader.download(book_id, file_type, source_id)

    return Response(
        content,
        headers={
            # Was misspelled "Conten-Disposition", so clients never saw the file name.
            "Content-Disposition": f"attachment; filename={filename}"
        }
    )
|
||||
11
src/core/app.py
Normal file
11
src/core/app.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app.views import router
|
||||
|
||||
|
||||
def start_app() -> FastAPI:
    """Create the FastAPI application and register the views router on it."""
    application = FastAPI()
    application.include_router(router)
    return application
|
||||
4
src/core/auth.py
Normal file
4
src/core/auth.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from fastapi.security import APIKeyHeader
|
||||
|
||||
|
||||
# Security scheme that takes the API key from the ``Authorization`` request header.
default_security = APIKeyHeader(name="Authorization")
|
||||
22
src/core/config.py
Normal file
22
src/core/config.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from typing import Optional
|
||||
|
||||
from pydantic import BaseSettings, BaseModel
|
||||
|
||||
|
||||
class SourceConfig(BaseModel):
    """Connection settings of one download mirror."""

    # Base URL; the downloader appends ``/b/<book id>/...`` to it.
    URL: str
    # Optional proxy URL used for requests to this mirror.
    PROXY: Optional[str]
|
||||
|
||||
|
||||
class EnvConfig(BaseSettings):
    """Service settings, loaded by pydantic ``BaseSettings``."""

    # Key that incoming requests must present (checked in ``check_token``).
    API_KEY: str

    # Mirrors queried by the downloader.
    FL_SOURCES: list[SourceConfig]

    # Credentials and base URL of the book library API.
    BOOK_LIBRARY_API_KEY: str
    BOOK_LIBRARY_URL: str

    # Endpoint that fb2 files are posted to for conversion.
    CONVERTER_URL: str
|
||||
|
||||
|
||||
# Settings instance shared by the rest of the application; created at import time.
env_config = EnvConfig()
|
||||
3
src/main.py
Normal file
3
src/main.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from core.app import start_app
|
||||
|
||||
# ASGI application object; the Dockerfile starts it as ``uvicorn main:app``.
app = start_app()
|
||||
Reference in New Issue
Block a user