From b3162c801b04e536961bfc9e5e5f1cd72c485e0e Mon Sep 17 00:00:00 2001 From: Bulat Kurbanov Date: Tue, 16 Aug 2022 17:21:54 +0300 Subject: [PATCH] Fix saving data to temp file --- poetry.lock | 96 ++++++++----------------------- pyproject.toml | 5 +- src/app/services/fl_downloader.py | 36 ++++++++---- src/app/services/utils.py | 27 ++++++++- 4 files changed, 80 insertions(+), 84 deletions(-) diff --git a/poetry.lock b/poetry.lock index 3a1f021..b5711fe 100644 --- a/poetry.lock +++ b/poetry.lock @@ -36,11 +36,11 @@ aiofiles = ">=0.6.0" [[package]] name = "certifi" -version = "2021.10.8" +version = "2022.6.15" description = "Python package for providing Mozilla's CA Bundle." category = "main" optional = false -python-versions = "*" +python-versions = ">=3.6" [[package]] name = "click" @@ -55,7 +55,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "colorama" -version = "0.4.4" +version = "0.4.5" description = "Cross-platform colored terminal text." category = "main" optional = false @@ -63,7 +63,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [[package]] name = "fastapi" -version = "0.78.0" +version = "0.79.0" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" category = "main" optional = false @@ -172,8 +172,8 @@ prometheus-client = ">=0.8.0,<1.0.0" [[package]] name = "pydantic" -version = "1.9.0" -description = "Data validation and settings management using python 3.6 type hinting" +version = "1.9.2" +description = "Data validation and settings management using python type hints" category = "main" optional = false python-versions = ">=3.6.1" @@ -201,7 +201,7 @@ idna2008 = ["idna"] [[package]] name = "sentry-sdk" -version = "1.5.12" +version = "1.9.5" description = "Python client for Sentry (https://sentry.io)" category = "main" optional = false @@ -209,7 +209,10 @@ python-versions = "*" [package.dependencies] certifi = "*" -urllib3 = ">=1.10.0" +urllib3 = [ + {version = ">=1.26.9", markers = "python_version >= \"3.5\""}, + {version = ">=1.26.11", markers = "python_version >= \"3.6\""}, +] [package.extras] aiohttp = ["aiohttp (>=3.5)"] @@ -219,6 +222,7 @@ celery = ["celery (>=3)"] chalice = ["chalice (>=1.16.0)"] django = ["django (>=1.8)"] falcon = ["falcon (>=1.4)"] +fastapi = ["fastapi (>=0.79.0)"] flask = ["flask (>=0.11)", "blinker (>=1.1)"] httpx = ["httpx (>=0.16.0)"] pure_eval = ["pure-eval", "executing", "asttokens"] @@ -227,6 +231,7 @@ quart = ["quart (>=0.16.1)", "blinker (>=1.1)"] rq = ["rq (>=0.6)"] sanic = ["sanic (>=0.8)"] sqlalchemy = ["sqlalchemy (>=1.2)"] +starlette = ["starlette (>=0.19.1)"] tornado = ["tornado (>=5)"] [[package]] @@ -273,7 +278,7 @@ six = ">=1.1.0" [[package]] name = "typing-extensions" -version = "4.2.0" +version = "4.3.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false @@ -281,11 +286,11 @@ python-versions = ">=3.7" [[package]] name = "urllib3" -version = "1.26.9" +version = "1.26.11" description = "HTTP library with thread-safe connection pooling, file post, and more." category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4" +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, <4" [package.extras] brotli = ["brotlicffi (>=0.8.0)", "brotli (>=1.0.9)", "brotlipy (>=0.6.0)"] @@ -323,7 +328,7 @@ test = ["aiohttp", "flake8 (>=3.9.2,<3.10.0)", "psutil", "pycodestyle (>=2.7.0,< [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "6af500f649fbb560037d859cc8b1e1edf880491689da9367b5bd9e18e5ad9c71" +content-hash = "87f726798e2e7e491011214329318fdebbd822aa693d9e06112aee67f387ee2d" [metadata.files] aiofiles = [ @@ -338,21 +343,15 @@ asynctempfile = [ {file = "asynctempfile-0.5.0-py3-none-any.whl", hash = "sha256:cec59bdb71c850e3de9bb4415f88998165c364709696240eea9ec5204a7439af"}, {file = "asynctempfile-0.5.0.tar.gz", hash = "sha256:4a647c747357e8827397baadbdfe87f3095d30923fa789e797111eb02160884a"}, ] -certifi = [ - {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, - {file = "certifi-2021.10.8.tar.gz", hash = "sha256:78884e7c1d4b00ce3cea67b44566851c4343c120abd683433ce934a68ea58872"}, -] +certifi = [] click = [ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, ] -colorama = [ - {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, - {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, -] +colorama = [] fastapi = [ - {file = "fastapi-0.78.0-py3-none-any.whl", hash = "sha256:15fcabd5c78c266fa7ae7d8de9b384bfc2375ee0503463a6febbe3bab69d6f65"}, - {file = "fastapi-0.78.0.tar.gz", hash = "sha256:3233d4a789ba018578658e2af1a4bb5e38bdd122ff722b313666a9b2c6786a83"}, + {file = "fastapi-0.79.0-py3-none-any.whl", hash = "sha256:d337563424ceada23857f73d5abe8dae0c28e4cccb53b2af06e78b7bb4a1c7d7"}, + {file = "fastapi-0.79.0.tar.gz", hash = "sha256:cf0ff6db25b91d321050c4112baab0908c90f19b40bf257f9591d2f9780d1f22"}, ] gunicorn = [ {file = "gunicorn-20.1.0-py3-none-any.whl", hash = "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e"}, @@ -379,51 +378,12 @@ prometheus-client = [ {file = "prometheus_client-0.14.1.tar.gz", hash = "sha256:5459c427624961076277fdc6dc50540e2bacb98eebde99886e59ec55ed92093a"}, ] prometheus-fastapi-instrumentator = [] -pydantic = [ - {file = "pydantic-1.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb23bcc093697cdea2708baae4f9ba0e972960a835af22560f6ae4e7e47d33f5"}, - {file = "pydantic-1.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d5278bd9f0eee04a44c712982343103bba63507480bfd2fc2790fa70cd64cf4"}, - {file = "pydantic-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab624700dc145aa809e6f3ec93fb8e7d0f99d9023b713f6a953637429b437d37"}, - {file = "pydantic-1.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c8d7da6f1c1049eefb718d43d99ad73100c958a5367d30b9321b092771e96c25"}, - {file = "pydantic-1.9.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3c3b035103bd4e2e4a28da9da7ef2fa47b00ee4a9cf4f1a735214c1bcd05e0f6"}, - {file = "pydantic-1.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3011b975c973819883842c5ab925a4e4298dffccf7782c55ec3580ed17dc464c"}, - {file = "pydantic-1.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:086254884d10d3ba16da0588604ffdc5aab3f7f09557b998373e885c690dd398"}, - {file = "pydantic-1.9.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0fe476769acaa7fcddd17cadd172b156b53546ec3614a4d880e5d29ea5fbce65"}, - {file = "pydantic-1.9.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8e9dcf1ac499679aceedac7e7ca6d8641f0193c591a2d090282aaf8e9445a46"}, - {file = "pydantic-1.9.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1e4c28f30e767fd07f2ddc6f74f41f034d1dd6bc526cd59e63a82fe8bb9ef4c"}, - {file = "pydantic-1.9.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:c86229333cabaaa8c51cf971496f10318c4734cf7b641f08af0a6fbf17ca3054"}, - {file = "pydantic-1.9.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:c0727bda6e38144d464daec31dff936a82917f431d9c39c39c60a26567eae3ed"}, - {file = "pydantic-1.9.0-cp36-cp36m-win_amd64.whl", hash = "sha256:dee5ef83a76ac31ab0c78c10bd7d5437bfdb6358c95b91f1ba7ff7b76f9996a1"}, - {file = "pydantic-1.9.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:d9c9bdb3af48e242838f9f6e6127de9be7063aad17b32215ccc36a09c5cf1070"}, - {file = "pydantic-1.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ee7e3209db1e468341ef41fe263eb655f67f5c5a76c924044314e139a1103a2"}, - {file = "pydantic-1.9.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b6037175234850ffd094ca77bf60fb54b08b5b22bc85865331dd3bda7a02fa1"}, - {file = "pydantic-1.9.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b2571db88c636d862b35090ccf92bf24004393f85c8870a37f42d9f23d13e032"}, - {file = "pydantic-1.9.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8b5ac0f1c83d31b324e57a273da59197c83d1bb18171e512908fe5dc7278a1d6"}, - {file = "pydantic-1.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:bbbc94d0c94dd80b3340fc4f04fd4d701f4b038ebad72c39693c794fd3bc2d9d"}, - {file = "pydantic-1.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e0896200b6a40197405af18828da49f067c2fa1f821491bc8f5bde241ef3f7d7"}, - {file = "pydantic-1.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bdfdadb5994b44bd5579cfa7c9b0e1b0e540c952d56f627eb227851cda9db77"}, - {file = "pydantic-1.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:574936363cd4b9eed8acdd6b80d0143162f2eb654d96cb3a8ee91d3e64bf4cf9"}, - {file = "pydantic-1.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c556695b699f648c58373b542534308922c46a1cda06ea47bc9ca45ef5b39ae6"}, - {file = "pydantic-1.9.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f947352c3434e8b937e3aa8f96f47bdfe6d92779e44bb3f41e4c213ba6a32145"}, - {file = "pydantic-1.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5e48ef4a8b8c066c4a31409d91d7ca372a774d0212da2787c0d32f8045b1e034"}, - {file = "pydantic-1.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:96f240bce182ca7fe045c76bcebfa0b0534a1bf402ed05914a6f1dadff91877f"}, - {file = "pydantic-1.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:815ddebb2792efd4bba5488bc8fde09c29e8ca3227d27cf1c6990fc830fd292b"}, - {file = "pydantic-1.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c5b77947b9e85a54848343928b597b4f74fc364b70926b3c4441ff52620640c"}, - {file = "pydantic-1.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c68c3bc88dbda2a6805e9a142ce84782d3930f8fdd9655430d8576315ad97ce"}, - {file = "pydantic-1.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a79330f8571faf71bf93667d3ee054609816f10a259a109a0738dac983b23c3"}, - {file = "pydantic-1.9.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f5a64b64ddf4c99fe201ac2724daada8595ada0d102ab96d019c1555c2d6441d"}, - {file = "pydantic-1.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a733965f1a2b4090a5238d40d983dcd78f3ecea221c7af1497b845a9709c1721"}, - {file = "pydantic-1.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:2cc6a4cb8a118ffec2ca5fcb47afbacb4f16d0ab8b7350ddea5e8ef7bcc53a16"}, - {file = "pydantic-1.9.0-py3-none-any.whl", hash = "sha256:085ca1de245782e9b46cefcf99deecc67d418737a1fd3f6a4f511344b613a5b3"}, - {file = "pydantic-1.9.0.tar.gz", hash = "sha256:742645059757a56ecd886faf4ed2441b9c0cd406079c2b4bee51bcc3fbcd510a"}, -] +pydantic = [] rfc3986 = [ {file = "rfc3986-1.5.0-py2.py3-none-any.whl", hash = "sha256:a86d6e1f5b1dc238b218b012df0aa79409667bb209e58da56d0b94704e712a97"}, {file = "rfc3986-1.5.0.tar.gz", hash = "sha256:270aaf10d87d0d4e095063c65bf3ddbc6ee3d0b226328ce21e036f946e421835"}, ] -sentry-sdk = [ - {file = "sentry-sdk-1.5.12.tar.gz", hash = "sha256:259535ba66933eacf85ab46524188c84dcb4c39f40348455ce15e2c0aca68863"}, - {file = "sentry_sdk-1.5.12-py2.py3-none-any.whl", hash = "sha256:778b53f0a6c83b1ee43d3b7886318ba86d975e686cb2c7906ccc35b334360be1"}, -] +sentry-sdk = [] six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -440,14 +400,8 @@ transliterate = [ {file = "transliterate-1.10.2-py2.py3-none-any.whl", hash = "sha256:010a5021bf6021689c4fade0985f3f7b3db1f2f16a48a09a56797f171c08ed42"}, {file = "transliterate-1.10.2.tar.gz", hash = "sha256:bc608e0d48e687db9c2b1d7ea7c381afe0d1849cad216087d8e03d8d06a57c85"}, ] -typing-extensions = [ - {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, - {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, -] -urllib3 = [ - {file = "urllib3-1.26.9-py2.py3-none-any.whl", hash = "sha256:44ece4d53fb1706f667c9bd1c648f5469a2ec925fcf3a776667042d645472c14"}, - {file = "urllib3-1.26.9.tar.gz", hash = "sha256:aabaf16477806a5e1dd19aa41f8c2b7950dd3c746362d7e3223dbe6de6ac448e"}, -] +typing-extensions = [] +urllib3 = [] uvicorn = [] uvloop = [ {file = "uvloop-0.16.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6224f1401025b748ffecb7a6e2652b17768f30b1a6a3f7b44660e5b5b690b12d"}, diff --git a/pyproject.toml b/pyproject.toml index 233240d..28ed092 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ authors = ["Kurbanov Bulat "] [tool.poetry.dependencies] python = "^3.9" -fastapi = ">=0.78.0" +fastapi = ">=0.79.0" httpx = ">=0.23.0" transliterate = "^1.10.2" uvicorn = {version = ">=0.18.2", extras = ["standart"]} @@ -36,7 +36,8 @@ exclude = ''' [tool.flake8] ignore = [ # Whitespace before ':' ( https://www.flake8rules.com/rules/E203.html ) - "E203" + "E203", + "W503" ] max-line-length=88 max-complexity = 15 diff --git a/src/app/services/fl_downloader.py b/src/app/services/fl_downloader.py index e26ed79..80aa24d 100644 --- a/src/app/services/fl_downloader.py +++ b/src/app/services/fl_downloader.py @@ -10,7 +10,13 @@ import httpx from app.services.base import BaseDownloader from app.services.book_library import BookLibraryClient from app.services.exceptions import NotSuccess, ReceivedHTML, ConvertationError -from app.services.utils import zip, unzip, get_filename, process_pool_executor +from app.services.utils import ( + zip, + unzip, + get_filename, + process_pool_executor, + async_retry, +) from core.config import env_config, SourceConfig @@ -49,6 +55,7 @@ class FLDownloader(BaseDownloader): return await self.get_filename() + @async_retry(httpx.ReadTimeout, times=5, delay=10) async def _download_from_source( self, source_config: SourceConfig, file_type: Optional[str] = None ) -> tuple[httpx.AsyncClient, httpx.Response, bool]: @@ -62,7 +69,10 @@ class FLDownloader(BaseDownloader): else: url = basic_url + f"/b/{self.book_id}/download" - client_kwargs = {"timeout": 10 * 60, "follow_redirects": True} + client_kwargs = { + "timeout": httpx.Timeout(10 * 60, connect=15, read=60), + "follow_redirects": True, + } if proxy is not None: client = httpx.AsyncClient(proxies=httpx.Proxy(url=proxy), **client_kwargs) @@ -84,8 +94,13 @@ class FLDownloader(BaseDownloader): raise NotSuccess(f"Status code is {response.status_code}!") content_type = response.headers.get("Content-Type") + content_disposition = response.headers.get("Content-Disposition", "") - if "text/html" in content_type: + if ( + "text/html" in content_type + and self.file_type.lower() != "html" + and "html" not in content_disposition.lower() + ): raise ReceivedHTML() return client, response, "application/zip" in content_type @@ -159,15 +174,18 @@ class FLDownloader(BaseDownloader): await temp_file.flush() await temp_file.seek(0) - async def _unzip(self, response: httpx.Response) -> Optional[str]: + async def _unzip(self, response: httpx.Response, file_type: str) -> Optional[str]: async with asynctempfile.NamedTemporaryFile(delete=True) as temp_file: - await self._write_response_content_to_ntf(temp_file, response) + try: + await self._write_response_content_to_ntf(temp_file, response) + except httpx.HTTPError: + return None await temp_file.flush() try: return await asyncio.get_event_loop().run_in_executor( - process_pool_executor, unzip, temp_file.name, "fb2" + process_pool_executor, unzip, temp_file.name, file_type ) except (FileNotFoundError, zipfile.BadZipFile): return None @@ -191,7 +209,7 @@ class FLDownloader(BaseDownloader): try: if is_zip: - filename_to_convert = await self._unzip(response) + filename_to_convert = await self._unzip(response, "fb2") else: async with asynctempfile.NamedTemporaryFile(delete=False) as temp_file: await self._write_response_content_to_ntf(temp_file, response) @@ -225,8 +243,6 @@ class FLDownloader(BaseDownloader): if response.status_code != 200: raise ConvertationError - print(response.status_code, filename_to_convert) - return converter_client, converter_response, False except (asyncio.CancelledError, ConvertationError): await converter_response.aclose() @@ -252,7 +268,7 @@ class FLDownloader(BaseDownloader): try: if is_zip and self.file_type.lower() not in self.EXCLUDE_UNZIP: - temp_filename = await self._unzip(response) + temp_filename = await self._unzip(response, self.file_type) else: async with asynctempfile.NamedTemporaryFile(delete=False) as temp_file: temp_filename = temp_file.name diff --git a/src/app/services/utils.py b/src/app/services/utils.py index 87708ce..bd34fcf 100644 --- a/src/app/services/utils.py +++ b/src/app/services/utils.py @@ -1,3 +1,4 @@ +import asyncio from concurrent.futures.process import ProcessPoolExecutor import os import re @@ -27,7 +28,7 @@ def unzip(temp_zipfile: str, file_type: str) -> Optional[str]: result = tempfile.NamedTemporaryFile(delete=False) - for name in zip_file.namelist(): # type: str + for name in zip_file.namelist(): if file_type.lower() in name.lower() or name.lower() == "elector": with zip_file.open(name, "r") as internal_file: while chunk := internal_file.read(2048): @@ -127,3 +128,27 @@ def get_filename(book_id: int, book: Book, file_type: str) -> str: right_part = f".{book_id}.{file_type_}" return filename[: 64 - len(right_part) - 1] + right_part + + +def async_retry(*exceptions: type[Exception], times: int = 1, delay: float = 1.0): + """ + :param times: retry count + :param delay: delay time + :param default_content: set default content + :return + """ + + def func_wrapper(f): + async def wrapper(*args, **kwargs): + for retry in range(times): + try: + return await f(*args, **kwargs) + except exceptions as e: + if retry + 1 == times: + raise e + + await asyncio.sleep(delay) + + return wrapper + + return func_wrapper