mirror of
https://github.com/flibusta-apps/book_library_server.git
synced 2025-12-06 15:15:36 +01:00
New TGRM search implementation
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
@@ -44,6 +45,9 @@ class AuthorBook(BaseModel):
|
|||||||
title: str
|
title: str
|
||||||
lang: str
|
lang: str
|
||||||
file_type: str
|
file_type: str
|
||||||
|
available_types: list[str]
|
||||||
|
uploaded: date
|
||||||
|
annotation_exists: bool
|
||||||
|
|
||||||
class Config(ORJSONConfig):
|
class Config(ORJSONConfig):
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -3,11 +3,40 @@ from app.models import Author
|
|||||||
from app.services.common import TRGMSearchService
|
from app.services.common import TRGMSearchService
|
||||||
|
|
||||||
|
|
||||||
|
GET_OBJECTS_IDS_QUERY = """
|
||||||
|
SELECT ARRAY(
|
||||||
|
WITH filtered_authors AS (
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
GREATEST(
|
||||||
|
similarity((last_name || ' ' || first_name || ' ' || middle_name), :query),
|
||||||
|
similarity((last_name || ' ' || first_name), :query),
|
||||||
|
similarity((last_name), :query)
|
||||||
|
) as sml,
|
||||||
|
(
|
||||||
|
SELECT count(*) FROM book_authors
|
||||||
|
LEFT JOIN books ON books.id = book
|
||||||
|
WHERE author = authors.id AND books.is_deleted = 'f'
|
||||||
|
) as books_count
|
||||||
|
FROM authors
|
||||||
|
WHERE (
|
||||||
|
(last_name || ' ' || first_name || ' ' || middle_name) % :query OR
|
||||||
|
(last_name || ' ' || first_name) % :query OR
|
||||||
|
(last_name) % :query
|
||||||
|
) AND
|
||||||
|
EXISTS (
|
||||||
|
SELECT * FROM book_authors
|
||||||
|
LEFT JOIN books ON books.id = book
|
||||||
|
WHERE author = authors.id AND books.is_deleted = 'f'
|
||||||
|
)
|
||||||
|
)
|
||||||
|
SELECT fauthors.id FROM filtered_authors as fauthors
|
||||||
|
ORDER BY fauthors.sml DESC, fauthors.books_count DESC
|
||||||
|
);
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
class AuthorTGRMSearchService(TRGMSearchService):
|
class AuthorTGRMSearchService(TRGMSearchService):
|
||||||
MODEL_CLASS = Author
|
MODEL_CLASS = Author
|
||||||
FIELDS = [
|
|
||||||
Author.Meta.table.c.last_name,
|
|
||||||
Author.Meta.table.c.first_name,
|
|
||||||
Author.Meta.table.c.middle_name
|
|
||||||
]
|
|
||||||
PREFETCH_RELATED = ["source", "annotations"]
|
PREFETCH_RELATED = ["source", "annotations"]
|
||||||
|
GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
|
||||||
|
|||||||
@@ -8,15 +8,22 @@ from app.services.common import TRGMSearchService
|
|||||||
from app.serializers.book import CreateBook, CreateRemoteBook
|
from app.serializers.book import CreateBook, CreateRemoteBook
|
||||||
|
|
||||||
|
|
||||||
|
# Raw SQL that returns, as a single array, the ids of non-deleted books whose
# title is trigram-similar to :query. Ids are ordered by similarity (best match
# first) and then by id so the ordering is stable between calls.
#
# The `is_deleted = 'f'` condition replaces the ORM-level
# `FILTERS = [is_deleted == False]` that the previous implementation applied;
# without it deleted books would be returned by the search. The author and
# sequence search queries apply the same condition to the books they count.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
    WITH filtered_books AS (
        SELECT id, similarity(title, :query) as sml FROM books
        WHERE books.title % :query AND books.is_deleted = 'f'
    )
    SELECT fbooks.id FROM filtered_books as fbooks
    ORDER BY fbooks.sml DESC, fbooks.id
);
"""
|
||||||
|
|
||||||
|
|
||||||
class BookTGRMSearchService(TRGMSearchService):
|
class BookTGRMSearchService(TRGMSearchService):
|
||||||
MODEL_CLASS = BookDB
|
MODEL_CLASS = BookDB
|
||||||
FIELDS = [
|
|
||||||
BookDB.Meta.table.c.title
|
|
||||||
]
|
|
||||||
PREFETCH_RELATED = ["source", "authors", "annotations"]
|
PREFETCH_RELATED = ["source", "authors", "annotations"]
|
||||||
FILTERS = [
|
GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
|
||||||
BookDB.Meta.table.c.is_deleted == False,
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
class BookCreator:
|
class BookCreator:
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
from typing import Optional, Generic, TypeVar, Union
|
from typing import Optional, Generic, TypeVar, Union
|
||||||
from itertools import permutations
|
|
||||||
from databases import Database
|
from databases import Database
|
||||||
import json
|
|
||||||
|
|
||||||
from fastapi_pagination.api import resolve_params
|
from fastapi_pagination.api import resolve_params
|
||||||
from fastapi_pagination.bases import AbstractParams, RawParams
|
from fastapi_pagination.bases import AbstractParams, RawParams
|
||||||
@@ -10,17 +8,7 @@ import aioredis
|
|||||||
import orjson
|
import orjson
|
||||||
|
|
||||||
from ormar import Model, QuerySet
|
from ormar import Model, QuerySet
|
||||||
from sqlalchemy import text, func, select, or_, Table, Column, cast, Text
|
from sqlalchemy import Table
|
||||||
from sqlalchemy.orm import Session
|
|
||||||
|
|
||||||
|
|
||||||
def join_fields(fields):
|
|
||||||
result = fields[0]
|
|
||||||
|
|
||||||
for el in fields[1:]:
|
|
||||||
result += text("' '") + el
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar('T', bound=Model)
|
T = TypeVar('T', bound=Model)
|
||||||
@@ -28,10 +16,9 @@ T = TypeVar('T', bound=Model)
|
|||||||
|
|
||||||
class TRGMSearchService(Generic[T]):
|
class TRGMSearchService(Generic[T]):
|
||||||
MODEL_CLASS: Optional[T] = None
|
MODEL_CLASS: Optional[T] = None
|
||||||
FIELDS: Optional[list[Column]] = None
|
|
||||||
SELECT_RELATED: Optional[Union[list[str], str]] = None
|
SELECT_RELATED: Optional[Union[list[str], str]] = None
|
||||||
PREFETCH_RELATED: Optional[Union[list[str], str]] = None
|
PREFETCH_RELATED: Optional[Union[list[str], str]] = None
|
||||||
FILTERS = []
|
GET_OBJECT_IDS_QUERY: Optional[str] = None
|
||||||
CACHE_TTL = 5 * 60
|
CACHE_TTL = 5 * 60
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@@ -60,52 +47,18 @@ class TRGMSearchService(Generic[T]):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@property
|
@property
|
||||||
def fields_combinations(cls):
|
def object_ids_query(cls) -> str:
|
||||||
assert cls.FIELDS is not None, f"FIELDS in {cls.__name__} don't set!"
|
assert cls.GET_OBJECT_IDS_QUERY is not None, f"GET_OBJECT_IDS_QUERY in {cls.__name__} don't set!"
|
||||||
assert len(cls.FIELDS) != 0, f"FIELDS in {cls.__name__} must be not empty!"
|
return cls.GET_OBJECT_IDS_QUERY
|
||||||
|
|
||||||
return permutations(cls.FIELDS, len(cls.FIELDS))
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_similarity_subquery(cls, query: str):
|
|
||||||
combs = cls.fields_combinations
|
|
||||||
|
|
||||||
return func.greatest(
|
|
||||||
*[func.similarity(join_fields(comb), cast(query, Text)) for comb in combs]
|
|
||||||
).label("sml")
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_similarity_filter_subquery(cls, query: str):
|
|
||||||
return or_(
|
|
||||||
*[join_fields(comb) % f"{query}::text" for comb in cls.fields_combinations]
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
async def _get_object_ids(cls, query_data: str) -> list[int]:
|
async def _get_object_ids(cls, query_data: str) -> list[int]:
|
||||||
similarity = cls.get_similarity_subquery(query_data)
|
row = await cls.database.fetch_one(cls.object_ids_query, {"query": query_data})
|
||||||
similarity_filter = cls.get_similarity_filter_subquery(query_data)
|
|
||||||
|
|
||||||
session = Session(cls.database.connection())
|
|
||||||
|
|
||||||
filtered_objects_query = session.query(
|
|
||||||
cls.table.c.id, similarity
|
|
||||||
).order_by(
|
|
||||||
text('sml DESC')
|
|
||||||
).filter(
|
|
||||||
similarity_filter,
|
|
||||||
*cls.FILTERS
|
|
||||||
).cte('objs')
|
|
||||||
|
|
||||||
object_ids_query = session.query(
|
|
||||||
func.array_agg(filtered_objects_query.c.id)
|
|
||||||
).cte()
|
|
||||||
|
|
||||||
row = await cls.database.fetch_one(object_ids_query)
|
|
||||||
|
|
||||||
if row is None:
|
if row is None:
|
||||||
raise ValueError('Something is wrong!')
|
raise ValueError('Something is wrong!')
|
||||||
|
|
||||||
return row['array_agg_1']
|
return row['array']
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_cache_key(cls, query_data: str) -> str:
|
def get_cache_key(cls, query_data: str) -> str:
|
||||||
|
|||||||
@@ -3,9 +3,32 @@ from app.models import Sequence
|
|||||||
from app.services.common import TRGMSearchService
|
from app.services.common import TRGMSearchService
|
||||||
|
|
||||||
|
|
||||||
|
# Raw SQL that returns, as a single array, the ids of sequences whose name is
# trigram-similar to :query and that contain at least one non-deleted book.
# Ids are ordered by similarity first, then by the number of non-deleted books.
#
# This must be a plain SELECT: the search service reads the result from the
# `array` column of the single returned row. A debugging `EXPLAIN ANALYZE`
# prefix would make the statement return the query plan instead of the ids.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY (
    WITH filtered_sequences AS (
        SELECT
            id,
            similarity(name, :query) as sml,
            (
                SELECT count(*) FROM book_sequences
                LEFT JOIN books ON books.id = book
                WHERE sequence = sequences.id AND books.is_deleted = 'f'
            ) as books_count
        FROM sequences
        WHERE name % :query AND
            EXISTS (
                SELECT * FROM book_sequences
                LEFT JOIN books ON books.id = book
                WHERE sequence = sequences.id AND books.is_deleted = 'f'
            )
    )
    SELECT fsequences.id FROM filtered_sequences as fsequences
    ORDER BY fsequences.sml DESC, fsequences.books_count DESC
);
"""
|
||||||
|
|
||||||
|
|
||||||
class SequenceTGRMSearchService(TRGMSearchService):
    """Search service for sequences, driven by this module's raw SQL query."""

    MODEL_CLASS = Sequence
    PREFETCH_RELATED = ["source"]

    # The base class looks up GET_OBJECT_IDS_QUERY (singular "OBJECT") and
    # asserts that it is set; the module-level constant is spelled
    # GET_OBJECTS_IDS_QUERY. Assigning under the base-class name is what
    # actually wires the query into the service.
    GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from typing import Protocol, TypeVar, Any, Generic, Sequence, runtime_checkable
|
from typing import Protocol, TypeVar, Any, Generic, Sequence, runtime_checkable
|
||||||
|
|
||||||
from pydantic import PositiveInt
|
from pydantic import conint
|
||||||
|
|
||||||
from fastapi_pagination import Page, Params
|
from fastapi_pagination import Page, Params
|
||||||
from fastapi_pagination.bases import AbstractParams
|
from fastapi_pagination.bases import AbstractParams
|
||||||
@@ -16,7 +16,7 @@ T = TypeVar('T', ToDict, Any)
|
|||||||
|
|
||||||
|
|
||||||
class CustomPage(Page[T], Generic[T]):
|
class CustomPage(Page[T], Generic[T]):
|
||||||
total_pages: PositiveInt
|
total_pages: conint(ge=0) # type: ignore
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def create(
|
def create(
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ async def get_author_annotation(id: int):
|
|||||||
@author_router.get("/{id}/books", response_model=CustomPage[AuthorBook], dependencies=[Depends(Params)])
|
@author_router.get("/{id}/books", response_model=CustomPage[AuthorBook], dependencies=[Depends(Params)])
|
||||||
async def get_author_books(id: int):
|
async def get_author_books(id: int):
|
||||||
return await paginate(
|
return await paginate(
|
||||||
BookDB.objects.filter(author__id=id).order_by('title')
|
BookDB.objects.select_related(["source", "annotations"]).filter(authors__id=id).order_by('title')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,8 +6,9 @@ from fastapi_pagination import Params
|
|||||||
from fastapi_pagination.ext.ormar import paginate
|
from fastapi_pagination.ext.ormar import paginate
|
||||||
from app.utils.pagination import CustomPage
|
from app.utils.pagination import CustomPage
|
||||||
|
|
||||||
from app.models import Book as BookDB, Author as AuthorDB, AuthorAnnotation as AuthorAnnotationDB
|
from app.models import Book as BookDB, Author as AuthorDB, BookAnnotation as BookAnnotationDB
|
||||||
from app.serializers.book import Book, RemoteBook, BookDetail, CreateBook, UpdateBook, CreateRemoteBook
|
from app.serializers.book import Book, RemoteBook, BookDetail, CreateBook, UpdateBook, CreateRemoteBook
|
||||||
|
from app.serializers.book_annotation import BookAnnotation
|
||||||
from app.services.book import BookTGRMSearchService, BookCreator
|
from app.services.book import BookTGRMSearchService, BookCreator
|
||||||
from app.filters.book import get_book_filter
|
from app.filters.book import get_book_filter
|
||||||
from app.depends import check_token
|
from app.depends import check_token
|
||||||
@@ -82,9 +83,9 @@ async def update_book(id: int, data: UpdateBook):
|
|||||||
return book
|
return book
|
||||||
|
|
||||||
|
|
||||||
@book_router.get("/{id}/annotation")
|
@book_router.get("/{id}/annotation", response_model=BookAnnotation)
|
||||||
async def get_book_annotation(id: int):
|
async def get_book_annotation(id: int):
|
||||||
annotation = await AuthorAnnotationDB.objects.get(book__id=id)
|
annotation = await BookAnnotationDB.objects.get(book__id=id)
|
||||||
|
|
||||||
if annotation is None:
|
if annotation is None:
|
||||||
raise HTTPException(status.HTTP_404_NOT_FOUND)
|
raise HTTPException(status.HTTP_404_NOT_FOUND)
|
||||||
|
|||||||
Reference in New Issue
Block a user