New TGRM search implementation

This commit is contained in:
2021-12-05 23:54:50 +03:00
parent 5fbe26c0f2
commit 7e825d099b
8 changed files with 91 additions and 74 deletions

View File

@@ -3,11 +3,40 @@ from app.models import Author
from app.services.common import TRGMSearchService
# Raw PostgreSQL statement that resolves a search string to an ordered array
# of author ids. It takes one bind parameter, `:query`, and yields a single
# row whose only column is the array built by `SELECT ARRAY(...)`.
#
# Matching: an author is kept when any of three name forms
# ("last first middle", "last first", "last") satisfies the `%` operator
# against `:query`, and the author has at least one book with
# is_deleted = 'f' (the EXISTS clause).
#
# Ordering: best of the three similarity() scores first (`sml`), then the
# number of non-deleted books (`books_count`), both descending.
#
# NOTE(review): the LEFT JOINs behave like inner joins here because the WHERE
# clause filters on books.is_deleted, which discards NULL-extended rows.
# NOTE(review): `similarity()` and `%` are presumably provided by the pg_trgm
# extension — confirm it is installed in the target database.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
WITH filtered_authors AS (
SELECT
id,
GREATEST(
similarity((last_name || ' ' || first_name || ' ' || middle_name), :query),
similarity((last_name || ' ' || first_name), :query),
similarity((last_name), :query)
) as sml,
(
SELECT count(*) FROM book_authors
LEFT JOIN books ON books.id = book
WHERE author = authors.id AND books.is_deleted = 'f'
) as books_count
FROM authors
WHERE (
(last_name || ' ' || first_name || ' ' || middle_name) % :query OR
(last_name || ' ' || first_name) % :query OR
(last_name) % :query
) AND
EXISTS (
SELECT * FROM book_authors
LEFT JOIN books ON books.id = book
WHERE author = authors.id AND books.is_deleted = 'f'
)
)
SELECT fauthors.id FROM filtered_authors as fauthors
ORDER BY fauthors.sml DESC, fauthors.books_count DESC
);
"""
# Search service configuration for Author records, built on TRGMSearchService.
class AuthorTGRMSearchService(TRGMSearchService):
# Model whose rows this service searches.
MODEL_CLASS = Author
# Name columns of the authors table.
# NOTE(review): FIELDS is read by the SQLAlchemy-built similarity helpers
# (fields_combinations); confirm whether it is still needed once the raw
# SQL query below is the only lookup path.
FIELDS = [
Author.Meta.table.c.last_name,
Author.Meta.table.c.first_name,
Author.Meta.table.c.middle_name
]
# Relation names handed to the base class; presumably prefetched when the
# matched authors are loaded — confirm against the base implementation.
PREFETCH_RELATED = ["source", "annotations"]
# Raw SQL (module-level constant) that returns the ordered author ids.
GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY

View File

@@ -8,15 +8,22 @@ from app.services.common import TRGMSearchService
from app.serializers.book import CreateBook, CreateRemoteBook
# Raw PostgreSQL statement that resolves a search string to an ordered array
# of book ids. It takes one bind parameter, `:query`, and yields a single row
# whose only column is the array built by `SELECT ARRAY(...)`.
#
# A book matches when its title satisfies the `%` operator against `:query`
# and the book is not soft-deleted. The is_deleted condition has to live in
# the SQL itself: the statement is executed verbatim, so the class-level
# FILTERS list is not applied to it, and without the condition deleted books
# would be returned (the author and sequence queries already exclude them).
#
# Results are ordered by similarity (best first); ties are broken by id so
# the order is stable between calls.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
    WITH filtered_books AS (
        SELECT id, similarity(title, :query) as sml FROM books
        WHERE books.title % :query AND books.is_deleted = 'f'
    )
    SELECT fbooks.id FROM filtered_books as fbooks
    ORDER BY fbooks.sml DESC, fbooks.id
);
"""
# Search service configuration for book records, built on TRGMSearchService.
class BookTGRMSearchService(TRGMSearchService):
# Model whose rows this service searches.
MODEL_CLASS = BookDB
# Column compared against the search string by the SQLAlchemy-built helpers.
FIELDS = [
BookDB.Meta.table.c.title
]
# Relation names handed to the base class; presumably prefetched when the
# matched books are loaded — confirm against the base implementation.
PREFETCH_RELATED = ["source", "authors", "annotations"]
# SQLAlchemy conditions that exclude soft-deleted books.
# `== False` is intentional: it builds a SQL comparison expression rather
# than evaluating a Python boolean.
# NOTE(review): these are only applied by the SQLAlchemy-built id lookup;
# the raw SQL in GET_OBJECT_IDS_QUERY is executed as written.
FILTERS = [
BookDB.Meta.table.c.is_deleted == False,
]
# Raw SQL (module-level constant) that returns the ordered book ids.
GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY
class BookCreator:

View File

@@ -1,7 +1,5 @@
from typing import Optional, Generic, TypeVar, Union
from itertools import permutations
from databases import Database
import json
from fastapi_pagination.api import resolve_params
from fastapi_pagination.bases import AbstractParams, RawParams
@@ -10,17 +8,7 @@ import aioredis
import orjson
from ormar import Model, QuerySet
from sqlalchemy import text, func, select, or_, Table, Column, cast, Text
from sqlalchemy.orm import Session
def join_fields(fields):
    """Concatenate column expressions into one, separated by single spaces.

    The first element seeds the expression; every following element is
    appended after a literal ``' '`` SQL fragment. ``fields`` must be a
    non-empty indexable sequence (an empty one raises ``IndexError``).
    """
    joined = fields[0]
    remaining = fields[1:]
    for column in remaining:
        separator = text("' '")
        joined += separator + column
    return joined
from sqlalchemy import Table
T = TypeVar('T', bound=Model)
@@ -28,10 +16,9 @@ T = TypeVar('T', bound=Model)
# Generic base for the per-model search services; subclasses configure it
# through the class attributes below.
class TRGMSearchService(Generic[T]):
# Model class the service operates on; set by each subclass.
MODEL_CLASS: Optional[T] = None
# Columns compared against the search string; read by fields_combinations.
FIELDS: Optional[list[Column]] = None
# Relation names for loading related objects.
# NOTE(review): their use is outside the visible part of the class — confirm.
SELECT_RELATED: Optional[Union[list[str], str]] = None
PREFETCH_RELATED: Optional[Union[list[str], str]] = None
# Extra SQLAlchemy conditions added to the SQLAlchemy-built id lookup.
FILTERS = []
# Raw SQL returning the matching ids; exposed through object_ids_query and
# executed with a single `query` bind parameter.
GET_OBJECT_IDS_QUERY: Optional[str] = None
# Cache lifetime; presumably seconds (5 minutes) — confirm where it is used.
CACHE_TTL = 5 * 60
@classmethod
@@ -60,52 +47,18 @@ class TRGMSearchService(Generic[T]):
# Class-level read-only attribute yielding every ordering of the configured
# FIELDS, so a search string can be compared against each field order.
# NOTE(review): stacking @classmethod on top of @property was deprecated in
# Python 3.11 and is no longer supported in 3.13; this accessor will need a
# different mechanism on newer interpreters.
@classmethod
@property
def fields_combinations(cls):
# Fail early when a subclass forgot to configure FIELDS.
# NOTE(review): assert statements are skipped when Python runs with -O.
assert cls.FIELDS is not None, f"FIELDS in {cls.__name__} don't set!"
assert len(cls.FIELDS) != 0, f"FIELDS in {cls.__name__} must be not empty!"
# Full-length permutations: all orderings of all fields, as a one-shot iterator.
return permutations(cls.FIELDS, len(cls.FIELDS))
@classmethod
def get_similarity_subquery(cls, query: str):
    """Build the ``sml`` score expression for ``query``.

    For every ordering of the configured fields, the fields are joined with
    spaces and compared to ``query`` (cast to text) via SQL ``similarity()``.
    The result is ``greatest(...)`` over those scores, labelled ``"sml"``.
    """
    similarities = [
        func.similarity(join_fields(ordering), cast(query, Text))
        for ordering in cls.fields_combinations
    ]
    best_score = func.greatest(*similarities)
    return best_score.label("sml")
@classmethod
def get_similarity_filter_subquery(cls, query: str):
    """Build the WHERE condition that keeps rows similar to ``query``.

    For every ordering of the configured fields, the fields are joined with
    spaces and tested against ``query`` with the SQL ``%`` operator; the
    individual tests are combined with ``OR``.

    The search string is passed through ``cast(query, Text)`` so the cast
    happens in SQL. Formatting it as ``f"{query}::text"`` would instead
    append the literal characters ``::text`` to the bound value, so rows
    would be matched against the wrong string.
    """
    return or_(
        *[
            join_fields(comb) % cast(query, Text)
            for comb in cls.fields_combinations
        ]
    )
# Returns the raw SQL configured in GET_OBJECT_IDS_QUERY, failing when a
# subclass has not set it.
# NOTE(review): _get_object_ids reads this as a plain attribute
# (`cls.object_ids_query`, no call), so it depends on the
# @classmethod/@property decorators being applied to it — they are not
# directly above this def in this listing; confirm in the actual file.
# NOTE(review): the assert is skipped under `python -O`, in which case None
# would be returned for an unconfigured subclass.
def object_ids_query(cls) -> str:
assert cls.GET_OBJECT_IDS_QUERY is not None, f"GET_OBJECT_IDS_QUERY in {cls.__name__} don't set!"
return cls.GET_OBJECT_IDS_QUERY
# Resolves a search string to the list of matching object ids.
# Raises ValueError when the database returns no row at all.
#
# NOTE(review): this listing appears to interleave two implementations — an
# SQLAlchemy-built query (similarity/filter expressions, Session, CTEs, result
# key 'array_agg_1') and a raw-SQL one (cls.object_ids_query, result key
# 'array'). Read literally, the first fetch_one result is overwritten by the
# second and the second return is unreachable; confirm which lines are live.
@classmethod
async def _get_object_ids(cls, query_data: str) -> list[int]:
# --- SQLAlchemy-built lookup ---
similarity = cls.get_similarity_subquery(query_data)
similarity_filter = cls.get_similarity_filter_subquery(query_data)
session = Session(cls.database.connection())
# CTE 'objs': (id, sml) for rows passing the similarity filter and FILTERS.
filtered_objects_query = session.query(
cls.table.c.id, similarity
).order_by(
text('sml DESC')
).filter(
similarity_filter,
*cls.FILTERS
).cte('objs')
# Collapse the ids into a single array value.
object_ids_query = session.query(
func.array_agg(filtered_objects_query.c.id)
).cte()
row = await cls.database.fetch_one(object_ids_query)
# --- raw-SQL lookup: the search string is bound as `:query` ---
row = await cls.database.fetch_one(cls.object_ids_query, {"query": query_data})
if row is None:
raise ValueError('Something is wrong!')
# Result column of the SQLAlchemy-built array_agg query.
return row['array_agg_1']
# Result column of the raw `SELECT ARRAY(...)` query.
return row['array']
@classmethod
def get_cache_key(cls, query_data: str) -> str:

View File

@@ -3,9 +3,32 @@ from app.models import Sequence
from app.services.common import TRGMSearchService
# Raw PostgreSQL statement that resolves a search string to an ordered array
# of sequence ids. It takes one bind parameter, `:query`, and yields a single
# row whose only column is the array built by `SELECT ARRAY(...)`.
#
# A sequence matches when its name satisfies the `%` operator against
# `:query` and it contains at least one book with is_deleted = 'f'.
# Results are ordered by similarity (`sml`), then by the number of
# non-deleted books (`books_count`), both descending.
#
# The statement must start with SELECT: prefixing it with EXPLAIN ANALYZE
# makes PostgreSQL return the execution plan as text rows instead of the id
# array, so the caller would find no array column in the result.
GET_OBJECTS_IDS_QUERY = """
SELECT ARRAY(
    WITH filtered_sequences AS (
        SELECT
            id,
            similarity(name, :query) as sml,
            (
                SELECT count(*) FROM book_sequences
                LEFT JOIN books ON books.id = book
                WHERE sequence = sequences.id AND books.is_deleted = 'f'
            ) as books_count
        FROM sequences
        WHERE name % :query AND
        EXISTS (
            SELECT * FROM book_sequences
            LEFT JOIN books ON books.id = book
            WHERE sequence = sequences.id AND books.is_deleted = 'f'
        )
    )
    SELECT fsequences.id FROM filtered_sequences as fsequences
    ORDER BY fsequences.sml DESC, fsequences.books_count DESC
);
"""
class SequenceTGRMSearchService(TRGMSearchService):
    """Search service configuration for ``Sequence`` records.

    Binds the module-level raw SQL to the attribute the base class reads
    when it looks up matching ids.
    """

    # Model whose rows this service searches.
    MODEL_CLASS = Sequence
    # Column compared against the search string.
    FIELDS = [
        Sequence.Meta.table.c.name
    ]
    # Relation names handed to the base class.
    PREFETCH_RELATED = ["source"]
    # The attribute name must be GET_OBJECT_IDS_QUERY (singular OBJECT): that
    # is the name TRGMSearchService declares and validates. Under any other
    # spelling the base default of None stays in effect and the lookup fails.
    GET_OBJECT_IDS_QUERY = GET_OBJECTS_IDS_QUERY