Repository URL to install this package:
|
Version:
0.35.0 ▾
|
pylance
/
query.py
|
|---|
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright The Lance Authors
import abc
from enum import Enum
from typing import Optional
from .lance import PyFullTextQuery
class FullTextQueryType(Enum):
MATCH = "match"
MATCH_PHRASE = "match_phrase"
BOOST = "boost"
MULTI_MATCH = "multi_match"
BOOLEAN = "boolean"
class FullTextOperator(Enum):
AND = "AND"
OR = "OR"
class Occur(Enum):
SHOULD = "SHOULD"
MUST = "MUST"
MUST_NOT = "MUST_NOT"
class FullTextQuery(abc.ABC):
_inner: PyFullTextQuery
@property
def inner(self) -> PyFullTextQuery:
"""
Get the inner query object.
Returns
-------
PyFullTextQuery
The inner query object.
"""
return self._inner
@abc.abstractmethod
def query_type(self) -> FullTextQueryType:
"""
Get the query type of the query.
Returns
-------
str
The type of the query.
"""
def __and__(self, other: "FullTextQuery") -> "FullTextQuery":
"""
Combine two queries with a logical AND operation.
Parameters
----------
other : FullTextQuery
The other query to combine with.
Returns
-------
FullTextQuery
A new query that combines both queries with AND.
"""
return BooleanQuery([(Occur.MUST, self), (Occur.MUST, other)])
def __or__(self, other: "FullTextQuery") -> "FullTextQuery":
"""
Combine two queries with a logical OR operation.
Parameters
----------
other : FullTextQuery
The other query to combine with.
Returns
-------
FullTextQuery
A new query that combines both queries with OR.
"""
return BooleanQuery([(Occur.SHOULD, self), (Occur.SHOULD, other)])
class MatchQuery(FullTextQuery):
def __init__(
self,
query: str,
column: str,
*,
boost: float = 1.0,
fuzziness: int = 0,
max_expansions: int = 50,
operator: FullTextOperator = FullTextOperator.OR,
prefix_length: int = 0,
):
"""
Match query for full-text search.
Parameters
----------
query : str
The query string to match against.
column : str
The name of the column to match against.
boost : float, default 1.0
The boost factor for the query.
The score of each matching document is multiplied by this value.
fuzziness : int, optional
The maximum edit distance for each term in the match query.
Defaults to 0 (exact match).
If None, fuzziness is applied automatically by the rules:
- 0 for terms with length <= 2
- 1 for terms with length <= 5
- 2 for terms with length > 5
max_expansions : int, optional
The maximum number of terms to consider for fuzzy matching.
Defaults to 50.
operator : FullTextOperator, default OR
The operator to use for combining the query results.
Can be either `AND` or `OR`.
If `AND`, all terms in the query must match.
If `OR`, at least one term in the query must match.
prefix_length : int, default 0
The number of beginning characters being unchanged for fuzzy matching.
This is useful to achieve prefix matching.
"""
self._inner = PyFullTextQuery.match_query(
query,
column,
boost=boost,
fuzziness=fuzziness,
max_expansions=max_expansions,
operator=operator.value,
prefix_length=prefix_length,
)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MATCH
class PhraseQuery(FullTextQuery):
def __init__(self, query: str, column: str, *, slop: int = 0):
"""
Phrase query for full-text search.
Parameters
----------
query : str
The query string to match against.
column : str
The name of the column to match against.
"""
self._inner = PyFullTextQuery.phrase_query(query, column, slop)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MATCH_PHRASE
class BoostQuery(FullTextQuery):
def __init__(
self,
positive: FullTextQuery,
negative: FullTextQuery,
*,
negative_boost: float = 0.5,
):
"""
Boost query for full-text search.
Parameters
----------
positive : dict
The positive query object.
negative : dict
The negative query object.
negative_boost : float, default 0.5
The boost factor for the negative query.
"""
self._inner = PyFullTextQuery.boost_query(
positive.inner, negative.inner, negative_boost
)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.BOOST
class MultiMatchQuery(FullTextQuery):
def __init__(
self,
query: str,
columns: list[str],
*,
boosts: Optional[list[float]] = None,
operator: FullTextOperator = FullTextOperator.OR,
):
"""
Multi-match query for full-text search.
Parameters
----------
query : str | list[Query]
If a string, the query string to match against.
columns : list[str]
The list of columns to match against.
boosts : list[float], optional
The list of boost factors for each column. If not provided,
all columns will have the same boost factor.
operator : FullTextOperator, default OR
The operator to use for combining the query results.
Can be either `AND` or `OR`.
It would be applied to all columns individually.
For example, if the operator is `AND`,
then the query "hello world" is equal to
`match("hello AND world", column1) OR match("hello AND world", column2)`.
"""
self._inner = PyFullTextQuery.multi_match_query(
query, columns, boosts=boosts, operator=operator.value
)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.MULTI_MATCH
class BooleanQuery(FullTextQuery):
def __init__(self, queries: list[tuple[Occur, FullTextQuery]]):
"""
Boolean query for full-text search.
Parameters
----------
queries : list[tuple(Occur, FullTextQuery)]
The list of queries with their occurrence requirements.
"""
self._inner = PyFullTextQuery.boolean_query(
[(occur.value, query.inner) for occur, query in queries]
)
def query_type(self) -> FullTextQueryType:
return FullTextQueryType.BOOLEAN