Repository URL to install this package:
Version:
0.1.0-1 ▾
|
odigos-demo-inventory
/
opt
/
odigos-demo-inventory
/
site-packages
/
rapidfuzz
/
distance
/
Postfix_py.py
|
---|
# SPDX-License-Identifier: MIT
# Copyright (C) 2022 Max Bachmann
from __future__ import annotations
from rapidfuzz._common_py import conv_sequences
from rapidfuzz._utils import is_none, setupPandas
def distance(
    s1,
    s2,
    *,
    processor=None,
    score_cutoff=None,
):
    """
    Calculates the postfix distance between two strings.

    The distance is the length of the longer input minus the postfix
    similarity of the two inputs.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable applied to both inputs before they are compared.
        Default is None, which deactivates this behaviour.
    score_cutoff : int or None, optional
        Largest distance that is still reported as is. If the distance is
        bigger than score_cutoff, ``score_cutoff + 1`` is returned instead.
        Default is None, which deactivates this behaviour.

    Returns
    -------
    distance : int
        distance between s1 and s2
    """
    if processor is not None:
        s1, s2 = processor(s1), processor(s2)

    s1, s2 = conv_sequences(s1, s2)
    longest = max(len(s1), len(s2))
    # The inputs are already preprocessed, so no processor is forwarded.
    dist = longest - similarity(s1, s2)

    if score_cutoff is None or dist <= score_cutoff:
        return dist
    return score_cutoff + 1
def similarity(
    s1,
    s2,
    *,
    processor=None,
    score_cutoff=None,
):
    """
    Calculates the postfix similarity between two strings.

    The similarity is the number of trailing elements that both inputs
    have in common, counted from the end until the first mismatch.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable applied to both inputs before they are compared.
        Default is None, which deactivates this behaviour.
    score_cutoff : int, optional
        Smallest similarity that is still reported as is. If the similarity
        is smaller than score_cutoff, 0 is returned instead. Default is
        None, which deactivates this behaviour.

    Returns
    -------
    similarity : int
        similarity between s1 and s2
    """
    if processor is not None:
        s1, s2 = processor(s1), processor(s2)

    s1, s2 = conv_sequences(s1, s2)

    # Walk both inputs from the back; ``matched`` is the 1-based position of
    # the pair under inspection and is only committed when the pair is equal.
    sim = 0
    tail_pairs = zip(reversed(s1), reversed(s2))
    for matched, (tail1, tail2) in enumerate(tail_pairs, start=1):
        if tail1 != tail2:
            break
        sim = matched

    if score_cutoff is None or sim >= score_cutoff:
        return sim
    return 0
def normalized_distance(
    s1,
    s2,
    *,
    processor=None,
    score_cutoff=None,
):
    """
    Calculates a normalized postfix distance in the range [0, 1].

    This is calculated as ``1 - normalized_similarity``.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable applied to both inputs before they are compared.
        Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_dist > score_cutoff 1.0 is returned instead. Default is
        None, which deactivates this behaviour.

    Returns
    -------
    norm_dist : float
        normalized distance between s1 and s2 as a float between 0 and 1.0.
        1.0 is returned when ``is_none`` reports either input as missing.
    """
    setupPandas()
    if is_none(s1) or is_none(s2):
        return 1.0

    # The cutoff is applied here on the distance, so it is not forwarded.
    norm_dist = 1.0 - normalized_similarity(s1, s2, processor=processor)

    if score_cutoff is None or norm_dist <= score_cutoff:
        return norm_dist
    return 1.0
def normalized_similarity(
    s1,
    s2,
    *,
    processor=None,
    score_cutoff=None,
):
    """
    Calculates a normalized postfix similarity in the range [0, 1].

    This is calculated as ``similarity / max(len1, len2)``. Two empty
    inputs yield 1.0.

    Parameters
    ----------
    s1 : Sequence[Hashable]
        First string to compare.
    s2 : Sequence[Hashable]
        Second string to compare.
    processor : callable, optional
        Optional callable applied to both inputs before they are compared.
        Default is None, which deactivates this behaviour.
    score_cutoff : float, optional
        Optional argument for a score threshold as a float between 0 and 1.0.
        For norm_sim < score_cutoff 0 is returned instead. Default is None,
        which deactivates this behaviour.

    Returns
    -------
    norm_sim : float
        normalized similarity between s1 and s2 as a float between 0 and 1.0.
        0.0 is returned when ``is_none`` reports either input as missing.
    """
    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0.0

    if processor is not None:
        s1, s2 = processor(s1), processor(s2)

    s1, s2 = conv_sequences(s1, s2)
    longest = max(len(s1), len(s2))
    # The inputs are already preprocessed, so no processor is forwarded.
    common = similarity(s1, s2)

    if longest:
        norm_sim = common / longest
    else:
        # Both inputs are empty; avoid dividing by zero.
        norm_sim = 1.0

    if score_cutoff is None or norm_sim >= score_cutoff:
        return norm_sim
    return 0.0