Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
python-Levenshtein / _levenshtein.h
Size: Mime:
/* @(#) $Id: Levenshtein.h,v 1.22 2005/01/13 20:02:56 yeti Exp $ */
#ifndef LEVENSHTEIN_H
#define LEVENSHTEIN_H

#ifndef size_t
#  include <stdlib.h>
#endif

/* A bit dirty. */
#ifndef _LEV_STATIC_PY
#  define _LEV_STATIC_PY /* */
#endif

/* In C, this is just wchar_t and unsigned char, in Python, lev_wchar can
 * be anything.  If you really want to cheat, define wchar_t to any integer
 * type you like before including Levenshtein.h and recompile it. */
#ifndef lev_wchar
#  ifndef wchar_t
#    include <wchar.h>
#  endif
#  define lev_wchar wchar_t
#endif
typedef unsigned char lev_byte;

/* Edit opration type
 * DON'T CHANGE! used ad arrays indices and the bits are occasionally used
 * as flags */
typedef enum {
  LEV_EDIT_KEEP = 0,
  LEV_EDIT_REPLACE = 1,
  LEV_EDIT_INSERT = 2,
  LEV_EDIT_DELETE = 3,
  LEV_EDIT_LAST  /* sometimes returned when an error occurs */
} LevEditType;

/* Error codes returned by editop check functions */
typedef enum {
  LEV_EDIT_ERR_OK = 0,
  LEV_EDIT_ERR_TYPE,  /* nonexistent edit type */
  LEV_EDIT_ERR_OUT,  /* edit out of string bounds */
  LEV_EDIT_ERR_ORDER,  /* ops are not ordered */
  LEV_EDIT_ERR_BLOCK,  /* incosistent block boundaries (block ops) */
  LEV_EDIT_ERR_SPAN,  /* sequence is not a full transformation (block ops) */
  LEV_EDIT_ERR_LAST
} LevEditOpError;

/* string averaging method (UNUSED yet) */
typedef enum {
  LEV_AVG_HEAD = 0,  /* take operations from the head */
  LEV_AVG_TAIL,  /* take operations from the tail */
  LEV_AVG_SPREAD,  /* take a equidistantly distributed subset */
  LEV_AVG_BLOCK,  /* take a random continuous block */
  LEV_AVG_RANDOM,  /* take a random subset */
  LEV_AVG_LAST
} LevAveragingType;

/* Edit operation (atomic).
 * This is the `native' atomic edit operation.  It differs from the difflib
 * one's because it represents a change of one character, not a block.  And
 * we usually don't care about LEV_EDIT_KEEP, though the functions can handle
 * them.  The positions are interpreted as at the left edge of a character.
 */
typedef struct {
  LevEditType type;  /* editing operation type */
  size_t spos;  /* source block position */
  size_t dpos;  /* destination position */
} LevEditOp;

/* Edit operation (difflib-compatible).
 * This is not `native', but conversion functions exist.  These fields exactly
 * correspond to the codeops() tuples fields (and this method is also the
 * source of the silly OpCode name).  Sequences must span over complete
 * strings, subsequences are simply edit sequences with more (or larger)
 * LEV_EDIT_KEEP blocks.
 */
typedef struct {
  LevEditType type;  /* editing operation type */
  size_t sbeg, send;  /* source block begin, end */
  size_t dbeg, dend;  /* destination block begin, end */
} LevOpCode;

/* Matching block (difflib-compatible). */
typedef struct {
  size_t spos;
  size_t dpos;
  size_t len;
} LevMatchingBlock;

_LEV_STATIC_PY
size_t
lev_edit_distance(size_t len1,
                  const lev_byte *string1,
                  size_t len2,
                  const lev_byte *string2,
                  int xcost);

_LEV_STATIC_PY
size_t
lev_u_edit_distance(size_t len1,
                    const lev_wchar *string1,
                    size_t len2,
                    const lev_wchar *string2,
                    int xcost);

_LEV_STATIC_PY
size_t
lev_hamming_distance(size_t len,
                     const lev_byte *string1,
                     const lev_byte *string2);

_LEV_STATIC_PY
size_t
lev_u_hamming_distance(size_t len,
                       const lev_wchar *string1,
                       const lev_wchar *string2);

_LEV_STATIC_PY
double
lev_jaro_ratio(size_t len1,
               const lev_byte *string1,
               size_t len2,
               const lev_byte *string2);

_LEV_STATIC_PY
double
lev_u_jaro_ratio(size_t len1,
                 const lev_wchar *string1,
                 size_t len2,
                 const lev_wchar *string2);

_LEV_STATIC_PY
double
lev_jaro_winkler_ratio(size_t len1, const lev_byte *string1,
                       size_t len2, const lev_byte *string2,
                       double pfweight);

_LEV_STATIC_PY
double
lev_u_jaro_winkler_ratio(size_t len1, const lev_wchar *string1,
                         size_t len2, const lev_wchar *string2,
                         double pfweight);

_LEV_STATIC_PY
lev_byte*
lev_greedy_median(size_t n,
                  const size_t *lengths,
                  const lev_byte *strings[],
                  const double *weights,
                  size_t *medlength);

_LEV_STATIC_PY
lev_wchar*
lev_u_greedy_median(size_t n,
                    const size_t *lengths,
                    const lev_wchar *strings[],
                    const double *weights,
                    size_t *medlength);

_LEV_STATIC_PY
lev_byte*
lev_median_improve(size_t len, const lev_byte *s,
                   size_t n, const size_t *lengths,
                   const lev_byte *strings[],
                   const double *weights,
                   size_t *medlength);

_LEV_STATIC_PY
lev_wchar*
lev_u_median_improve(size_t len, const lev_wchar *s,
                     size_t n, const size_t *lengths,
                     const lev_wchar *strings[],
                     const double *weights,
                     size_t *medlength);

_LEV_STATIC_PY
lev_byte*
lev_quick_median(size_t n,
                 const size_t *lengths,
                 const lev_byte *strings[],
                 const double *weights,
                 size_t *medlength);

_LEV_STATIC_PY
lev_wchar*
lev_u_quick_median(size_t n,
                   const size_t *lengths,
                   const lev_wchar *strings[],
                   const double *weights,
                   size_t *medlength);

_LEV_STATIC_PY
lev_byte*
lev_set_median(size_t n,
               const size_t *lengths,
               const lev_byte *strings[],
               const double *weights,
               size_t *medlength);

_LEV_STATIC_PY
size_t
lev_set_median_index(size_t n, const size_t *lengths,
                     const lev_byte *strings[],
                     const double *weights);

_LEV_STATIC_PY
lev_wchar*
lev_u_set_median(size_t n,
                 const size_t *lengths,
                 const lev_wchar *strings[],
                 const double *weights,
                 size_t *medlength);

_LEV_STATIC_PY
size_t
lev_u_set_median_index(size_t n, const size_t *lengths,
                       const lev_wchar *strings[],
                       const double *weights);

_LEV_STATIC_PY
double
lev_edit_seq_distance(size_t n1,
                      const size_t *lengths1,
                      const lev_byte *strings1[],
                      size_t n2,
                      const size_t *lengths2,
                      const lev_byte *strings2[]);

_LEV_STATIC_PY
double
lev_u_edit_seq_distance(size_t n1,
                        const size_t *lengths1,
                        const lev_wchar *strings1[],
                        size_t n2,
                        const size_t *lengths2,
                        const lev_wchar *strings2[]);

_LEV_STATIC_PY
double
lev_set_distance(size_t n1,
                 const size_t *lengths1,
                 const lev_byte *strings1[],
                 size_t n2,
                 const size_t *lengths2,
                 const lev_byte *strings2[]);

_LEV_STATIC_PY
double
lev_u_set_distance(size_t n1,
                   const size_t *lengths1,
                   const lev_wchar *strings1[],
                   size_t n2,
                   const size_t *lengths2,
                   const lev_wchar *strings2[]);

_LEV_STATIC_PY
int
lev_editops_check_errors(size_t len1,
                         size_t len2,
                         size_t n,
                         const LevEditOp *ops);

_LEV_STATIC_PY
int
lev_opcodes_check_errors(size_t len1,
                         size_t len2,
                         size_t nb,
                         const LevOpCode *bops);

_LEV_STATIC_PY
void
lev_editops_invert(size_t n,
                   LevEditOp *ops);

_LEV_STATIC_PY
void
lev_opcodes_invert(size_t nb,
                   LevOpCode *bops);

_LEV_STATIC_PY
LevMatchingBlock*
lev_editops_matching_blocks(size_t len1,
                            size_t len2,
                            size_t n,
                            const LevEditOp *ops,
                            size_t *nmblocks);

_LEV_STATIC_PY
LevMatchingBlock*
lev_opcodes_matching_blocks(size_t len1,
                            size_t len2,
                            size_t nb,
                            const LevOpCode *bops,
                            size_t *nmblocks);

_LEV_STATIC_PY
lev_byte*
lev_editops_apply(size_t len1,
                  const lev_byte* string1,
                  size_t len2,
                  const lev_byte* string2,
                  size_t n,
                  const LevEditOp *ops,
                  size_t *len);

_LEV_STATIC_PY
lev_wchar*
lev_u_editops_apply(size_t len1,
                    const lev_wchar* string1,
                    size_t len2,
                    const lev_wchar* string2,
                    size_t n,
                    const LevEditOp *ops,
                    size_t *len);

_LEV_STATIC_PY
lev_byte*
lev_opcodes_apply(size_t len1,
                  const lev_byte* string1,
                  size_t len2,
                  const lev_byte* string2,
                  size_t nb,
                  const LevOpCode *bops,
                  size_t *len);

_LEV_STATIC_PY
lev_wchar*
lev_u_opcodes_apply(size_t len1,
                    const lev_wchar* string1,
                    size_t len2,
                    const lev_wchar* string2,
                    size_t nb,
                    const LevOpCode *bops,
                    size_t *len);

_LEV_STATIC_PY
LevEditOp*
lev_editops_find(size_t len1,
                 const lev_byte *string1,
                 size_t len2,
                 const lev_byte *string2,
                 size_t *n);

_LEV_STATIC_PY
LevEditOp*
lev_u_editops_find(size_t len1,
                   const lev_wchar *string1,
                   size_t len2,
                   const lev_wchar *string2,
                   size_t *n);

_LEV_STATIC_PY
LevEditOp*
lev_opcodes_to_editops(size_t nb,
                       const LevOpCode *bops,
                       size_t *n,
                       int keepkeep);

_LEV_STATIC_PY
LevOpCode*
lev_editops_to_opcodes(size_t n,
                       const LevEditOp *ops,
                       size_t *nb,
                       size_t len1,
                       size_t len2);

_LEV_STATIC_PY
size_t
lev_editops_total_cost(size_t n,
                       const LevEditOp *ops);

_LEV_STATIC_PY
size_t
lev_opcodes_total_cost(size_t nb,
                       const LevOpCode *bops);

_LEV_STATIC_PY
LevEditOp*
lev_editops_normalize(size_t n,
                      const LevEditOp *ops,
                      size_t *nnorm);

_LEV_STATIC_PY LevEditOp*
lev_editops_subtract(size_t n,
                     const LevEditOp *ops,
                     size_t ns,
                     const LevEditOp *sub,
                     size_t *nrem);

/* UNUSED yet */
_LEV_STATIC_PY
void
lev_init_rng(unsigned long int seed);

#endif /* not LEVENSHTEIN_H */