Repository URL to install this package:
Version:
1.5.4 ▾
|
#include "codepoints.h"
#include "jaro.h"
#include "ruby.h"
VALUE rb_mJaroWinkler, rb_eError, rb_eInvalidWeightError;
VALUE rb_jaro_winkler_distance(size_t argc, VALUE *argv, VALUE self);
VALUE rb_jaro_distance(size_t argc, VALUE *argv, VALUE self);
VALUE distance(size_t argc, VALUE *argv, VALUE self,
double (*distance_fn)(uint32_t *codepoints1, size_t len1,
uint32_t *codepoints2, size_t len2,
Options *));
void Init_jaro_winkler_ext(void) {
rb_mJaroWinkler = rb_define_module("JaroWinkler");
rb_eError = rb_define_class_under(rb_mJaroWinkler, "Error", rb_eRuntimeError);
rb_eInvalidWeightError =
rb_define_class_under(rb_mJaroWinkler, "InvalidWeightError", rb_eError);
rb_define_singleton_method(rb_mJaroWinkler, "distance",
rb_jaro_winkler_distance, -1);
rb_define_singleton_method(rb_mJaroWinkler, "jaro_distance", rb_jaro_distance,
-1);
}
VALUE distance(size_t argc, VALUE *argv, VALUE self,
double (*distance_fn)(uint32_t *codepoints1, size_t len1,
uint32_t *codepoints2, size_t len2,
Options *)) {
VALUE s1, s2, opt;
rb_scan_args((int32_t)argc, argv, "2:", &s1, &s2, &opt);
Check_Type(s1, T_STRING);
Check_Type(s2, T_STRING);
Options c_opt = DEFAULT_OPTIONS;
if (TYPE(opt) == T_HASH) {
VALUE weight = rb_hash_aref(opt, ID2SYM(rb_intern("weight"))),
threshold = rb_hash_aref(opt, ID2SYM(rb_intern("threshold"))),
ignore_case = rb_hash_aref(opt, ID2SYM(rb_intern("ignore_case"))),
adj_table = rb_hash_aref(opt, ID2SYM(rb_intern("adj_table")));
if (!NIL_P(weight))
c_opt.weight = NUM2DBL(weight);
if (c_opt.weight > 0.25)
rb_raise(rb_eInvalidWeightError, "Scaling factor should not exceed 0.25, "
"otherwise the distance can become "
"larger than 1.");
if (!NIL_P(threshold))
c_opt.threshold = NUM2DBL(threshold);
if (!NIL_P(ignore_case))
c_opt.ignore_case =
(TYPE(ignore_case) == T_FALSE || NIL_P(ignore_case)) ? 0 : 1;
if (!NIL_P(adj_table))
c_opt.adj_table =
(TYPE(adj_table) == T_FALSE || NIL_P(adj_table)) ? 0 : 1;
}
CodePoints cp1, cp2;
codepoints_init(&cp1, s1);
codepoints_init(&cp2, s2);
VALUE ret = rb_float_new(
(*distance_fn)(cp1.data, cp1.length, cp2.data, cp2.length, &c_opt));
codepoints_free(&cp1);
codepoints_free(&cp2);
return ret;
}
VALUE rb_jaro_distance(size_t argc, VALUE *argv, VALUE self) {
return distance(argc, argv, self, jaro_distance_from_codes);
}
VALUE rb_jaro_winkler_distance(size_t argc, VALUE *argv, VALUE self) {
return distance(argc, argv, self, jaro_winkler_distance_from_codes);
}