Repository URL to install this package:
|
Version:
1.1.3 ▾
|
#include <stdlib.h>
#include <numpy/arrayobject.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Convert scipy.sparse.csr to libsvm's sparse data structure
*/
struct svm_csr_node **csr_to_libsvm (double *values, int* indices, int* indptr, npy_int n_samples)
{
struct svm_csr_node **sparse, *temp;
int i, j=0, k=0, n;
sparse = malloc (n_samples * sizeof(struct svm_csr_node *));
if (sparse == NULL)
return NULL;
for (i=0; i<n_samples; ++i) {
n = indptr[i+1] - indptr[i]; /* count elements in row i */
temp = malloc ((n+1) * sizeof(struct svm_csr_node));
if (temp == NULL) {
for (j=0; j<i; j++)
free(sparse[j]);
free(sparse);
return NULL;
}
for (j=0; j<n; ++j) {
temp[j].value = values[k];
temp[j].index = indices[k] + 1; /* libsvm uses 1-based indexing */
++k;
}
/* set sentinel */
temp[n].index = -1;
sparse[i] = temp;
}
return sparse;
}
struct svm_parameter * set_parameter(int svm_type, int kernel_type, int degree,
double gamma, double coef0, double nu, double cache_size, double C,
double eps, double p, int shrinking, int probability, int nr_weight,
char *weight_label, char *weight, int max_iter, int random_seed)
{
struct svm_parameter *param;
param = malloc(sizeof(struct svm_parameter));
if (param == NULL) return NULL;
param->svm_type = svm_type;
param->kernel_type = kernel_type;
param->degree = degree;
param->coef0 = coef0;
param->nu = nu;
param->cache_size = cache_size;
param->C = C;
param->eps = eps;
param->p = p;
param->shrinking = shrinking;
param->probability = probability;
param->nr_weight = nr_weight;
param->weight_label = (int *) weight_label;
param->weight = (double *) weight;
param->gamma = gamma;
param->max_iter = max_iter;
param->random_seed = random_seed;
return param;
}
/*
* Create and return a svm_csr_problem struct from a scipy.sparse.csr matrix. It is
* up to the user to free resulting structure.
*
* TODO: precomputed kernel.
*/
struct svm_csr_problem * csr_set_problem (char *values, npy_intp *n_indices,
char *indices, npy_intp *n_indptr, char *indptr, char *Y,
char *sample_weight, int kernel_type) {
struct svm_csr_problem *problem;
problem = malloc (sizeof (struct svm_csr_problem));
if (problem == NULL) return NULL;
problem->l = (int) n_indptr[0] - 1;
problem->y = (double *) Y;
problem->x = csr_to_libsvm((double *) values, (int *) indices,
(int *) indptr, problem->l);
/* should be removed once we implement weighted samples */
problem->W = (double *) sample_weight;
if (problem->x == NULL) {
free(problem);
return NULL;
}
return problem;
}
struct svm_csr_model *csr_set_model(struct svm_parameter *param, int nr_class,
char *SV_data, npy_intp *SV_indices_dims,
char *SV_indices, npy_intp *SV_indptr_dims,
char *SV_intptr,
char *sv_coef, char *rho, char *nSV,
char *probA, char *probB)
{
struct svm_csr_model *model;
double *dsv_coef = (double *) sv_coef;
int i, m;
m = nr_class * (nr_class-1)/2;
if ((model = malloc(sizeof(struct svm_csr_model))) == NULL)
goto model_error;
if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
goto nsv_error;
if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
goto label_error;
if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
goto sv_coef_error;
if ((model->rho = malloc( m * sizeof(double))) == NULL)
goto rho_error;
// This is only allocated in dynamic memory while training.
model->n_iter = NULL;
/* in the case of precomputed kernels we do not use
dense_to_precomputed because we don't want the leading 0. As
indices start at 1 (not at 0) this will work */
model->l = (int) SV_indptr_dims[0] - 1;
model->SV = csr_to_libsvm((double *) SV_data, (int *) SV_indices,
(int *) SV_intptr, model->l);
model->nr_class = nr_class;
model->param = *param;
/*
* regression and one-class does not use nSV, label.
*/
if (param->svm_type < 2) {
memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
for(i=0; i < model->nr_class; i++)
model->label[i] = i;
}
for (i=0; i < model->nr_class-1; i++) {
/*
* We cannot squash all this mallocs in a single call since
* svm_destroy_model will free each element of the array.
*/
if ((model->sv_coef[i] = malloc((model->l) * sizeof(double))) == NULL) {
int j;
for (j=0; j<i; j++)
free(model->sv_coef[j]);
goto sv_coef_i_error;
}
memcpy(model->sv_coef[i], dsv_coef, (model->l) * sizeof(double));
dsv_coef += model->l;
}
for (i=0; i<m; ++i) {
(model->rho)[i] = -((double *) rho)[i];
}
/*
* just to avoid segfaults, these features are not wrapped but
* svm_destroy_model will try to free them.
*/
if (param->probability) {
if ((model->probA = malloc(m * sizeof(double))) == NULL)
goto probA_error;
memcpy(model->probA, probA, m * sizeof(double));
if ((model->probB = malloc(m * sizeof(double))) == NULL)
goto probB_error;
memcpy(model->probB, probB, m * sizeof(double));
} else {
model->probA = NULL;
model->probB = NULL;
}
/* We'll free SV ourselves */
model->free_sv = 0;
return model;
probB_error:
free(model->probA);
probA_error:
for (i=0; i < model->nr_class-1; i++)
free(model->sv_coef[i]);
sv_coef_i_error:
free(model->rho);
rho_error:
free(model->sv_coef);
sv_coef_error:
free(model->label);
label_error:
free(model->nSV);
nsv_error:
free(model);
model_error:
return NULL;
}
/*
* Copy support vectors into a scipy.sparse.csr matrix
*/
int csr_copy_SV (char *data, npy_intp *n_indices,
char *indices, npy_intp *n_indptr, char *indptr,
struct svm_csr_model *model, int n_features)
{
int i, j, k=0, index;
double *dvalues = (double *) data;
int *iindices = (int *) indices;
int *iindptr = (int *) indptr;
iindptr[0] = 0;
for (i=0; i<model->l; ++i) { /* iterate over support vectors */
index = model->SV[i][0].index;
for(j=0; index >=0 ; ++j) {
iindices[k] = index - 1;
dvalues[k] = model->SV[i][j].value;
index = model->SV[i][j+1].index;
++k;
}
iindptr[i+1] = k;
}
return 0;
}
/* get number of nonzero coefficients in support vectors */
npy_intp get_nonzero_SV (struct svm_csr_model *model) {
int i, j;
npy_intp count=0;
for (i=0; i<model->l; ++i) {
j = 0;
while (model->SV[i][j].index != -1) {
++j;
++count;
}
}
return count;
}
/*
* Predict using a model, where data is expected to be encoded into a csr matrix.
*/
int csr_copy_predict (npy_intp *data_size, char *data, npy_intp *index_size,
char *index, npy_intp *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, BlasFunctions *blas_functions) {
double *t = (double *) dec_values;
struct svm_csr_node **predict_nodes;
npy_intp i;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
*t = svm_csr_predict(model, predict_nodes[i], blas_functions);
free(predict_nodes[i]);
++t;
}
free(predict_nodes);
return 0;
}
int csr_copy_predict_values (npy_intp *data_size, char *data, npy_intp *index_size,
char *index, npy_intp *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, int nr_class, BlasFunctions *blas_functions) {
struct svm_csr_node **predict_nodes;
npy_intp i;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
svm_csr_predict_values(model, predict_nodes[i],
((double *) dec_values) + i*nr_class,
blas_functions);
free(predict_nodes[i]);
}
free(predict_nodes);
return 0;
}
int csr_copy_predict_proba (npy_intp *data_size, char *data, npy_intp *index_size,
char *index, npy_intp *intptr_size, char *intptr, struct svm_csr_model *model,
char *dec_values, BlasFunctions *blas_functions) {
struct svm_csr_node **predict_nodes;
npy_intp i;
int m = model->nr_class;
predict_nodes = csr_to_libsvm((double *) data, (int *) index,
(int *) intptr, intptr_size[0]-1);
if (predict_nodes == NULL)
return -1;
for(i=0; i < intptr_size[0] - 1; ++i) {
svm_csr_predict_probability(
model, predict_nodes[i], ((double *) dec_values) + i*m, blas_functions);
free(predict_nodes[i]);
}
free(predict_nodes);
return 0;
}
npy_intp get_nr(struct svm_csr_model *model)
{
return (npy_intp) model->nr_class;
}
void copy_intercept(char *data, struct svm_csr_model *model, npy_intp *dims)
{
/* intercept = -rho */
npy_intp i, n = dims[0];
double t, *ddata = (double *) data;
for (i=0; i<n; ++i) {
t = model->rho[i];
/* we do this to avoid ugly -0.0 */
*ddata = (t != 0) ? -t : 0;
++ddata;
}
}
void copy_support (char *data, struct svm_csr_model *model)
{
memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
* Some helpers to convert from libsvm sparse data structures
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
void copy_sv_coef(char *data, struct svm_csr_model *model)
{
int i, len = model->nr_class-1;
double *temp = (double *) data;
for(i=0; i<len; ++i) {
memcpy(temp, model->sv_coef[i], sizeof(double) * model->l);
temp += model->l;
}
}
/*
* Get the number of iterations run in optimization
*/
void copy_n_iter(char *data, struct svm_csr_model *model)
{
const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2);
memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
* Get the number of support vectors in a model.
*/
npy_intp get_l(struct svm_csr_model *model)
{
return (npy_intp) model->l;
}
void copy_nSV(char *data, struct svm_csr_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->nSV, model->nr_class * sizeof(int));
}
/*
* same as above with model->label
* TODO: merge in the cython layer
*/
void copy_label(char *data, struct svm_csr_model *model)
{
if (model->label == NULL) return;
memcpy(data, model->label, model->nr_class * sizeof(int));
}
void copy_probA(char *data, struct svm_csr_model *model, npy_intp * dims)
{
memcpy(data, model->probA, dims[0] * sizeof(double));
}
void copy_probB(char *data, struct svm_csr_model *model, npy_intp * dims)
{
memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
int free_problem(struct svm_csr_problem *problem)
{
int i;
if (problem == NULL) return -1;
for (i=0; i<problem->l; ++i)
free (problem->x[i]);
free (problem->x);
free (problem);
return 0;
}
int free_model(struct svm_csr_model *model)
{
/* like svm_free_and_destroy_model, but does not free sv_coef[i] */
/* We don't free n_iter, since we did not create them in set_model. */
if (model == NULL) return -1;
free(model->SV);
free(model->sv_coef);
free(model->rho);
free(model->label);
free(model->probA);
free(model->probB);
free(model->nSV);
free(model);
return 0;
}
int free_param(struct svm_parameter *param)
{
if (param == NULL) return -1;
free(param);
return 0;
}
int free_model_SV(struct svm_csr_model *model)
{
int i;
for (i=model->l-1; i>=0; --i) free(model->SV[i]);
/* svn_destroy_model frees model->SV */
for (i=0; i < model->nr_class-1 ; ++i) free(model->sv_coef[i]);
/* svn_destroy_model frees model->sv_coef */
return 0;
}
/* borrowed from original libsvm code */
static void print_null(const char *s) {}
static void print_string_stdout(const char *s)
{
fputs(s,stdout);
fflush(stdout);
}
/* provide convenience wrapper */
void set_verbosity(int verbosity_flag){
if (verbosity_flag)
svm_set_print_string_function(&print_string_stdout);
else
svm_set_print_string_function(&print_null);
}