Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
pg / ext / pg_text_encoder.c
Size: Mime:
/*
 * pg_text_encoder.c - PG::TextEncoder module
 * $Id: pg_text_encoder.c,v b859963462b2 2015/03/13 17:39:35 lars $
 *
 */

/*
 *
 * Type casts for encoding Ruby objects to PostgreSQL string representations.
 *
 * Encoder classes are defined with pg_define_coder(). This creates a new coder class and
 * assigns an encoder function. The encoder function can decide between two different options
 * to return the encoded data. It can either return it as a Ruby String object or write the
 * encoded data to a memory space provided by the caller. In the second case, the encoder
 * function is called twice, once for deciding the encoding option and returning the expected
 * data length, and a second time when the requested memory space was made available by the
 * calling function, to do the actual conversion and writing. Parameter intermediate can be
 * used to store data between these two calls.
 *
 * Signature of all type cast encoders is:
 *    int encoder_function(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate)
 *
 * Params:
 *   this  - The data part of the coder object that belongs to the encoder function.
 *   value - The Ruby object to cast.
 *   out   - NULL for the first call,
 *           pointer to a buffer with the requested size for the second call.
 *   intermediate - Pointer to a VALUE that might be set by the encoding function to some
 *           value in the first call that can be retrieved later in the second call.
 *           This VALUE is not yet initialized by the caller.
 *
 * Returns:
 *   >= 0  - If out==NULL the encoder function must return the expected output buffer size.
 *           This can be larger than the size of the second call, but may not be smaller.
 *           If out!=NULL the encoder function must return the actually used output buffer size
 *           without a termination character.
 *   -1    - The encoder function can alternatively return -1 to indicate that no second call
 *           is required, but the String value in *intermediate should be used instead.
 */


#include "pg.h"
#include "util.h"
#include <inttypes.h>
#include <math.h>

VALUE rb_mPG_TextEncoder;
static ID s_id_encode;
static ID s_id_to_i;

static int pg_text_enc_integer(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate);

VALUE
pg_obj_to_i( VALUE value )
{
	switch (TYPE(value)) {
		case T_FIXNUM:
		case T_FLOAT:
		case T_BIGNUM:
			return value;
		default:
			return rb_funcall(value, s_id_to_i, 0);
	}
}

/*
 * Document-class: PG::TextEncoder::Boolean < PG::SimpleEncoder
 *
 * This is the encoder class for the PostgreSQL bool type.
 *
 * Ruby value false is encoded as SQL +FALSE+ value.
 * Ruby value true is encoded as SQL +TRUE+ value.
 * Any other value is sent as it's string representation.
 *
 */
static int
pg_text_enc_boolean(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate)
{
	switch( TYPE(value) ){
		case T_FALSE:
			if(out) *out = 'f';
			return 1;
		case T_TRUE:
			if(out) *out = 't';
			return 1;
		case T_FIXNUM:
		case T_BIGNUM:
			if( NUM2LONG(value) == 0 ){
				if(out) *out = '0';
				return 1;
			} else if( NUM2LONG(value) == 1 ){
				if(out) *out = '1';
				return 1;
			} else {
				return pg_text_enc_integer(this, value, out, intermediate);
			}
		default:
			return pg_coder_enc_to_s(this, value, out, intermediate);
	}
	/* never reached */
	return 0;
}


/*
 * Document-class: PG::TextEncoder::String < PG::SimpleEncoder
 *
 * This is the encoder class for the PostgreSQL text types.
 *
 * Non-String values are expected to have method +to_s+ defined.
 *
 */
int
pg_coder_enc_to_s(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate)
{
	*intermediate = rb_obj_as_string(value);
	return -1;
}


/*
 * Document-class: PG::TextEncoder::Integer < PG::SimpleEncoder
 *
 * This is the encoder class for the PostgreSQL int types.
 *
 * Non-Integer values are expected to have method +to_i+ defined.
 *
 */
static int
pg_text_enc_integer(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate)
{
	if(out){
		if(TYPE(*intermediate) == T_STRING){
			return pg_coder_enc_to_s(this, value, out, intermediate);
		}else{
			char *start = out;
			int len;
			int neg = 0;
			long long ll = NUM2LL(*intermediate);

			if (ll < 0) {
				/* We don't expect problems with the most negative integer not being representable
				 * as a positive integer, because Fixnum is only up to 63 bits.
				 */
				ll = -ll;
				neg = 1;
			}

			/* Compute the result string backwards. */
			do {
				long long remainder;
				long long oldval = ll;

				ll /= 10;
				remainder = oldval - ll * 10;
				*out++ = '0' + remainder;
			} while (ll != 0);

			if (neg)
				*out++ = '-';

			len = out - start;

			/* Reverse string. */
			out--;
			while (start < out)
			{
				char swap = *start;

				*start++ = *out;
				*out-- = swap;
			}

			return len;
		}
	}else{
		*intermediate = pg_obj_to_i(value);
		if(TYPE(*intermediate) == T_FIXNUM){
			int len;
			long long sll = NUM2LL(*intermediate);
			long long ll = sll < 0 ? -sll : sll;
			if( ll < 100000000 ){
				if( ll < 10000 ){
					if( ll < 100 ){
						len = ll < 10 ? 1 : 2;
					}else{
						len = ll < 1000 ? 3 : 4;
					}
				}else{
					if( ll < 1000000 ){
						len = ll < 100000 ? 5 : 6;
					}else{
						len = ll < 10000000 ? 7 : 8;
					}
				}
			}else{
				if( ll < 1000000000000LL ){
					if( ll < 10000000000LL ){
						len = ll < 1000000000LL ? 9 : 10;
					}else{
						len = ll < 100000000000LL ? 11 : 12;
					}
				}else{
					if( ll < 100000000000000LL ){
						len = ll < 10000000000000LL ? 13 : 14;
					}else{
						return pg_coder_enc_to_s(this, *intermediate, NULL, intermediate);
					}
				}
			}
			return sll < 0 ? len+1 : len;
		}else{
			return pg_coder_enc_to_s(this, *intermediate, NULL, intermediate);
		}
	}
}


/*
 * Document-class: PG::TextEncoder::Float < PG::SimpleEncoder
 *
 * This is the encoder class for the PostgreSQL float types.
 *
 */
static int
pg_text_enc_float(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
{
	if(out){
		double dvalue = NUM2DBL(value);
		/* Cast to the same strings as value.to_s . */
		if( isinf(dvalue) ){
			if( dvalue < 0 ){
				memcpy( out, "-Infinity", 9);
				return 9;
			} else {
				memcpy( out, "Infinity", 8);
				return 8;
			}
		} else if (isnan(dvalue)) {
			memcpy( out, "NaN", 3);
			return 3;
		}
		return sprintf( out, "%.16E", dvalue);
	}else{
		return 23;
	}
}

static const char hextab[] = {
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};

/*
 * Document-class: PG::TextEncoder::Bytea < PG::SimpleEncoder
 *
 * This is an encoder class for the PostgreSQL bytea type for server version 9.0
 * or newer.
 *
 * The binary String is converted to hexadecimal representation for transmission
 * in text format. For query bind parameters it is recommended to use
 * PG::BinaryEncoder::Bytea instead, in order to decrease network traffic and
 * CPU usage.
 *
 */
static int
pg_text_enc_bytea(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
{
	if(out){
		size_t strlen = RSTRING_LEN(*intermediate);
		char *iptr = RSTRING_PTR(*intermediate);
		char *eptr = iptr + strlen;
		char *optr = out;
		*optr++ = '\\';
		*optr++ = 'x';

		for( ; iptr < eptr; iptr++ ){
			unsigned char c = *iptr;
			*optr++ = hextab[c >> 4];
			*optr++ = hextab[c & 0xf];
		}
		return optr - out;
	}else{
		*intermediate = rb_obj_as_string(value);
		/* The output starts with "\x" and each character is converted to hex. */
		return 2 + RSTRING_LEN(*intermediate) * 2;
	}
}

typedef int (*t_quote_func)( void *_this, char *p_in, int strlen, char *p_out );

static int
quote_array_buffer( void *_this, char *p_in, int strlen, char *p_out ){
	t_pg_composite_coder *this = _this;
	char *ptr1;
	char *ptr2;
	int backslashs = 0;
	int needquote;

	/* count data plus backslashes; detect chars needing quotes */
	if (strlen == 0)
		needquote = 1;   /* force quotes for empty string */
	else if (strlen == 4 && rbpg_strncasecmp(p_in, "NULL", strlen) == 0)
		needquote = 1;   /* force quotes for literal NULL */
	else
		needquote = 0;

	/* count required backlashs */
	for(ptr1 = p_in; ptr1 != p_in + strlen; ptr1++) {
		char ch = *ptr1;

		if (ch == '"' || ch == '\\'){
			needquote = 1;
			backslashs++;
		} else if (ch == '{' || ch == '}' || ch == this->delimiter ||
					ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' || ch == '\v' || ch == '\f'){
			needquote = 1;
		}
	}

	if( needquote ){
		ptr1 = p_in + strlen;
		ptr2 = p_out + strlen + backslashs + 2;
		/* Write end quote */
		*--ptr2 = '"';

		/* Then store the escaped string on the final position, walking
			* right to left, until all backslashs are placed. */
		while( ptr1 != p_in ) {
			*--ptr2 = *--ptr1;
			if(*ptr2 == '"' || *ptr2 == '\\'){
				*--ptr2 = '\\';
			}
		}
		/* Write start quote */
		*p_out = '"';
		return strlen + backslashs + 2;
	} else {
		if( p_in != p_out )
			memcpy( p_out, p_in, strlen );
		return strlen;
	}
}

static char *
quote_string(t_pg_coder *this, VALUE value, VALUE string, char *current_out, int with_quote, t_quote_func quote_buffer, void *func_data)
{
	int strlen;
	VALUE subint;
	t_pg_coder_enc_func enc_func = pg_coder_enc_func(this);

	strlen = enc_func(this, value, NULL, &subint);

	if( strlen == -1 ){
		/* we can directly use String value in subint */
		strlen = RSTRING_LENINT(subint);

		if(with_quote){
			/* size of string assuming the worst case, that every character must be escaped. */
			current_out = pg_rb_str_ensure_capa( string, strlen * 2 + 2, current_out, NULL );

			current_out += quote_buffer( func_data, RSTRING_PTR(subint), strlen, current_out );
		} else {
			current_out = pg_rb_str_ensure_capa( string, strlen, current_out, NULL );
			memcpy( current_out, RSTRING_PTR(subint), strlen );
			current_out += strlen;
		}

	} else {

		if(with_quote){
			/* size of string assuming the worst case, that every character must be escaped
			 * plus two bytes for quotation.
			 */
			current_out = pg_rb_str_ensure_capa( string, 2 * strlen + 2, current_out, NULL );

			/* Place the unescaped string at current output position. */
			strlen = enc_func(this, value, current_out, &subint);

			current_out += quote_buffer( func_data, current_out, strlen, current_out );
		}else{
			/* size of the unquoted string */
			current_out = pg_rb_str_ensure_capa( string, strlen, current_out, NULL );
			current_out += enc_func(this, value, current_out, &subint);
		}
	}
	return current_out;
}

static char *
write_array(t_pg_composite_coder *this, VALUE value, char *current_out, VALUE string, int quote)
{
	int i;

	/* size of "{}" */
	current_out = pg_rb_str_ensure_capa( string, 2, current_out, NULL );
	*current_out++ = '{';

	for( i=0; i<RARRAY_LEN(value); i++){
		VALUE entry = rb_ary_entry(value, i);

		if( i > 0 ){
			current_out = pg_rb_str_ensure_capa( string, 1, current_out, NULL );
			*current_out++ = this->delimiter;
		}

		switch(TYPE(entry)){
			case T_ARRAY:
				current_out = write_array(this, entry, current_out, string, quote);
			break;
			case T_NIL:
				current_out = pg_rb_str_ensure_capa( string, 4, current_out, NULL );
				*current_out++ = 'N';
				*current_out++ = 'U';
				*current_out++ = 'L';
				*current_out++ = 'L';
				break;
			default:
				current_out = quote_string( this->elem, entry, string, current_out, quote, quote_array_buffer, this );
		}
	}
	current_out = pg_rb_str_ensure_capa( string, 1, current_out, NULL );
	*current_out++ = '}';
	return current_out;
}


/*
 * Document-class: PG::TextEncoder::Array < PG::CompositeEncoder
 *
 * This is the encoder class for PostgreSQL array types.
 *
 * All values are encoded according to the #elements_type
 * accessor. Sub-arrays are encoded recursively.
 *
 * This encoder expects an Array of values or sub-arrays as input.
 * Other values are passed through as text without interpretation.
 *
 */
static int
pg_text_enc_array(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
{
	char *end_ptr;
	t_pg_composite_coder *this = (t_pg_composite_coder *)conv;

	if( TYPE(value) == T_ARRAY){
		*intermediate = rb_str_new(NULL, 0);

		end_ptr = write_array(this, value, RSTRING_PTR(*intermediate), *intermediate, this->needs_quotation);

		rb_str_set_len( *intermediate, end_ptr - RSTRING_PTR(*intermediate) );

		return -1;
	} else {
		return pg_coder_enc_to_s( conv, value, out, intermediate );
	}
}

static char *
quote_identifier( VALUE value, VALUE out_string, char *current_out ){
	char *p_in = RSTRING_PTR(value);
	char *ptr1;
	size_t strlen = RSTRING_LEN(value);
	char *end_capa = current_out;

	PG_RB_STR_ENSURE_CAPA( out_string, strlen + 2, current_out, end_capa );
	*current_out++ = '"';
	for(ptr1 = p_in; ptr1 != p_in + strlen; ptr1++) {
		char c = *ptr1;
		if (c == '"'){
			strlen++;
			PG_RB_STR_ENSURE_CAPA( out_string, p_in - ptr1 + strlen + 1, current_out, end_capa );
			*current_out++ = '"';
		} else if (c == 0){
			break;
		}
		*current_out++ = c;
	}
	PG_RB_STR_ENSURE_CAPA( out_string, 1, current_out, end_capa );
	*current_out++ = '"';

	return current_out;
}

static char *
pg_text_enc_array_identifier(VALUE value, VALUE string, char *out)
{
	int i;
	int nr_elems;

	Check_Type(value, T_ARRAY);
	nr_elems = RARRAY_LEN(value);

	for( i=0; i<nr_elems; i++){
		VALUE entry = rb_ary_entry(value, i);

		out = quote_identifier(entry, string, out);
		if( i < nr_elems-1 ){
			out = pg_rb_str_ensure_capa( string, 1, out, NULL );
			*out++ = '.';
		}
	}
	return out;
}

/*
 * Document-class: PG::TextEncoder::Identifier < PG::SimpleEncoder
 *
 * This is the encoder class for PostgreSQL identifiers.
 *
 * An Array value can be used for "schema.table.column" type identifiers:
 *   PG::TextEncoder::Identifier.new.encode(['schema', 'table', 'column'])
 *      => '"schema"."table"."column"'
 *
 *  This encoder can also be used per PG::Connection#quote_ident .
 */
int
pg_text_enc_identifier(t_pg_coder *this, VALUE value, char *out, VALUE *intermediate)
{
	UNUSED( this );
	if( TYPE(value) == T_ARRAY){
		*intermediate = rb_str_new(NULL, 0);
		out = RSTRING_PTR(*intermediate);
		out = pg_text_enc_array_identifier(value, *intermediate, out);
	} else {
		StringValue(value);
		*intermediate = rb_str_new(NULL, RSTRING_LEN(value) + 2);
		out = RSTRING_PTR(*intermediate);
		out = quote_identifier(value, *intermediate, out);
	}
	rb_str_set_len( *intermediate, out - RSTRING_PTR(*intermediate) );
	return -1;
}


static int
quote_literal_buffer( void *_this, char *p_in, int strlen, char *p_out ){
	char *ptr1;
	char *ptr2;
	int backslashs = 0;

	/* count required backlashs */
	for(ptr1 = p_in; ptr1 != p_in + strlen; ptr1++) {
		if (*ptr1 == '\''){
			backslashs++;
		}
	}

	ptr1 = p_in + strlen;
	ptr2 = p_out + strlen + backslashs + 2;
	/* Write end quote */
	*--ptr2 = '\'';

	/* Then store the escaped string on the final position, walking
		* right to left, until all backslashs are placed. */
	while( ptr1 != p_in ) {
		*--ptr2 = *--ptr1;
		if(*ptr2 == '\''){
			*--ptr2 = '\'';
		}
	}
	/* Write start quote */
	*p_out = '\'';
	return strlen + backslashs + 2;
}


/*
 * Document-class: PG::TextEncoder::QuotedLiteral < PG::CompositeEncoder
 *
 * This is the encoder class for PostgreSQL literals.
 *
 * A literal is quoted and escaped by the +'+ character.
 *
 */
static int
pg_text_enc_quoted_literal(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
{
	t_pg_composite_coder *this = (t_pg_composite_coder *)conv;

	*intermediate = rb_str_new(NULL, 0);
	out = RSTRING_PTR(*intermediate);
	out = quote_string(this->elem, value, *intermediate, out, this->needs_quotation, quote_literal_buffer, this);
	rb_str_set_len( *intermediate, out - RSTRING_PTR(*intermediate) );
	return -1;
}

/*
 * Document-class: PG::TextEncoder::ToBase64 < PG::CompositeEncoder
 *
 * This is an encoder class for conversion of binary to base64 data.
 *
 */
static int
pg_text_enc_to_base64(t_pg_coder *conv, VALUE value, char *out, VALUE *intermediate)
{
	int strlen;
	VALUE subint;
	t_pg_composite_coder *this = (t_pg_composite_coder *)conv;
	t_pg_coder_enc_func enc_func = pg_coder_enc_func(this->elem);

	if(out){
		/* Second encoder pass, if required */
		strlen = enc_func(this->elem, value, out, intermediate);
		base64_encode( out, out, strlen );

		return BASE64_ENCODED_SIZE(strlen);
	} else {
		/* First encoder pass */
		strlen = enc_func(this->elem, value, NULL, &subint);

		if( strlen == -1 ){
			/* Encoded string is returned in subint */
			VALUE out_str;

			strlen = RSTRING_LENINT(subint);
			out_str = rb_str_new(NULL, BASE64_ENCODED_SIZE(strlen));

			base64_encode( RSTRING_PTR(out_str), RSTRING_PTR(subint), strlen);
			*intermediate = out_str;

			return -1;
		} else {
			*intermediate = subint;

			return BASE64_ENCODED_SIZE(strlen);
		}
	}
}


void
init_pg_text_encoder()
{
	s_id_encode = rb_intern("encode");
	s_id_to_i = rb_intern("to_i");

	/* This module encapsulates all encoder classes with text output format */
	rb_mPG_TextEncoder = rb_define_module_under( rb_mPG, "TextEncoder" );

	/* Make RDoc aware of the encoder classes... */
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Boolean", rb_cPG_SimpleEncoder ); */
	pg_define_coder( "Boolean", pg_text_enc_boolean, rb_cPG_SimpleEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Integer", rb_cPG_SimpleEncoder ); */
	pg_define_coder( "Integer", pg_text_enc_integer, rb_cPG_SimpleEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Float", rb_cPG_SimpleEncoder ); */
	pg_define_coder( "Float", pg_text_enc_float, rb_cPG_SimpleEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "String", rb_cPG_SimpleEncoder ); */
	pg_define_coder( "String", pg_coder_enc_to_s, rb_cPG_SimpleEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Bytea", rb_cPG_SimpleEncoder ); */
	pg_define_coder( "Bytea", pg_text_enc_bytea, rb_cPG_SimpleEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Identifier", rb_cPG_SimpleEncoder ); */
	pg_define_coder( "Identifier", pg_text_enc_identifier, rb_cPG_SimpleEncoder, rb_mPG_TextEncoder );

	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "Array", rb_cPG_CompositeEncoder ); */
	pg_define_coder( "Array", pg_text_enc_array, rb_cPG_CompositeEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "QuotedLiteral", rb_cPG_CompositeEncoder ); */
	pg_define_coder( "QuotedLiteral", pg_text_enc_quoted_literal, rb_cPG_CompositeEncoder, rb_mPG_TextEncoder );
	/* dummy = rb_define_class_under( rb_mPG_TextEncoder, "ToBase64", rb_cPG_CompositeEncoder ); */
	pg_define_coder( "ToBase64", pg_text_enc_to_base64, rb_cPG_CompositeEncoder, rb_mPG_TextEncoder );
}