Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
cql / cql / cqltypes.py
Size: Mime:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Representation of Cassandra data types. These classes should make it simple for
the library (and caller software) to deal with Cassandra-style Java class type
names and CQL type specifiers, and convert between them cleanly. Parameterized
types are fully supported in both flavors. Once you have the right Type object
for the type you want, you can use it to serialize, deserialize, or retrieve
the corresponding CQL or Cassandra type strings.

If/when the need arises to interpret types from CQL string literals in
different ways (for https://issues.apache.org/jira/browse/CASSANDRA-3799,
for example), these classes would be a good place to tack on
.from_cql_literal() and .as_cql_literal() classmethods (or whatever).
"""

from cql.apivalues import Binary, UUID
from cql.marshal import (int8_pack, int8_unpack, uint16_pack, uint16_unpack,
                         int32_pack, int32_unpack, int64_pack, int64_unpack,
                         float_pack, float_unpack, double_pack, double_unpack,
                         varint_pack, varint_unpack)
from decimal import Decimal
import time
import socket
import calendar
import re

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Java package prefix of Cassandra's own marshal type classes. It is trimmed
# from incoming type names before registry lookup and prepended when a fully
# qualified Cassandra type name is requested.
apache_cassandra_type_prefix = 'org.apache.cassandra.db.marshal.'

def trim_if_startswith(s, prefix):
    """
    Return s with prefix removed from its front. If s does not begin with
    prefix, s is handed back unchanged.
    """
    if not s.startswith(prefix):
        return s
    return s[len(prefix):]

def unix_time_from_uuid1(u):
    """
    Convert the time field of a UUID (as returned by u.get_time()) into a
    floating-point Unix timestamp in seconds.
    """
    # presumably the RFC 4122 offset between the UUID epoch and the Unix
    # epoch, expressed in the same tick units as get_time() -- confirm
    ticks_since_unix_epoch = u.get_time() - 0x01B21DD213814000
    # ten million ticks per second
    return ticks_since_unix_epoch / 10000000.0

# Type registries, filled in by the CassandraTypeType metaclass as type classes
# are created: _casstypes is keyed by class name, _cqltypes by the class's CQL
# typename.
_casstypes = {}
_cqltypes = {}

class CassandraTypeType(type):
    """
    Metaclass shared by all CassandraType classes.

    The classes in this module are normally used directly, as classes, for
    the type information they carry; they can be instantiated too, but the
    driver mainly needs the type information.

    Each class built through this metaclass receives a 'cassname' attribute
    (defaulting to the class name when the class body does not set one) and,
    unless the class name starts with an underscore, is recorded in the global
    by-cassandra-typename and by-cql-typename registries.
    """

    def __new__(metacls, name, bases, dct):
        if 'cassname' not in dct:
            dct['cassname'] = name
        newclass = type.__new__(metacls, name, bases, dct)
        if name.startswith('_'):
            # underscore-named classes are kept out of the registries
            return newclass
        _casstypes[name] = newclass
        _cqltypes[newclass.typename] = newclass
        return newclass

# Tokenizer for Cassandra type strings: each parenthesis and each run of name
# characters (letters, digits, '_', '.', ':') is emitted as a token, while
# whitespace and commas are matched and discarded.
casstype_scanner = re.Scanner((
    (r'[()]', lambda s,t: t),
    (r'[a-zA-Z0-9_.:]+', lambda s,t: t),
    (r'[\s,]', None),
))

def lookup_casstype_simple(casstype):
    """
    Resolve an unparameterized Cassandra type name to its CassandraType class.

    The name may be given either fully qualified or in its short form; the
    standard Cassandra package prefix is stripped before the registry is
    consulted. When the name is not registered, a new _UnrecognizedType
    subclass is created for it (from the name exactly as given) and returned.

    Complex types are not handled here (no type parameters, nothing with
    parentheses); use lookup_casstype() when those may occur.
    """

    registry_key = trim_if_startswith(casstype, apache_cassandra_type_prefix)
    if registry_key in _casstypes:
        return _casstypes[registry_key]
    return mkUnrecognizedType(casstype)

def parse_casstype_args(typestring):
    """
    Parse a Cassandra type string, possibly carrying parenthesized type
    parameters, and return the corresponding CassandraType class.

    The string is tokenized with casstype_scanner and evaluated with a stack
    of argument lists: '(' opens a new list, a name token is looked up with
    lookup_casstype_simple() and appended to the current list, and ')' closes
    the current list and applies it as parameters to the type that directly
    preceded the matching '('.

    Raises ValueError if the string contains characters the scanner does not
    accept, if parentheses are left unclosed, or if the string does not reduce
    to exactly one top-level type. An IndexError can still escape for a ')'
    with no matching '(' or a '(' with no type name before it;
    lookup_casstype() translates both kinds of error for its callers.
    """
    tokens, remainder = casstype_scanner.scan(typestring)
    if remainder:
        raise ValueError("weird characters %r at end" % remainder)
    # args[-1] is the argument list currently being collected; args[0] is the
    # top level, which must end up holding exactly one type.
    args = [[]]
    for tok in tokens:
        if tok == '(':
            args.append([])
        elif tok == ')':
            arglist = args.pop()
            # the type being parameterized is the last one seen before the '('
            ctype = args[-1].pop()
            paramized = ctype.apply_parameters(*arglist)
            args[-1].append(paramized)
        else:
            if ':' in tok:
                # ignore the hex-encoded column name part; we have the
                # proper column name from elsewhere
                tok = tok.rsplit(':', 1)[-1]
            ctype = lookup_casstype_simple(tok)
            args[-1].append(ctype)
    # Explicit checks rather than assert statements, so that malformed input
    # is still rejected when Python runs with -O (which strips asserts).
    if len(args) != 1:
        raise ValueError("unclosed parenthesis in type string: %r" % (args,))
    if len(args[0]) != 1:
        raise ValueError("expected exactly one top-level type, found %r"
                         % (args[0],))
    return args[0][0]

def lookup_casstype(casstype):
    """
    Given a Cassandra type as a string (possibly including parameters), hand
    back the CassandraType class responsible for it. If a name is not
    recognized, a custom _UnrecognizedType subclass will be created for it.

    A CassandraType instance or class passed in is returned unchanged.

    Raises ValueError if the type string cannot be parsed; the message
    includes the offending string and the underlying error.

    Example:

        >>> lookup_casstype('org.apache.cassandra.db.marshal.MapType(org.apache.cassandra.db.marshal.UTF8Type,org.apache.cassandra.db.marshal.Int32Type)')
        <class 'cql.cqltypes.MapType(UTF8Type, Int32Type)'>

    """

    if isinstance(casstype, (CassandraType, CassandraTypeType)):
        return casstype
    try:
        return parse_casstype_args(casstype)
    # "as" binding works on Python 2.6+ and 3; the older comma form does not
    # compile on Python 3.
    except (ValueError, AssertionError, IndexError) as e:
        raise ValueError("Don't know how to parse type string %r: %s" % (casstype, e))

def lookup_cqltype(cqltype):
    """
    Given a CQL type specifier, possibly including parameters, hand back the
    CassandraType class responsible for it. If a name is not recognized,
    KeyError is raised. If the type specifier looks like a CQL string literal
    (starts and ends with a single quote character "'"), then it is interpreted
    as a Cassandra type name, and looked up accordingly.

    A CassandraType instance or class passed in is returned unchanged, the
    same way lookup_casstype() treats them.

    Only one level of <> parameters is understood; the parameter list is split
    on commas, so nested parameterized types are not supported.

    Example:

        >>> lookup_cqltype('map<text, int>')
        <class 'cql.cqltypes.MapType(UTF8Type, Int32Type)'>

    """

    # Accept type classes as well as instances; a class has no .startswith()
    # and would otherwise fail below with AttributeError.
    if isinstance(cqltype, (CassandraType, CassandraTypeType)):
        return cqltype
    args = ()
    if cqltype.startswith("'") and cqltype.endswith("'"):
        # quoted Cassandra class name; '' inside the literal is an escaped '
        return lookup_casstype(cqltype[1:-1].replace("''", "'"))
    if '<' in cqltype:
        # do we need to support arbitrary nesting? if so, this is where
        # we need to tokenize and parse
        assert cqltype.endswith('>'), cqltype
        cqltype, args = cqltype[:-1].split('<', 1)
        args = [lookup_cqltype(s.strip()) for s in args.split(',')]
    typeclass = _cqltypes[cqltype]
    if args:
        typeclass = typeclass.apply_parameters(*args)
    return typeclass

class _CassandraType(object):
    """
    Common base of all Cassandra data type classes. Subclasses supply a
    'typename' (the CQL name) and override validate/serialize/deserialize
    as needed; the defaults here pass values through untouched.
    """
    __metaclass__ = CassandraTypeType
    subtypes = ()
    num_subtypes = 0
    empty_binary_ok = False

    def __init__(self, val):
        self.val = self.validate(val)

    def __str__(self):
        typespec = self.cql_parameterized_type()
        return '<%s( %r )>' % (typespec, self.val)
    __repr__ = __str__

    @staticmethod
    def validate(val):
        """
        Hook for coercing an incoming value into a form suitable for this
        type; BooleanType, for instance, turns its input into True or False.
        The base implementation accepts anything as-is.
        """

        return val

    @classmethod
    def from_binary(cls, byts):
        """
        Deserialize a bytestring into a value, like deserialize(), except
        that None is returned for a None input, and also for an empty string
        unless this type declares empty_binary_ok.
        """

        if byts is None or (byts == '' and not cls.empty_binary_ok):
            return None
        return cls.deserialize(byts)

    @classmethod
    def to_binary(cls, val):
        """
        Serialize a value into a bytestring, like serialize(), except that
        None is turned into the empty string.
        """

        if val is not None:
            return cls.serialize(val)
        return ''

    @staticmethod
    def deserialize(byts):
        """
        Turn a bytestring into a value following this type's protocol. No
        instance of this class is created; the result is simply a value that
        would be appropriate to store inside one.
        """
        return byts

    @staticmethod
    def serialize(val):
        """
        Turn a value appropriate for this class into the bytestring this
        type's protocol prescribes.
        """
        return val

    @classmethod
    def cass_parameterized_type_with(cls, subtypes, full=False):
        """
        Build the Cassandra-style name of this type using the given subtypes
        as its parameters. With full=True, names lacking a package are given
        the standard Cassandra package prefix. subtypes is a sequence of other
        CassandraType subclasses; when it is empty, just the type name is
        returned.

        Example:

            >>> LongType.cass_parameterized_type_with(())
            'LongType'
            >>> LongType.cass_parameterized_type_with((), full=True)
            'org.apache.cassandra.db.marshal.LongType'
            >>> SetType.cass_parameterized_type_with([DecimalType], full=True)
            'org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.DecimalType)'
        """

        name = cls.cassname
        if full and '.' not in name:
            name = apache_cassandra_type_prefix + name
        if subtypes:
            params = [sub.cass_parameterized_type(full=full) for sub in subtypes]
            return '%s(%s)' % (name, ', '.join(params))
        return name

    @classmethod
    def apply_parameters(cls, *subtypes):
        """
        Create and return a new subclass of this type that carries the given
        CassandraTypes as its parameters. This is how composite types are
        constructed. ValueError is raised when the number of subtypes does
        not match num_subtypes (unless that is 'UNKNOWN').

            >>> MapType.apply_parameters(DateType, BooleanType)
            <class 'cql.cqltypes.MapType(DateType, BooleanType)'>
        """

        expected = cls.num_subtypes
        if expected != 'UNKNOWN' and len(subtypes) != expected:
            raise ValueError("%s types require %d subtypes (%d given)"
                             % (cls.typename, expected, len(subtypes)))
        newname = cls.cass_parameterized_type_with(subtypes).encode('utf8')
        # NOTE(review): the new class inherits the CassandraTypeType
        # metaclass, so it presumably gets registered under newname as well --
        # confirm that this is intended.
        attrs = {'subtypes': subtypes, 'cassname': cls.cassname}
        return type(newname, (cls,), attrs)

    @classmethod
    def cql_parameterized_type(cls):
        """
        Give the CQL type specifier for this type, with any parameters
        rendered in CQL's <> notation.
        """

        if cls.subtypes:
            inner = ', '.join([sub.cql_parameterized_type() for sub in cls.subtypes])
            return '%s<%s>' % (cls.typename, inner)
        return cls.typename

    @classmethod
    def cass_parameterized_type(cls, full=False):
        """
        Give the Cassandra type specifier for this type, with any parameters
        rendered in Cassandra's () notation.
        """
        return cls.cass_parameterized_type_with(cls.subtypes, full=full)

# The base class is defined under a leading-underscore name so that the
# metaclass does not register it as a real type; this public alias lets
# client programs still use the name for isinstance(), etc.
CassandraType = _CassandraType

class _UnrecognizedType(_CassandraType):
    """
    Base for the placeholder classes that mkUnrecognizedType() creates for
    Cassandra type names this module has no class for. Serialization and
    deserialization are inherited and pass values through unchanged.
    """
    # 'UNKNOWN' turns off the subtype-count check in apply_parameters(), so
    # an unrecognized type may be given any number of parameters
    num_subtypes = 'UNKNOWN'

def mkUnrecognizedType(casstypename):
    """
    Create a new _UnrecognizedType subclass standing in for the given
    Cassandra type name. The class is named after the type, and its CQL
    typename is the Cassandra name wrapped in single quotes.
    """
    classname = casstypename.encode('utf8')
    attrs = {'typename': "'%s'" % casstypename}
    return CassandraTypeType(classname, (_UnrecognizedType,), attrs)

class BytesType(_CassandraType):
    """
    CQL 'blob' type. Values are wrapped in Binary on validation and written
    out with str(); deserialization is inherited and returns the bytestring
    unchanged.
    """
    typename = 'blob'
    # an empty bytestring is a real value here, not a stand-in for None
    empty_binary_ok = True

    @staticmethod
    def validate(val):
        return Binary(val)

    @staticmethod
    def serialize(val):
        return str(val)

class DecimalType(_CassandraType):
    """
    CQL 'decimal' type. The binary form is an int32 scale followed by the
    unscaled value as a varint; the represented number is
    unscaled * 10 ** -scale.
    """
    typename = 'decimal'

    @staticmethod
    def validate(val):
        return Decimal(val)

    @staticmethod
    def deserialize(byts):
        scale_bytes, unscaled_bytes = byts[:4], byts[4:]
        exponent = -int32_unpack(scale_bytes)
        return Decimal('%de%d' % (varint_unpack(unscaled_bytes), exponent))

    @staticmethod
    def serialize(dec):
        sign, digits, exponent = dec.as_tuple()
        # glue the digit tuple back together into one integer
        magnitude = int(''.join(map(str, digits)))
        unscaled = -magnitude if sign else magnitude
        return int32_pack(-exponent) + varint_pack(unscaled)

class UUIDType(_CassandraType):
    """
    CQL 'uuid' type: converts between UUID objects and their raw bytes.
    """
    typename = 'uuid'

    @staticmethod
    def deserialize(byts):
        return UUID(bytes=byts)

    @staticmethod
    def serialize(uuid):
        return uuid.bytes

class BooleanType(_CassandraType):
    """
    CQL 'boolean' type. Values are coerced with bool() and travel through
    int8_pack/int8_unpack in binary form.
    """
    typename = 'boolean'

    @staticmethod
    def validate(val):
        return bool(val)

    @staticmethod
    def deserialize(byts):
        return bool(int8_unpack(byts))

    @staticmethod
    def serialize(truth):
        return int8_pack(bool(truth))

class AsciiType(_CassandraType):
    """
    CQL 'ascii' type. Uses the inherited pass-through serialize/deserialize.
    """
    typename = 'ascii'
    # an empty bytestring is a real value here, not a stand-in for None
    empty_binary_ok = True

class FloatType(_CassandraType):
    """
    CQL 'float' type, (de)serialized with float_unpack/float_pack.
    """
    typename = 'float'

    deserialize = staticmethod(float_unpack)
    serialize = staticmethod(float_pack)

class DoubleType(_CassandraType):
    """
    CQL 'double' type, (de)serialized with double_unpack/double_pack.
    """
    typename = 'double'

    deserialize = staticmethod(double_unpack)
    serialize = staticmethod(double_pack)

class LongType(_CassandraType):
    """
    CQL 'bigint' type, (de)serialized with int64_unpack/int64_pack.
    """
    typename = 'bigint'

    deserialize = staticmethod(int64_unpack)
    serialize = staticmethod(int64_pack)

class Int32Type(_CassandraType):
    """
    CQL 'int' type, (de)serialized with int32_unpack/int32_pack.
    """
    typename = 'int'

    deserialize = staticmethod(int32_unpack)
    serialize = staticmethod(int32_pack)

class IntegerType(_CassandraType):
    """
    CQL 'varint' type, (de)serialized with varint_unpack/varint_pack.
    """
    typename = 'varint'

    deserialize = staticmethod(varint_unpack)
    serialize = staticmethod(varint_pack)

class InetAddressType(_CassandraType):
    """
    CQL 'inet' type: converts between packed binary addresses and their
    textual form, for both IPv4 and IPv6.
    """
    typename = 'inet'

    @staticmethod
    def deserialize(byts):
        # a 16-byte value is taken to be IPv6; anything else is handed to
        # inet_ntop as IPv4
        family = socket.AF_INET
        if len(byts) == 16:
            family = socket.AF_INET6
        return socket.inet_ntop(family, byts)

    @staticmethod
    def serialize(addr):
        # a colon in the text marks the address as IPv6
        family = socket.AF_INET
        if ':' in addr:
            family = socket.AF_INET6
        return socket.inet_pton(family, addr)

class CounterColumnType(_CassandraType):
    """
    CQL 'counter' type, (de)serialized with int64_unpack/int64_pack (the same
    functions LongType uses).
    """
    typename = 'counter'

    deserialize = staticmethod(int64_unpack)
    serialize = staticmethod(int64_pack)

# time.strptime() formats that DateType.interpret_datestring() tries, in this
# order, on a date string once any trailing UTC offset has been removed.
cql_time_formats = (
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%dT%H:%M',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%d'
)

class DateType(_CassandraType):
    """
    CQL 'timestamp' type. Values are Unix timestamps in seconds; the binary
    form is an int64 holding milliseconds.
    """
    typename = 'timestamp'

    @classmethod
    def validate(cls, date):
        """
        Accept either a numeric timestamp (returned unchanged) or a date
        string, which is converted with interpret_datestring().
        """
        if isinstance(date, basestring):
            date = cls.interpret_datestring(date)
        return date

    @staticmethod
    def interpret_datestring(date):
        """
        Convert a date string in one of the cql_time_formats, optionally
        followed by a "+HHMM" or "-HHMM" UTC offset, into a Unix timestamp in
        seconds. Without an explicit offset the string is taken to be in the
        local (non-DST) timezone.

        Raises ValueError if the string matches none of the known formats.
        """
        original = date
        # Only treat the tail as a UTC offset when it really is a sign
        # followed by four digits. Checking the length avoids an IndexError on
        # short strings, and checking the digits keeps dates such as
        # '2012-11-1' (which has '-' at position -5) from being misread.
        if len(date) >= 5 and date[-5] in ('+', '-') and date[-4:].isdigit():
            # seconds east of UTC, signed
            offset = (int(date[-4:-2]) * 3600 + int(date[-2:]) * 60) * int(date[-5] + '1')
            date = date[:-5]
        else:
            # time.timezone counts seconds *west* of UTC; negate it to get
            # seconds east, matching the explicit-offset branch
            offset = -time.timezone
        for tformat in cql_time_formats:
            try:
                tval = time.strptime(date, tformat)
            except ValueError:
                continue
            # timegm() reads the wall-clock fields as if they were UTC. A zone
            # `offset` seconds east of UTC showed that wall-clock time
            # `offset` seconds before UTC did, so the offset is subtracted.
            return calendar.timegm(tval) - offset
        raise ValueError("can't interpret %r as a date" % (original,))

    def my_timestamp(self):
        """Return this instance's value as a Unix timestamp."""
        return self.val

    @staticmethod
    def deserialize(byts):
        # stored as integer milliseconds; exposed as float seconds
        return int64_unpack(byts) / 1000.0

    @staticmethod
    def serialize(timestamp):
        return int64_pack(timestamp * 1000)

class TimeUUIDType(DateType):
    """
    CQL 'timeuuid' type. Values are UUID objects, converted to and from their
    raw bytes; my_timestamp() derives a Unix time from the UUID using
    unix_time_from_uuid1(). validate() is inherited from DateType.
    """
    typename = 'timeuuid'

    def my_timestamp(self):
        return unix_time_from_uuid1(self.val)

    @staticmethod
    def deserialize(byts):
        return UUID(bytes=byts)

    @staticmethod
    def serialize(timeuuid):
        return timeuuid.bytes

class UTF8Type(_CassandraType):
    """
    CQL 'text' type: strings are encoded to and decoded from UTF-8.
    """
    typename = 'text'
    # an empty bytestring is a real value here, not a stand-in for None
    empty_binary_ok = True

    @staticmethod
    def deserialize(byts):
        return byts.decode('utf8')

    @staticmethod
    def serialize(ustr):
        return ustr.encode('utf8')

# name alias: 'varchar' resolves to the same class as 'text'
_cqltypes['varchar'] = _cqltypes['text']

class _ParameterizedType(_CassandraType):
    """
    Base class for types that take other types as parameters. A class without
    subtypes (the bare, unparameterized form) can be neither instantiated nor
    used to serialize or deserialize. Parameterized subclasses do the real
    work in deserialize_safe() and serialize_safe().
    """

    def __init__(self, val):
        if not self.subtypes:
            raise ValueError("%s type with no parameters can't be instantiated" % (self.typename,))
        _CassandraType.__init__(self, val)

    @classmethod
    def deserialize(cls, byts):
        """
        Deserialize byts via deserialize_safe(). Raises NotImplementedError
        when this class has no subtypes.
        """
        if not cls.subtypes:
            # cls, not self: this is a classmethod, so there is no instance
            raise NotImplementedError("can't deserialize unparameterized %s"
                                      % cls.typename)
        return cls.deserialize_safe(byts)

    @classmethod
    def serialize(cls, val):
        """
        Serialize val via serialize_safe(). Raises NotImplementedError when
        this class has no subtypes.
        """
        if not cls.subtypes:
            # cls, not self: this is a classmethod, so there is no instance
            raise NotImplementedError("can't serialize unparameterized %s"
                                      % cls.typename)
        return cls.serialize_safe(val)

class _SimpleParameterizedType(_ParameterizedType):
    """
    Shared implementation for collection types with a single element type.
    Subclasses set 'adapter' to the callable that builds the Python container
    from a list of elements.

    Binary layout: a uint16 element count, then for each element a uint16
    length followed by that many bytes of the element's own binary form.
    """

    @classmethod
    def validate(cls, val):
        (elemtype,) = cls.subtypes
        checked = [elemtype.validate(element) for element in val]
        return cls.adapter(checked)

    @classmethod
    def deserialize_safe(cls, byts):
        (elemtype,) = cls.subtypes
        count = uint16_unpack(byts[:2])
        pos = 2
        elements = []
        for _ in xrange(count):
            start = pos + 2
            end = start + uint16_unpack(byts[pos:start])
            elements.append(elemtype.from_binary(byts[start:end]))
            pos = end
        return cls.adapter(elements)

    @classmethod
    def serialize_safe(cls, items):
        (elemtype,) = cls.subtypes
        out = StringIO()
        emit = out.write
        emit(uint16_pack(len(items)))
        for element in items:
            packed = elemtype.to_binary(element)
            emit(uint16_pack(len(packed)))
            emit(packed)
        return out.getvalue()

class ListType(_SimpleParameterizedType):
    """
    CQL 'list' type with one element type.
    """
    typename = 'list'
    num_subtypes = 1
    # validated and deserialized lists are handed back as tuples
    adapter = tuple

class SetType(_SimpleParameterizedType):
    """
    CQL 'set' type with one element type.
    """
    typename = 'set'
    num_subtypes = 1
    # validated and deserialized sets are handed back as Python sets
    adapter = set

class MapType(_ParameterizedType):
    """
    CQL 'map' type, parameterized by a key type and a value type. Values are
    Python dicts.

    Binary layout: a uint16 entry count, then for each entry the key and the
    value, each written as a uint16 length followed by that many bytes.
    """
    typename = 'map'
    num_subtypes = 2

    @classmethod
    def validate(cls, val):
        keytype, valtype = cls.subtypes
        checked = {}
        for rawkey, rawval in val.iteritems():
            # validate the key before the value, then store the pair
            newkey = keytype.validate(rawkey)
            checked[newkey] = valtype.validate(rawval)
        return checked

    @classmethod
    def deserialize_safe(cls, byts):
        keytype, valtype = cls.subtypes
        count = uint16_unpack(byts[:2])
        pos = 2
        result = {}
        for _ in xrange(count):
            keystart = pos + 2
            keyend = keystart + uint16_unpack(byts[pos:keystart])
            valstart = keyend + 2
            valend = valstart + uint16_unpack(byts[keyend:valstart])
            key = keytype.from_binary(byts[keystart:keyend])
            result[key] = valtype.from_binary(byts[valstart:valend])
            pos = valend
        return result

    @classmethod
    def serialize_safe(cls, themap):
        keytype, valtype = cls.subtypes
        out = StringIO()
        emit = out.write
        emit(uint16_pack(len(themap)))
        for key, val in themap.iteritems():
            keybytes = keytype.to_binary(key)
            valbytes = valtype.to_binary(val)
            for chunk in (keybytes, valbytes):
                emit(uint16_pack(len(chunk)))
                emit(chunk)
        return out.getvalue()

class CompositeType(_ParameterizedType):
    """
    Cassandra CompositeType. It has no CQL name of its own, so its typename is
    the quoted Cassandra class name. No serialize_safe()/deserialize_safe()
    is defined in this class.
    """
    typename = "'org.apache.cassandra.db.marshal.CompositeType'"
    # 'UNKNOWN' lets apply_parameters() accept any number of subtypes
    num_subtypes = 'UNKNOWN'

class ColumnToCollectionType(_ParameterizedType):
    """
    This class only really exists so that we can cleanly evaluate types when
    Cassandra includes this. We don't actually need or want the extra
    information.
    """
    typename = "'org.apache.cassandra.db.marshal.ColumnToCollectionType'"
    # 'UNKNOWN' lets apply_parameters() accept any number of subtypes
    num_subtypes = 'UNKNOWN'

class ReversedType(_ParameterizedType):
    """
    Cassandra ReversedType, wrapping exactly one other type. Binary conversion
    is delegated unchanged to the wrapped type.
    """
    typename = "'org.apache.cassandra.db.marshal.ReversedType'"
    num_subtypes = 1

    @classmethod
    def deserialize_safe(cls, byts):
        (wrapped,) = cls.subtypes
        return wrapped.from_binary(byts)

    @classmethod
    def serialize_safe(cls, val):
        (wrapped,) = cls.subtypes
        return wrapped.to_binary(val)

def is_counter_type(t):
    """
    Tell whether t is CounterColumnType or a subclass of it. t may be a
    CassandraType class or a Cassandra type string, which is resolved with
    lookup_casstype() first.
    """
    typeclass = lookup_casstype(t) if isinstance(t, basestring) else t
    return issubclass(typeclass, CounterColumnType)

# Lookup tables built from the registries as they stand at this point in the
# module: CQL typename -> Cassandra class name, and registered class name ->
# CQL typename.
cql_type_to_apache_class = dict([(c, t.cassname) for (c, t) in _cqltypes.items()])
apache_class_to_cql_type = dict([(n, t.typename) for (n, t) in _casstypes.items()])

# Sorted CQL type names, leaving out entries whose typename is a quoted
# Cassandra class name (those starting with a single quote).
cql_types = sorted([t for t in _cqltypes.keys() if not t.startswith("'")])

def cql_typename(casstypename):
    """
    Convert a Cassandra-style type specifier (a Java class name for a data
    type, fully qualified or not, with optional parameters) into the
    equivalent CQL-style type specifier.

        >>> cql_typename('DateType')
        'timestamp'
        >>> cql_typename('org.apache.cassandra.db.marshal.ListType(IntegerType)')
        'list<varint>'
    """
    typeclass = lookup_casstype(casstypename)
    return typeclass.cql_parameterized_type()