Gemfury

referralcandy / json ruby

Repository URL to install this package:
Details
json / java / src / json / ext / ByteListTranscoder.java
/*
 * This code is copyrighted work by Daniel Luz <dev at mernen dot com>.
 *
 * Distributed under the Ruby and GPLv2 licenses; see COPYING and GPL files
 * for details.
 */
package json.ext;

import org.jruby.exceptions.RaiseException;
import org.jruby.runtime.ThreadContext;
import org.jruby.util.ByteList;

/**
 * A class specialized in transcoding a certain String format into another,
 * using UTF-8 ByteLists as both input and output.
 */
abstract class ByteListTranscoder {
    protected final ThreadContext context;

    protected ByteList src;
    protected int srcEnd;
    /** Position where the last read character started */
    protected int charStart;
    /** Position of the next character to read */
    protected int pos;

    private ByteList out;
    /**
     * When a character that can be copied straight into the output is found,
     * its index is stored on this variable, and copying is delayed until
     * the sequence of characters that can be copied ends.
     *
     * <p>The variable stores -1 when not in a plain sequence.
     */
    private int quoteStart = -1;

    protected ByteListTranscoder(ThreadContext context) {
        this.context = context;
    }

    protected void init(ByteList src, ByteList out) {
        this.init(src, 0, src.length(), out);
    }

    protected void init(ByteList src, int start, int end, ByteList out) {
        this.src = src;
        this.pos = start;
        this.charStart = start;
        this.srcEnd = end;
        this.out = out;
    }

    /**
     * Returns whether there are any characters left to be read.
     */
    protected boolean hasNext() {
        return pos < srcEnd;
    }

    /**
     * Returns the next character in the buffer.
     */
    private char next() {
        return src.charAt(pos++);
    }

    /**
     * Reads an UTF-8 character from the input and returns its code point,
     * while advancing the input position.
     *
     * <p>Raises an {@link #invalidUtf8()} exception if an invalid byte
     * is found.
     */
    protected int readUtf8Char() {
        charStart = pos;
        char head = next();
        if (head <= 0x7f) { // 0b0xxxxxxx (ASCII)
            return head;
        }
        if (head <= 0xbf) { // 0b10xxxxxx
            throw invalidUtf8(); // tail byte with no head
        }
        if (head <= 0xdf) { // 0b110xxxxx
            ensureMin(1);
            int cp = ((head  & 0x1f) << 6)
                     | nextPart();
            if (cp < 0x0080) throw invalidUtf8();
            return cp;
        }
        if (head <= 0xef) { // 0b1110xxxx
            ensureMin(2);
            int cp = ((head & 0x0f) << 12)
                     | (nextPart()  << 6)
                     | nextPart();
            if (cp < 0x0800) throw invalidUtf8();
            return cp;
        }
        if (head <= 0xf7) { // 0b11110xxx
            ensureMin(3);
            int cp = ((head & 0x07) << 18)
                     | (nextPart()  << 12)
                     | (nextPart()  << 6)
                     | nextPart();
            if (!Character.isValidCodePoint(cp)) throw invalidUtf8();
            return cp;
        }
        // 0b11111xxx?
        throw invalidUtf8();
    }

    /**
     * Throws a GeneratorError if the input list doesn't have at least this
     * many bytes left.
     */
    protected void ensureMin(int n) {
        if (pos + n > srcEnd) throw incompleteUtf8();
    }

    /**
     * Reads the next byte of a multi-byte UTF-8 character and returns its
     * contents (lower 6 bits).
     *
     * <p>Throws a GeneratorError if the byte is not a valid tail.
     */
    private int nextPart() {
        char c = next();
        // tail bytes must be 0b10xxxxxx
        if ((c & 0xc0) != 0x80) throw invalidUtf8();
        return c & 0x3f;
    }


    protected void quoteStart() {
        if (quoteStart == -1) quoteStart = charStart;
    }

    /**
     * When in a sequence of characters that can be copied directly,
     * interrupts the sequence and copies it to the output buffer.
     *
     * @param endPos The offset until which the direct character quoting should
     *               occur. You may pass {@link #pos} to quote until the most
     *               recently read character, or {@link #charStart} to quote
     *               until the character before it.
     */
    protected void quoteStop(int endPos) {
        if (quoteStart != -1) {
            out.append(src, quoteStart, endPos - quoteStart);
            quoteStart = -1;
        }
    }

    protected void append(int b) {
        out.append(b);
    }

    protected void append(byte[] origin, int start, int length) {
        out.append(origin, start, length);
    }


    protected abstract RaiseException invalidUtf8();

    protected RaiseException incompleteUtf8() {
        return invalidUtf8();
    }
}
referralcandy / json ruby

Products

About

Resources

Contact Gemfury