Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

agriconnect / cchardet   python

Repository URL to install this package:

/ scripts / cchardetect

#!python
from __future__ import print_function, unicode_literals
import argparse
import sys
import cchardet


# True when the running interpreter is Python 2; main() uses this to choose
# which stdin object to read bytes from when no files are given.
PY2 = sys.version_info[0] == 2

def read_chunks(f, chunk_size):
    """Yield successive blocks read from ``f`` until a read comes back empty.

    Each yielded block is the result of ``f.read(chunk_size)``. Iteration
    stops at the first falsy (empty) result, which is not yielded.
    """
    while True:
        block = f.read(chunk_size)
        if not block:
            return
        yield block


def main():
    """Detect and print the character encoding of each input file.

    Command-line arguments:
        files         Zero or more paths, opened in binary mode by argparse.
                      When none are given, standard input is read instead.
        --chunk-size  Number of bytes passed to each ``read`` call
                      (default 256 KiB).
        --version     Print the program name and cchardet version, then exit.

    For every input a fresh ``cchardet.UniversalDetector`` is fed chunk by
    chunk, and one line with the file name, detected encoding and confidence
    is printed. Each file opened by argparse is closed as soon as it has
    been processed; the process's own stdin is left open.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('files',
                        nargs='*',
                        help="Files to detect encoding of",
                        type=argparse.FileType('rb'),
                        # Bytes are needed: on Python 3 the binary stream
                        # lives at sys.stdin.buffer.
                        default=[sys.stdin if PY2 else sys.stdin.buffer])
    parser.add_argument('--chunk-size',
                        type=int,
                        default=(256 * 1024))
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {0}'.format(cchardet.__version__))
    args = parser.parse_args()

    # Streams owned by the interpreter rather than opened for us by argparse;
    # these must not be closed here ('-' or the default both map to stdin).
    stdin_streams = (sys.stdin, getattr(sys.stdin, 'buffer', None))

    for f in args.files:
        try:
            detector = cchardet.UniversalDetector()
            for chunk in read_chunks(f, args.chunk_size):
                detector.feed(chunk)
            # close() finalizes detection so that .result is populated.
            detector.close()
            print('{file.name}: {result[encoding]} with confidence {result[confidence]}'.format(
                file=f,
                result=detector.result
            ))
        finally:
            # argparse.FileType opens every file at parse time and never
            # closes it; release each handle once we are done with it.
            if not any(f is stream for stream in stdin_streams):
                f.close()


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()