Gemfury

thinkdataworks / namara-python python

Repository URL to install this package:

Details

namara-python / dataframe.py

from pandas import DataFrame as PandaDataFrame
from pandas import read_csv
from namara_python.client import Client
from time import sleep
from io import StringIO
import requests

class DataFrame(PandaDataFrame):
    """pandas ``DataFrame`` subclass that can fill itself from a Namara data set.

    When constructed with a ``data_set_id`` keyword, the frame's contents are
    fetched through a Namara export. Without it, the class behaves like a
    regular ``pandas.DataFrame`` constructor, which is required because pandas
    re-invokes this constructor (via ``_constructor``) whenever it derives a
    new frame from an existing one.
    """

    # Custom attribute names that pandas propagates to frames derived from
    # this one.
    _metadata = ['client', 'data_set_id', 'loaded']

    @property
    def _constructor(self):
        """Return the class pandas should use when building derived frames."""
        return DataFrame

    def __init__(self, *args, **kwargs):
        """Build the frame, optionally loading its data from a Namara export.

        Keyword arguments consumed here (everything else is forwarded to
        ``pandas.DataFrame.__init__`` unchanged):

        data_set_id: identifier interpolated into ``select * from <id>``.
            When omitted (or ``None``), no export is made and the remaining
            arguments are passed straight to pandas.
        client: object providing ``create_export`` and ``get_export``. When
            falsy and a ``data_set_id`` is given, a default ``Client()`` is
            created.

        Raises ``requests.HTTPError`` if downloading the export file returns
        an error status.
        '''
        """
        data_set_id = kwargs.pop('data_set_id', None)
        client = kwargs.pop('client', None)

        self.loaded = False
        self.client = client
        self.data_set_id = data_set_id

        if data_set_id is None:
            # Plain construction path, e.g. pandas building a derived frame
            # through ``_constructor``: no network access, no data override.
            super().__init__(*args, **kwargs)
            return

        if not client:
            client = Client()
            self.client = client

        export = client.create_export(query="select * from %s" % data_set_id)
        export_id = export['export']['id']

        # Poll until the export is done, sleeping only between attempts.
        # NOTE(review): there is no timeout and no handling of a terminal
        # state other than 'finished', so this loops forever if the export
        # never finishes - confirm the API's failure states.
        while True:
            finished_export_details = client.get_export(id=export_id)
            if finished_export_details['export']['state'] == 'finished':
                break
            sleep(2)

        res = requests.get(finished_export_details['file_url'], allow_redirects=True)
        # Fail loudly instead of parsing an HTTP error body as CSV.
        res.raise_for_status()

        data = StringIO(str(res.content, 'utf-8'))

        kwargs['data'] = read_csv(data)

        super().__init__(*args, **kwargs)

thinkdataworks / namara-python python

Products

About

Resources

Contact Gemfury