Repository URL to install this package:
|
Version:
1.0.12 ▾
|
namara-python
/
dataframe.py
|
|---|
from pandas import DataFrame as PandaDataFrame
from pandas import read_csv
from namara_python.client import Client
from time import sleep
from io import StringIO
import requests
class DataFrame(PandaDataFrame):
    ''' A pandas DataFrame that can be populated from a Namara dataset.

    When constructed with a `data_set_id` keyword, the dataset is exported
    through the Namara client, the exported CSV is downloaded and its
    contents become the data of this frame. Without `data_set_id` the
    class behaves like a regular pandas DataFrame; this is the path pandas
    itself takes (via `_constructor`) when it builds derived frames.

    Attributes carried over to derived frames (see `_metadata`):
        client: the Namara client used for loading (None if none was
            supplied and no load happened).
        data_set_id: the id this frame was loaded from, or None.
        loaded: True once the data was fetched from Namara.

    NOTE(review): the previous docstring mentioned loading only the first
    50 rows and a `full_count` property; neither is implemented in the
    code visible here - the whole export is loaded.
    '''

    # Names of custom attributes pandas should propagate to derived frames.
    _metadata = ['client', 'data_set_id', 'loaded']

    # Seconds to wait between two export status checks.
    _POLL_INTERVAL = 2

    @property
    def _constructor(self):
        ''' Make pandas operations return this subclass, not a plain frame. '''
        return DataFrame

    def __init__(self, *args, **kwargs):
        ''' Build the frame, optionally loading a Namara dataset.

        Accepts every argument of the pandas DataFrame constructor, plus:
            data_set_id: id of the Namara dataset to load (keyword only).
                When given, the fetched table is passed on as `data`.
            client: Namara client to use (keyword only). A default
                `Client()` is created when a dataset has to be loaded and
                no client was supplied.

        Raises:
            requests.HTTPError: if downloading the export file fails.
        '''
        data_set_id = kwargs.pop('data_set_id', None)
        client = kwargs.pop('client', None)

        self.loaded = False
        self.client = client
        self.data_set_id = data_set_id

        # Only talk to Namara when a dataset was actually requested;
        # pandas calls this constructor internally without `data_set_id`.
        if data_set_id is not None:
            if not client:
                client = Client()
                self.client = client
            kwargs['data'] = self._download_export(client, data_set_id)

        super().__init__(*args, **kwargs)
        self.loaded = data_set_id is not None

    @staticmethod
    def _download_export(client, data_set_id):
        ''' Export `data_set_id` via `client` and return it as a pandas frame. '''
        # NOTE(review): data_set_id is interpolated into the query verbatim;
        # make sure it never comes from untrusted input.
        export = client.create_export(query="select * from %s" % data_set_id)
        export_id = export['export']['id']

        # Poll until the export is done, sleeping only between attempts.
        # NOTE(review): the loop exits on 'finished' only; an export that
        # never reaches that state would poll forever - confirm which
        # failure states the API reports and handle them here.
        while True:
            details = client.get_export(id=export_id)
            if details['export']['state'] == 'finished':
                break
            sleep(DataFrame._POLL_INTERVAL)

        res = requests.get(details['file_url'], allow_redirects=True)
        # Fail loudly instead of parsing an HTTP error body as CSV.
        res.raise_for_status()
        return read_csv(StringIO(str(res.content, 'utf-8')))