"""Longley dataset"""
from statsmodels.datasets import utils as du
__docformat__ = 'restructuredtext'
COPYRIGHT = """This is public domain."""
TITLE = __doc__
SOURCE = """
The classic 1967 Longley Data
http://www.itl.nist.gov/div898/strd/lls/data/Longley.shtml
::
Longley, J.W. (1967) "An Appraisal of Least Squares Programs for the
Electronic Comptuer from the Point of View of the User." Journal of
the American Statistical Association. 62.319, 819-41.
"""
DESCRSHORT = """"""
DESCRLONG = """The Longley dataset contains various US macroeconomic
variables that are known to be highly collinear. It has been used to appraise
the accuracy of least squares routines."""
NOTE = """::
Number of Observations - 16
Number of Variables - 6
Variable name definitions::
TOTEMP - Total Employment
GNPDEFL - GNP deflator
GNP - GNP
UNEMP - Number of unemployed
ARMED - Size of armed forces
POP - Population
YEAR - Year (1947 - 1962)
"""
def load(as_pandas=None):
"""
Load the Longley data and return a Dataset class.
Parameters
----------
as_pandas : bool
Flag indicating whether to return pandas DataFrames and Series
or numpy recarrays and arrays. If True, returns pandas.
Returns
-------
Dataset instance
See DATASET_PROPOSAL.txt for more information.
"""
return du.as_numpy_dataset(load_pandas(), as_pandas=as_pandas)
def load_pandas():
"""
Load the Longley data and return a Dataset class.
Returns
-------
Dataset instance
See DATASET_PROPOSAL.txt for more information.
"""
data = _get_data()
return du.process_pandas(data, endog_idx=0)
def _get_data():
data = du.load_csv(__file__, 'longley.csv')
data = data.iloc[:, [1, 2, 3, 4, 5, 6, 7]].astype(float)
return data