Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ datasets / anes96 / data.py

"""American National Election Survey 1996"""
from numpy import log

from statsmodels.datasets import utils as du

__docformat__ = 'restructuredtext'

COPYRIGHT = """This is public domain."""
TITLE = __doc__
SOURCE = """
http://www.electionstudies.org/

The American National Election Studies.
"""

DESCRSHORT = """This data is a subset of the American National Election Studies of 1996."""

DESCRLONG = DESCRSHORT

NOTE = """::

    Number of observations - 944
    Number of variables - 10

    Variables name definitions::

            popul - Census place population in 1000s
            TVnews - Number of times per week that respondent watches TV news.
            PID - Party identification of respondent.
                0 - Strong Democrat
                1 - Weak Democrat
                2 - Independent-Democrat
                3 - Independent-Indpendent
                4 - Independent-Republican
                5 - Weak Republican
                6 - Strong Republican
            age : Age of respondent.
            educ - Education level of respondent
                1 - 1-8 grades
                2 - Some high school
                3 - High school graduate
                4 - Some college
                5 - College degree
                6 - Master's degree
                7 - PhD
            income - Income of household
                1  - None or less than $2,999
                2  - $3,000-$4,999
                3  - $5,000-$6,999
                4  - $7,000-$8,999
                5  - $9,000-$9,999
                6  - $10,000-$10,999
                7  - $11,000-$11,999
                8  - $12,000-$12,999
                9  - $13,000-$13,999
                10 - $14,000-$14.999
                11 - $15,000-$16,999
                12 - $17,000-$19,999
                13 - $20,000-$21,999
                14 - $22,000-$24,999
                15 - $25,000-$29,999
                16 - $30,000-$34,999
                17 - $35,000-$39,999
                18 - $40,000-$44,999
                19 - $45,000-$49,999
                20 - $50,000-$59,999
                21 - $60,000-$74,999
                22 - $75,000-89,999
                23 - $90,000-$104,999
                24 - $105,000 and over
            vote - Expected vote
                0 - Clinton
                1 - Dole
            The following 3 variables all take the values:
                1 - Extremely liberal
                2 - Liberal
                3 - Slightly liberal
                4 - Moderate
                5 - Slightly conservative
                6 - Conservative
                7 - Extremely Conservative
            selfLR - Respondent's self-reported political leanings from "Left"
                to "Right".
            ClinLR - Respondents impression of Bill Clinton's political
                leanings from "Left" to "Right".
            DoleLR  - Respondents impression of Bob Dole's political leanings
                from "Left" to "Right".
            logpopul - log(popul + .1)
"""


def load_pandas():
    """Load the anes96 data and returns a Dataset class.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    data = _get_data()
    return du.process_pandas(data, endog_idx=5, exog_idx=[10, 2, 6, 7, 8])


def load(as_pandas=None):
    """Load the anes96 data and returns a Dataset class.

    Parameters
    ----------
    as_pandas : bool
        Flag indicating whether to return pandas DataFrames and Series
        or numpy recarrays and arrays.  If True, returns pandas.

    Returns
    -------
    Dataset instance:
        See DATASET_PROPOSAL.txt for more information.
    """
    return du.as_numpy_dataset(load_pandas(), as_pandas=as_pandas)


def _get_data():
    data = du.load_csv(__file__, 'anes96.csv', sep=r'\s')
    data = du.strip_column_names(data)
    data['logpopul'] = log(data['popul'] + .1)
    return data.astype(float)