Learn more  » Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Bower components Debian packages RPM packages NuGet packages

alkaline-ml / statsmodels   python

Repository URL to install this package:

Version: 0.11.1 

/ duration / tests / results / phreg_gentests.py

import numpy as np

"""
Generate data sets for testing Cox proportional hazards regression
models.

After updating the test data sets, use R to run the survival.R script
to update the R results.
"""

# The current data may not reflect this seed
np.random.seed(5234)

# Loop over pairs containing (sample size, number of variables).
for (n, p) in (20, 1), (50, 1), (50, 2), (100, 5), (1000, 10):

    exog = np.random.normal(size=(5*n, p))
    coef = np.linspace(-0.5, 0.5, p)
    lpred = np.dot(exog, coef)
    expected_survival_time = np.exp(-lpred)

    # Survival times are exponential
    survival_time = -np.log(np.random.uniform(size=5*n))
    survival_time *= expected_survival_time

    # Set this to get a reasonable amount of censoring
    expected_censoring_time = np.mean(expected_survival_time)

    # Theses are the observation times.
    censoring_time = -np.log(np.random.uniform(size=5*n))
    censoring_time *= expected_censoring_time

    # Entry times
    entry_time = -np.log(np.random.uniform(size=5*n))
    entry_time *= 0.5*expected_censoring_time

    # 1=failure (death), 0=no failure (no death)
    status = 1*(survival_time <= censoring_time)

    # The censoring time of the failure time, whichever comes first
    time = np.where(status == 1, survival_time, censoring_time)

    # Round time so that we have ties
    time = np.around(time, decimals=1)

    # Only take cases where the entry time is before the failure or
    # censoring time.  Take exactly n such cases.
    ii = np.flatnonzero(entry_time < time)
    ii = ii[np.random.permutation(len(ii))[0:n]]
    status = status[ii]
    time = time[ii]
    exog = exog[ii, :]
    entry_time = entry_time[ii]

    data = np.concatenate((time[:, None], status[:, None],
                           entry_time[:, None], exog),
                          axis=1)

    fname = "results/survival_data_%d_%d.csv" % (n, p)
    np.savetxt(fname, data, fmt="%.5f")