Repository URL to install this package:
|
Version:
0.10.2 ▾
|
"""
Test Results for discrete models from Stata
"""
import os
import numpy as np
class Namespace(object):
pass
# Discrete Model Tests
# Note that there is a slight refactor of the classes, so that one dataset
# might be used for more than one model
cur_dir = os.path.abspath(os.path.dirname(__file__))
class Anes(object):
def __init__(self):
"""r
Results are from Stata 11 (checked vs R nnet package).
"""
self.nobs = 944
def mnlogit_basezero():
obj = Namespace()
obj.nobs = 944
params = [
-.01153598, .29771435, -.024945, .08249144, .00519655,
-.37340167, -.08875065, .39166864, -.02289784, .18104276,
.04787398, -2.2509132, -.1059667, .57345051, -.01485121,
-.00715242, .05757516, -3.6655835, -.0915567, 1.2787718,
-.00868135, .19982796, .08449838, -7.6138431, -.0932846,
1.3469616, -.01790407, .21693885, .08095841, -7.0604782,
-.14088069, 2.0700801, -.00943265, .3219257, .10889408,
-12.105751]
obj.params = np.reshape(params, (6, -1), order='F')
bse = [
.0342823657, .093626795, .0065248584, .0735865799,
.0176336937, .6298376313, .0391615553, .1082386919,
.0079144618, .0852893563, .0222809297, .7631899491,
.0570382292, .1585481337, .0113313133, .1262913234,
.0336142088, 1.156541492, .0437902764, .1288965854,
.0084187486, .0941250559, .0261963632, .9575809602,
.0393516553, .1171860107, .0076110152, .0850070091,
.0229760791, .8443638283, .042138047, .1434089089,
.0081338625, .0910979921, .025300888, 1.059954821]
obj.bse = np.reshape(bse, (6, -1), order='F')
obj.yhat = np.loadtxt(os.path.join(cur_dir, 'yhat_mnlogit.csv'))
obj.phat = np.loadtxt(os.path.join(cur_dir, 'phat_mnlogit.csv'))
obj.cov_params = None
obj.llf = -1461.922747312
obj.llnull = -1750.34670999
obj.llr = 576.8479253554
obj.llr_pvalue = 1.8223179e-102
obj.prsquared = .1647810465387
obj.df_model = 30
obj.df_resid = 944 - 36
obj.J = 7
obj.K = 6
obj.aic = 2995.84549462
obj.bic = 3170.45003661
z = [
-.3364988051, 3.179798597, -3.823070772, 1.121012042,
.2946945327, -.5928538661, -2.266269864, 3.618564069,
-2.893164162, 2.122688754, 2.148652536, -2.949348555,
-1.857818873, 3.616885888, -1.310634214, -.0566342868,
1.712822091, -3.169435381, -2.090799808, 9.920912816,
-1.031191864, 2.123004903, 3.225576554, -7.951122047,
-2.370538224, 11.49421878, -2.352389066, 2.552011323,
3.523595639, -8.361890935, -3.34331327, 14.43480847,
-1.159676452, 3.533839715, 4.303962885, -11.42100649]
obj.z = np.reshape(z, (6, -1), order='F')
pvalues = [
0.7364947525, 0.0014737744, 0.0001317999, 0.2622827367,
0.7682272401, 0.5532789548, 0.0234348654, 0.0002962422,
0.0038138191, 0.0337799420, 0.0316619538, 0.0031844460,
0.0631947400, 0.0002981687, 0.1899813744, 0.9548365214,
0.0867452747, 0.0015273542, 0.0365460134, 3.37654e-23,
0.3024508550, 0.0337534410, 0.0012571921, 1.84830e-15,
0.0177622072, 1.41051e-30, 0.0186532528, 0.0107103038,
0.0004257334, 6.17209e-17, 0.0008278439, 3.12513e-47,
0.2461805610, 0.0004095694, 0.0000167770, 3.28408e-30]
obj.pvalues = np.reshape(pvalues, (6, -1), order='F')
conf_int = [
[[-0.0787282, 0.0556562],
[0.1142092, 0.4812195],
[-0.0377335, -0.0121565],
[-0.0617356, 0.2267185],
[-0.0293649, 0.0397580],
[-1.6078610, 0.8610574]],
[[-0.1655059, -0.0119954],
[0.1795247, 0.6038126],
[-0.0384099, -0.0073858],
[0.0138787, 0.3482068],
[0.0042042, 0.0915438],
[-3.7467380, -0.7550884]],
[[-0.2177596, 0.0058262],
[0.2627019, 0.8841991],
[-0.0370602, 0.0073578],
[-0.2546789, 0.2403740],
[-0.0083075, 0.1234578],
[-5.9323630, -1.3988040]],
[[-0.1773841, -0.0057293],
[1.0261390, 1.5314040],
[-0.0251818, 0.0078191],
[0.0153462, 0.3843097],
[0.0331544, 0.1358423],
[-9.4906670, -5.7370190]],
[[-0.1704124, -0.0161568],
[1.1172810, 1.5766420],
[-0.0328214, -0.0029868],
[0.0503282, 0.3835495],
[0.0359261, 0.1259907],
[-8.7154010, -5.4055560]],
[[-0.2234697, -0.0582916],
[1.7890040, 2.3511560],
[-0.0253747, 0.0065094],
[0.1433769, 0.5004745],
[0.0593053, 0.1584829],
[-14.1832200, -10.0282800]]]
obj.conf_int = np.asarray(conf_int)
# margins, dydx(*) predict(outcome(#))
obj.margeff_dydx_overall = np.array([
[0.00868085993550, -0.09779854015456, 0.00272556969847,
-0.01992376579372, -0.00603133322764],
[0.00699386733148, -0.05022430802614, -0.00211003909752,
-0.00536980000265, -0.00554366741814],
[-0.00391040848820, -0.02824717135857, -0.00100551299310,
0.00664337806861, 0.00097987356999],
[-0.00182580888015, -0.00573744730031, -0.00004249256428,
-0.00546669558488, 0.00054101121854],
[-0.00098558129923, 0.01985550937033, 0.00047972250012,
0.00172605778905, 0.00211291403209],
[-0.00153469551647, 0.03755346502013, -0.00068531143399,
0.00472471794347, 0.00254733486106],
[-0.00741820702809, 0.12459834487569, 0.00063806819375,
0.01766610701188, 0.00539385283759]
]).T
obj.margeff_dydx_overall_se = np.array([
[.0038581061, .0080471125, .0007068488, .0082318967, .0020261706],
[.003904378, .0073600286, .000756431, .0084381578, .0020482238],
[.003137126, .0056813182, .0006601377, .0068932588, .0018481806],
[.0019427783, .0031904763, .0003865411, .004361789, .0011523221],
[.0029863227, .0054076092, .0005886612, .0064426365, .0018886818],
[.0035806552, .0069497362, .000722511, .0078287717, .0022352393],
[.0033641608, .008376629, .0006774697, .0073505286, .0021660086]
]).T
obj.margeff_dydx_mean = np.array([
[0.01149887431225, -0.13784207091973, 0.00273313385873,
-0.02542974260540, -0.00855346837482],
[0.01114846831102, -0.09864273512889, -0.00222435063712,
-0.01214617126321, -0.00903581444579],
[-0.00381702868421, -0.05132297961269, -0.00116763216994,
0.00624203027060, 0.00021912081810],
[-0.00233455327258, -0.00928554037343, -0.00000206561214,
-0.00775415690571, 0.00060004460394],
[-0.00352579921274, 0.06412187169362, 0.00073938948643,
0.00747778063206, 0.00459965010365],
[-0.00574308219449, 0.11126535089794, -0.00057337915464,
0.01467424346725, 0.00641760846097],
[-0.00722687818452, 0.12170608820238, 0.00049490419675,
0.01693601418978, 0.00575285798725]]).T
obj.margeff_dydx_mean_se = np.array([
[.0043729758, .0110343353, .0008149907, .0092551389, .0023752071],
[.004875051, .0124746358, .0009613152, .0105665812, .0026524426],
[.0040718954, .0103613938, .0008554615, .0089931297, .0024374625],
[.0026430804, .0070845916, .0005364369, .0057654258, .0015988838],
[.0037798151, .0103849291, .0007393481, .0082021938, .0023489261],
[.0045654631, .0130329403, .0009128134, .0100053262, .0028048602],
[.0027682389, .0113292677, .0005325113, .0061289353, .0017330763]
]).T
obj.margeff_dydx_dummy_overall = np.array([
[0.00549149574321, -0.05348235321783, 0.00298963549049,
-0.01479461677951, -0.00332167981255, -0.26502967041815],
[0.00345677928276, -0.00950322030929, -0.00189456107189,
0.00033893662061, -0.00314690167350, -0.21040878091828],
[-0.00645089013284, 0.00401746940204, -0.00083948249351,
0.01114202556889, 0.00277069841472, -0.15967397659686],
[-0.00215436802341, -0.00366545199370, -0.00000002297812,
-0.00457368049644, 0.00065303026027, -0.00094772782001],
[0.00058038428936, -0.00369080100124, 0.00035948233235,
-0.00018863693013, 0.00079351293461, 0.12640653743480],
[0.00217597030999, -0.01279456622853, -0.00091882392767,
0.00001651192759, -0.00037998290789, 0.27175070356670],
[-0.00309932483642, 0.07911868907484, 0.00030378521102,
0.00805941631677, 0.00263129901425, 0.23790291475181]]).T
obj.margeff_dydx_dummy_overall_se = np.array([
[.0037314453, .0094102332, .000688838, .0079744554, .0019365971,
.0243914836],
[.0038215262, .0095938828, .0007410885, .008259353, .0019984087,
.0317628806],
[.0031045718, .00785814, .0006504353, .0067892866, .0018060332,
0.0262803561],
[.0019756086, .0051031194, .0003862449, .0043621673, .0011796953,
.0219999601],
[.0029714074, .0081732018, .0005715192, .0064742872, .0019130195,
.0331694192],
[.0034443743, .0097296187, .0006774867, .0075996454, .0021993881,
.038600835],
[.0032003518, .0098741227, .0006335772, .0070902078, .0021003227,
.0255727127]]).T
obj.margeff_eydx_dummy_overall = np.array([
[.03939188, -.65758371, .01750922, -.12131806, -.03613241,
-3.2132513],
[.02752366, -.383165, -.00830021, -.03652935, -.03286046,
-1.8741853],
[-.05006681, -.2719659, -.00626481, .06525323, .01012554,
-2.0058029],
[-.05239558, -.22549142, .00025015, -.13104416, .01114517,
-.27052009],
[-.00296374, .25627809, .00140513, .03358712, .02296041,
1.3302701],
[.00328283, .2800168, -.0083912, .04332782, .01575863,
1.8441023],
[-.03257068, .98346111, -.00122118, .10847807, .0406456,
2.9119099]]).T
obj.margeff_eydx_dummy_overall_se = np.array([
[.0272085605, .0777760394, .0052427952, .0584011446, .0148618012,
.5796921383],
[.0262290023, .0724479385, .005174736, .0567743614, .0144447083,
.3015738731],
[.0321415498, .0895589422, .0067480662, .0701460193, .0190451865,
.3904138447],
[.0511305319, .1420904068, .0102342163, .1129912244, .0308618233,
.3693799595],
[.0340186217, .0991711703, .0065812158, .0737441012, .0212966336,
.2346982385],
[.0289250212, .0840662279, .0056743561, .0631772185, .0177278895,
.2089516714],
[.0318251305, .1085637405, .0062400589, .0699123044, .0201045606,
.3727166284]]).T
# taken from gretl
obj.resid = np.loadtxt(os.path.join(cur_dir, 'mnlogit_resid.csv'),
delimiter=",")
return obj
mnlogit_basezero = mnlogit_basezero()
class DiscreteL1(object):
def __init__(self):
"""
Special results for L1 models
Uses the Spector data and a script to generate the baseline results
"""
pass
def logit():
"""
Results generated with:
data = sm.datasets.spector.load(as_pandas=False)
data.exog = sm.add_constant(data.exog, prepend=True)
alpha = 3 * np.array([0, 1, 1, 1])
res2 = sm.Logit(data.endog, data.exog).fit_regularized(
method="l1", alpha=alpha, disp=0, trim_mode='size',
size_trim_tol=1e-5, acc=1e-10, maxiter=1000)
"""
obj = Namespace()
nan = np.nan
obj.params = [-4.10271595, 0., 0.15493781, 0.]
obj.conf_int = [
[-9.15205122, 0.94661932],
[nan, nan],
[-0.06539482, 0.37527044],
[nan, nan]]
obj.bse = [2.5762388, nan, 0.11241668, nan]
obj.nnz_params = 2
obj.aic = 42.091439368583671
obj.bic = 45.022911174183122
obj.cov_params = [
[6.63700638, nan, -0.28636261, nan],
[nan, nan, nan, nan],
[-0.28636261, nan, 0.01263751, nan],
[nan, nan, nan, nan]]
return obj
logit = logit()
def sweep():
"""
Results generated with
params = np.zeros((3, 4))
alphas = np.array(
[[0.1, 0.1, 0.1, 0.1],
[0.4, 0.4, 0.5, 0.5], [0.5, 0.5, 1, 1]])
model = sm.Logit(data.endog, data.exog)
for i in range(3):
alpha = alphas[i, :]
res2 = model.fit_regularized(method="l1", alpha=alpha,
disp=0, acc=1e-10,
maxiter=1000, trim_mode='off')
params[i, :] = res2.params
print(params)
"""
obj = Namespace()
obj.params = [
[-10.37593611, 2.27080968, 0.06670638, 2.05723691],
[-5.32670811, 1.18216019, 0.01402395, 1.45178712],
[-3.92630318, 0.90126958, -0., 1.09498178]]
return obj
sweep = sweep()
def probit():
"""
Results generated with
data = sm.datasets.spector.load(as_pandas=False)
data.exog = sm.add_constant(data.exog, prepend=True)
alpha = np.array([0.1, 0.2, 0.3, 10])
res2 = sm.Probit(data.endog, data.exog).fit_regularized(
method="l1", alpha=alpha, disp=0, trim_mode='auto',
auto_trim_tol=0.02, acc=1e-10, maxiter=1000)
"""
obj = Namespace()
nan = np.nan
obj.params = [-5.40476992, 1.25018458, 0.04744558, 0.]
obj.conf_int = [
[-9.44077951, -1.36876033],
[0.03716721, 2.46320194],
[-0.09727571, 0.19216687],
[np.nan, np.nan]]
obj.bse = [2.05922641, 0.61889778, 0.07383875, np.nan]
obj.nnz_params = 3
obj.aic = 38.399773877542927
obj.bic = 42.796981585942106
obj.cov_params = [
[4.24041339, -0.83432592, -0.06827915, nan],
[-0.83432592, 0.38303447, -0.01700249, nan],
[-0.06827915, -0.01700249, 0.00545216, nan],
[nan, nan, nan, nan]]
return obj
probit = probit()
def mnlogit():
"""
Results generated with
anes_data = sm.datasets.anes96.load(as_pandas=False)
anes_exog = anes_data.exog
anes_exog = sm.add_constant(anes_exog, prepend=False)
mlogit_mod = sm.MNLogit(anes_data.endog, anes_exog)
alpha = 10 * np.ones((mlogit_mod.J - 1, mlogit_mod.K))
alpha[-1, :] = 0
mlogit_l1_res = mlogit_mod.fit_regularized(
method='l1', alpha=alpha, trim_mode='auto', auto_trim_tol=0.02,
acc=1e-10)
"""
obj = Namespace()
obj.params = [
[0.00100163, -0.05864195, -0.06147822, -0.04769671, -0.05222987,
-0.09522432],
[0., 0.03186139, 0.12048999, 0.83211915, 0.92330292,
1.5680646],
[-0.0218185, -0.01988066, -0.00808564, -0.00487463, -0.01400173,
-0.00562079],
[0., 0.03306875, 0., 0.02362861, 0.05486435,
0.14656966],
[0., 0.04448213, 0.03252651, 0.07661761, 0.07265266,
0.0967758],
[0.90993803, -0.50081247, -2.08285102, -5.26132955, -4.86783179,
-9.31537963]]
obj.conf_int = [
[[-0.0646223, 0.06662556],
[np.nan, np.nan],
[-0.03405931, -0.00957768],
[np.nan, np.nan],
[np.nan, np.nan],
[0.26697895, 1.55289711]],
[[-0.1337913, 0.01650741],
[-0.14477255, 0.20849532],
[-0.03500303, -0.00475829],
[-0.11406121, 0.18019871],
[0.00479741, 0.08416684],
[-1.84626136, 0.84463642]],
[[-0.17237962, 0.04942317],
[-0.15146029, 0.39244026],
[-0.02947379, 0.01330252],
[np.nan, np.nan],
[-0.02501483, 0.09006785],
[-3.90379391, -0.26190812]],
[[-0.12938296, 0.03398954],
[0.62612955, 1.03810876],
[-0.02046322, 0.01071395],
[-0.13738534, 0.18464256],
[0.03017236, 0.12306286],
[-6.91227465, -3.61038444]],
[[-0.12469773, 0.02023799],
[0.742564, 1.10404183],
[-0.02791975, -0.00008371],
[-0.08491561, 0.19464431],
[0.0332926, 0.11201273],
[-6.29331126, -3.44235233]],
[[-0.17165567, -0.01879296],
[1.33994079, 1.79618841],
[-0.02027503, 0.00903345],
[-0.00267819, 0.29581751],
[0.05343135, 0.14012026],
[-11.10419107, -7.52656819]]]
obj.bse = [
[0.03348221, 0.03834221, 0.05658338, 0.04167742, 0.03697408,
0.03899631],
[np.nan, 0.09012101, 0.13875269, 0.10509867, 0.09221543,
0.11639184],
[0.00624543, 0.00771564, 0.01091253, 0.00795351, 0.00710116,
0.00747679],
[np.nan, 0.07506769, np.nan, 0.08215148, 0.07131762,
0.07614826],
[np.nan, 0.02024768, 0.02935837, 0.02369699, 0.02008204,
0.02211492],
[0.32804638, 0.68646613, 0.92906957, 0.84233441, 0.72729881,
0.91267567]]
obj.nnz_params = 32
obj.aic = 3019.4391360294126
obj.bic = 3174.6431733460686
return obj
mnlogit = mnlogit()
class Spector(object):
"""
Results are from Stata 11
"""
def __init__(self):
self.nobs = 32
def logit():
obj = Namespace()
obj.nobs = 32
obj.params = [2.82611297201, .0951576702557, 2.37868772835,
-13.0213483201]
obj.cov_params = [
[1.59502033639, -.036920566629, .427615725153, -4.57347950298],
[-.036920566629, .0200375937069, .0149126464275, -.346255757562],
[.427615725153, .0149126464275, 1.13329715236, -2.35916128427],
[-4.57347950298, -.346255757562, -2.35916128427, 24.3179625937]]
obj.bse = [1.26294114526, .141554207662, 1.06456430165, 4.93132462871]
obj.resid_pearson = [
-.1652382, -.2515266, -.4800059, -.1630655,
.8687437, -.1900454, -.165002, -.2331563,
-.3535812, .6647838, -.1583799, -.4843181,
-.689527, 2.043449, -.7516119, -.1764176,
-.2380445, -.2003426, -1.199277, .7164842,
-.255713, .3242821, -.5646816, -2.400189,
.4392082, 1.038473, .75747, -.6659256,
.4336657, .2404583, -1.060033, 2.829577]
obj.resid_dev = [
-.2321102, -.3502712, -.6439626, -.2290982,
1.060478, -.2663844, -.2317827, -.3253788, -.4853875,
.8555557, -.2225972, -.6491808, -.8819993, 1.813269,
-.9463985, -.247583, -.3320177, -.2805444, -1.335131,
.9103027, -.3559217, .4471892, -.744005, -1.955074,
.5939538, 1.209638, .952332, -.8567857, .5870719, .335292,
-1.227311, 2.096639]
# from gretl
obj.resid_generalized = [
-0.026578, -0.059501, -0.187260,
-0.025902, 0.430107, -0.034858, -0.026504, -0.051559,
-0.111127, 0.306489, -0.024470, -0.189997, -0.322240,
0.806789, -0.360990, -0.030184, -0.053626, -0.038588,
-0.589872, 0.339214, -0.061376, 0.095153, -0.241772,
-0.852091, 0.161709, 0.518867, 0.364579, -0.307219,
0.158296, 0.054660, -0.529117, 0.888969]
obj.phat = np.array([
.02657799236476,
.05950126051903,
.18725991249084,
.02590163610876,
.56989300251007,
.03485824912786,
.02650404907763,
.05155897513032,
.11112663894892,
.69351142644882,
.02447037212551,
.18999740481377,
.32223951816559,
.1932111531496,
.36098992824554,
.03018374741077,
.05362640321255,
.03858831897378,
.58987241983414,
.66078591346741,
.06137581542134,
.90484726428986,
.24177247285843,
.85209089517593,
.8382905125618,
.48113295435905,
.63542068004608,
.30721867084503,
.84170418977737,
.94534027576447,
.52911710739136,
.1110308393836])
obj.yhat = np.array([
-3.6007342338562,
-2.7604126930237,
-1.4679137468338,
-3.6272060871124,
.28141465783119,
-3.3209850788116,
-3.6035962104797,
-2.9120934009552,
-2.0792844295502,
.81658720970154,
-3.6855175495148,
-1.4500269889832,
-.74349880218506,
-1.429278254509,
-.57107019424438,
-3.4698030948639,
-2.8705959320068,
-3.2154531478882,
.36343798041344,
.66679841279984,
-2.7273993492126,
2.2522828578949,
-1.1429864168167,
1.7510952949524,
1.6455633640289,
-.07550399750471,
.55554306507111,
-.81315463781357,
1.6709630489349,
2.8504176139832,
.11660042405128,
-2.0802545547485])
obj.llf = -12.8896334653335
obj.llnull = -20.5917296966173
obj.df_model = 3
obj.df_resid = 32 - 4 # TODO: is this right? not reported in stata
obj.llr = 15.4041924625676
obj.prsquared = .374038332124624
obj.llr_pvalue = .00150187761112892
obj.aic = 33.779266930667
obj.bic = 39.642210541866
obj.z = [2.237723415, 0.6722348408, 2.234423721, -2.640537645]
obj.conf_int = [
[.3507938, 5.301432],
[-.1822835, .3725988],
[.29218, 4.465195],
[-22.68657, -3.35613]]
obj.pvalues = [.0252390974, .5014342039, .0254552063, .0082774596]
# taken from margins command
obj.margeff_nodummy_dydx = [
.36258084688424, .01220841099085, .30517768382304]
obj.margeff_nodummy_dydx_se = [.1094412, .0177942, .0923796]
obj.margeff_nodummy_dydxmean = [
.53385885781692, .01797548988961, .44933926079386]
obj.margeff_nodummy_dydxmean_se = [.237038, .0262369, .1967626]
obj.margeff_nodummy_dydxmedian = [
.25009492465091, .00842091261329, .2105003352955]
obj.margeff_nodummy_dydxmedian_se = [.1546708, .0134314, .0928183]
obj.margeff_nodummy_dydxzero = [
6.252993785e-06, 2.105437138e-07, 5.263030788e-06]
obj.margeff_nodummy_dydxzero_se = [.0000288, 9.24e-07, .000025]
obj.margeff_nodummy_dyex = [
1.1774000792198, .27896245178384, .16960002159996]
obj.margeff_nodummy_dyex_se = [.3616481, .4090679, .0635583]
obj.margeff_nodummy_dyexmean = [
1.6641381583512, .39433730945339, .19658592659731]
obj.margeff_nodummy_dyexmean_se = [.7388917, .5755722, .0860836]
# NOTE: PSI at median should be a NaN or 'omitted'
obj.margeff_nodummy_dyexmedian = [.76654095836557, .18947053379898, 0]
obj.margeff_nodummy_dyexmedian_se = [.4740659, .302207, 0]
# NOTE: all should be NaN
# TODO: re above Note, since the values below are *not* NaN,
# should something be done about this?
obj.margeff_nodummy_dyexzero = [0, 0, 0]
obj.margeff_nodummy_dyexzero_se = [0, 0, 0]
obj.margeff_nodummy_eydx = [
1.8546366266779, .06244722072812, 1.5610138123033]
obj.margeff_nodummy_eydx_se = [.847903, .0930901, .7146715]
obj.margeff_nodummy_eydxmean = [
2.1116143062702, .0710998816585, 1.7773072368626]
obj.margeff_nodummy_eydxmean_se = [1.076109, .1081501, .9120842]
obj.margeff_nodummy_eydxmedian = [
2.5488082240624, .0858205793373, 2.1452853812126]
obj.margeff_nodummy_eydxmedian_se = [1.255377, .1283771, 1.106872]
obj.margeff_nodummy_eydxzero = [
2.8261067189993, .0951574597115, 2.3786824653103]
obj.margeff_nodummy_eydxzero_se = [1.262961, .1415544, 1.064574]
obj.margeff_nodummy_eyex = [
5.4747106798973, 1.3173389907576, .44600395466634]
obj.margeff_nodummy_eyex_se = [2.44682, 1.943525, .1567618]
obj.margeff_nodummy_eyexmean = [
6.5822977203268, 1.5597536538833, .77757191612739]
obj.margeff_nodummy_eyexmean_se = [3.354433, 2.372543, .3990368]
obj.margeff_nodummy_eyexmedian = [7.8120973525952, 1.9309630350892, 0]
obj.margeff_nodummy_eyexmedian_se = [3.847731951, 2.888485089, 0]
obj.margeff_nodummy_eyexzero = [0, 0, 0]
obj.margeff_nodummy_eyexzero_se = [0, 0, 0]
# for below GPA = 2.0, psi = 1
obj.margeff_nodummy_atexog1 = [
.1456333017086, .00490359933927, .12257689308426]
obj.margeff_nodummy_atexog1_se = [.145633, .0111226, .1777101]
# for below GPA at mean, tuce = 21, psi = 0
obj.margeff_nodummy_atexog2 = [
.25105129214546, .00845311433473, .2113052923675]
obj.margeff_nodummy_atexog2_se = [.1735778, .012017, .0971515]
# must get this from older margeff or i.psi then margins
obj.margeff_dummy_dydx = [
.36258084688424, .01220841099085, .35751515254729]
obj.margeff_dummy_dydx_se = [.1094412, .0177942, .1420034]
obj.margeff_dummy_dydxmean = [
.53385885781692, .01797548988961, .4564984096959]
obj.margeff_dummy_dydxmean_se = [.237038, .0262369, .1810537]
# from margeff
obj.margeff_dummy_count_dydx_median = [
0.250110487483923, 0.008426867847905, 0.441897738279663]
obj.margeff_dummy_count_dydx_median_se = [
.1546736661, .0134551951, .1792363708]
# estimate with i.psi for the below then use margins
obj.margeff_dummy_eydx = [
1.8546366266779, .06244722072812, 1.5549034398832]
obj.margeff_dummy_eydx_se = [.847903, .0930901, .7283702]
# ie
# margins, eydx(*) at((mean) _all)
obj.margeff_dummy_eydxmean = [
2.1116143062702, .0710998816585, 1.6631775707188]
obj.margeff_dummy_eydxmean_se = [1.076109, .1081501, .801205]
# TODO: Should we remove the commented-out code below?
# Factor variables not allowed in below
# test raises
# obj.margeff_dummy_dydxzero
# obj.margeff_dummy_eydxmedian
# obj.margeff_dummy_eydxzero
# obj.margeff_dummy_dyex
# obj.margeff_dummy_dyexmean
# obj.margeff_dummy_dyexmedian
# obj.margeff_dummy_dyexzero
# obj.margeff_dummy_eyex
# obj.margeff_count_dummy_dydx_median
# obj.margeff_count_dummy_dydx_median_se
# NOTE: need old version of margeff for nodisc but at option is broken
# stata command is margeff, count nodisc
# this can be replicated with the new results by margeff
# and then using margins for the last value
obj.margeff_count_dydx = [.3625767598018, .0122068569914, .3051777]
obj.margeff_count_dydx_se = [.1094379569, .0177869773, .0923796]
# middle value taken from margeff rest from margins
obj.margeff_count_dydxmean = [
.5338588, 0.01797186545386, .4493393]
obj.margeff_count_dydxmean_se = [.237038, .0262211, .1967626]
# with new version of margeff this is just a call to
# margeff
# mat list e(margeff_b), nonames format(%17.16g)
obj.margeff_count_dummy_dydxoverall = [.362576759801767,
.012206856991439,
.357515163621704]
# AFAICT, an easy way to get se is
# mata
# V = st_matrix("e(margeff_V)")
# se = diagonal(cholesky(diag(V)))
# last SE taken from margins with i.psi, don't know how they
# don't know why margeff is different, but trust official results
obj.margeff_count_dummy_dydxoverall_se = [.1094379569, .0177869773,
.1420034]
# from new margeff
obj.margeff_count_dummy_dydxmean = [0.533849340033768,
0.017971865453858,
0.456498405282412]
obj.margeff_count_dummy_dydxmean_se = [.2370202503, .0262210796,
.1810536852]
# for below GPA = 2.0, psi = 1
obj.margeff_dummy_atexog1 = [
.1456333017086, .00490359933927, .0494715429937]
obj.margeff_dummy_atexog1_se = [.145633, .0111226, .0731368]
# for below GPA at mean, tuce = 21, psi = 0
obj.margeff_dummy_atexog2 = [.25105129214546,
.00845311433473,
.44265645632553]
obj.margeff_dummy_atexog2_se = [.1735778, .012017, .1811925]
# The test for the prediction table was taken from Gretl
# Gretl Output matched the Stata output here for params and SE
obj.pred_table = np.array([[18, 3], [3, 8]])
return obj
logit = logit()
def probit():
obj = Namespace()
obj.nobs = 32
obj.params = [
1.62581025407, .051728948442, 1.42633236818, -7.45232041607]
obj.cov_params = [
[.481472955383, -.01891350017, .105439226234, -1.1696681354],
[-.01891350017, .00703757594, .002471864882, -.101172838897],
[.105439226234, .002471864882, .354070126802, -.594791776765],
[-1.1696681354, -.101172838897, -.594791776765, 6.46416639958]]
obj.bse = [.693882522754, .083890261293, .595037920474, 2.54247249731]
obj.llf = -12.8188033249334
obj.llnull = -20.5917296966173
obj.df_model = 3
obj.df_resid = 32 - 4
obj.llr = 15.5458527433678
obj.prsquared = .377478069409622
obj.llr_pvalue = .00140489496775855
obj.aic = 33.637606649867
obj.bic = 39.500550261066
obj.z = [2.343062695, .6166263836, 2.397044489, -2.931131182]
obj.conf_int = [
[.2658255, 2.985795],
[-.1126929, .2161508],
[.2600795, 2.592585],
[-12.43547, -2.469166]]
obj.pvalues = [.0191261688, .537481188, .0165279168, .0033773013]
obj.phat = [
.0181707, .0530805, .1899263, .0185707, .5545748,
.0272331, .0185033, .0445714, .1088081, .6631207,
.0161024, .1935566, .3233282, .1951826, .3563406,
.0219654, .0456943, .0308513, .5934023, .6571863,
.0619288, .9045388, .2731908, .8474501, .8341947,
.488726, .6424073, .3286732, .8400168, .9522446,
.5399595, .123544]
obj.yhat = np.array([
-2.0930860042572,
-1.615691781044,
-.87816804647446,
-2.0842070579529,
.13722851872444,
-1.9231110811234,
-2.0856919288635,
-1.6999372243881,
-1.2328916788101,
.42099541425705,
-2.1418602466583,
-.86486464738846,
-.45841211080551,
-.85895526409149,
-.36825761198997,
-2.0147502422333,
-1.6881184577942,
-1.8684275150299,
.23630557954311,
.40479621291161,
-1.538782119751,
1.3078554868698,
-.60319095849991,
1.025558590889,
.97087496519089,
-.02826354466379,
.36490100622177,
-.44357979297638,
.99452745914459,
1.6670187711716,
.10033150017262,
-1.1574513912201])
obj.resid_dev = [
-.191509, -.3302762, -.6490455, -.1936247, 1.085867,
-.2349926, -.1932698, -.3019776, -.4799906, .9064196,
-.1801855, -.6559291, -.8838201, 1.807661, -.9387071,
-.2107617, -.3058469, -.2503485, -1.341589, .9162835,
-.3575735, .447951, -.7988633, -1.939208, .6021435,
1.196623, .9407793, -.8927477, .59048, .3128364,
-1.246147, 2.045071]
# Stata doesn't have it, but I think it's just oversight
obj.resid_pearson = None
# generalized residuals from gretl
obj.resid_generalized = [
-0.045452, -0.114220, -0.334908,
-0.046321, 0.712624, -0.064538,
-0.046175, -0.098447, -0.209349,
0.550593, -0.040906, -0.340339,
-0.530763, 1.413373, -0.579170,
-0.053593, -0.100556, -0.071855,
-0.954156, 0.559294, -0.130167,
0.187523, -0.457597, -1.545643,
0.298511, 0.815964, 0.581013,
-0.538579, 0.289631, 0.104405,
-0.862836, 1.652638]
obj.pred_table = np.array([[18, 3], [3, 8]])
return obj
probit = probit()
class RandHIE(object):
"""
Results obtained from Stata 11
"""
def __init__(self):
self.nobs = 20190
def poisson():
obj = Namespace()
obj.nobs = 20190
obj.params = [
-.052535114675, -.247086797633, .035290201794,
-.03457750643, .271713973711, .033941474461, -.012635035534,
.054056326828, .206115121809, .700352877227]
obj.cov_params = None
obj.bse = [
.00288398915279, .01061725196728, .00182833684966,
.00161284852954, .01223913844387, .00056476496963,
.00925061122826, .01530987068312, .02627928267502,
.01116266712362]
predict = np.loadtxt(os.path.join(cur_dir, 'yhat_poisson.csv'),
delimiter=",")
obj.phat = predict[:, 0]
obj.yhat = predict[:, 1]
obj.llf = -62419.588535018
obj.llnull = -66647.181687959
obj.df_model = 9
obj.df_resid = obj.nobs - obj.df_model - 1
obj.llr = 8455.186305881856
obj.prsquared = .0634324369893758
obj.llr_pvalue = 0
obj.aic = 124859.17707
obj.bic = 124938.306497
obj.z = [-18.21612769, -23.27219872, 19.30180524, -21.43878101,
22.20041672, 60.09840604, -1.36585953, 3.53081538, 7.84325525,
62.74063980]
obj.conf_int = [
[-.0581876, -.0468826],
[-0.2678962, -0.2262774],
[0.0317067, 0.0388737],
[-0.0377386, -0.0314164],
[0.2477257, 0.2957022],
[0.0328346, 0.0350484],
[-0.0307659, 0.0054958],
[0.0240495, 0.0840631],
[0.1546087, 0.2576216],
[0.6784745, 0.7222313]]
obj.pvalues = [
3.84415e-74, 8.4800e-120, 5.18652e-83, 5.8116e-102,
3.4028e-109, 0, .1719830562, .0004142808, 4.39014e-15, 0]
# from stata
# use margins and put i. in front of dummies
obj.margeff_dummy_overall = [
-0.15027280560599, -0.66568074771099,
0.10094500919706, -0.09890639687842,
0.77721770295360, 0.09708707452600,
-0.03608195237609, 0.15804581481115,
0.65104087597053]
obj.margeff_dummy_overall_se = [
.008273103, .0269856266,
.0052466639, .0046317555, .0351582169, .0016652181,
.0263736472, .0457480115, .0913901155]
# just use margins
obj.margeff_nodummy_overall = [
-0.15027280560599, -0.70677348928158,
0.10094500919705, -0.09890639687842,
0.77721770295359, 0.09708707452600,
-0.03614158359367, 0.15462412033340,
0.58957704430148]
obj.margeff_nodummy_overall_se = [
.008273103, .0305119343,
.0052466639, .0046317555,
.0351582168, .0016652181,
.0264611158, .0437974779,
.0752099666]
# taken from gretl
obj.resid = np.loadtxt(os.path.join(cur_dir, 'poisson_resid.csv'),
delimiter=",")
return obj
poisson = poisson()
def negativebinomial_nb2_bfgs():
# R 2.15.1 MASS 7.3-22 glm.nb()
obj = Namespace()
obj.nobs = 20190
obj.params = [
-0.0579469537244314,
-0.267787718814838, 0.0412060770911646, -0.0381376804392121,
0.268915772213171, 0.0381637446219235, -0.0441338846217674,
0.0172521803400544, 0.177960787443151, 0.663556087183864,
# lnalpha from stata
1.292953339909746]
# alpha and stderr from stata
obj.lnalpha_std_err = .0143932
obj.lnalpha = 0.256929012449
obj.bse = [
0.00607085853920512, 0.0226125368090765,
0.00405542008282773, 0.00344455937127785, 0.0298855063286547,
0.00142421904710063, 0.0199374393307107, 0.0358416931939136,
0.0741013728607101, 0.0250354082637892,
# from stata
.0186098]
obj.z = [
-9.54510030998327, -11.8424447940467,
10.1607419822296, -11.071860382846, 8.99820030672628,
26.7962605187844, -2.21361850384595, 0.481343898758222,
2.40158556546135, 26.5047040652267]
obj.pvalues = [
1.35975947860026e-21,
2.35486776488278e-32, 2.96808970292151e-24,
1.71796558863781e-28, 2.2944789508802e-19,
3.57231639404726e-158, 0.0268550333379416, 0.630272102021494,
0.0163241908407114, 8.55476622951356e-155]
obj.fittedvalues = [
0.892904166867786, 0.892904166867786, 0.892904166867786,
0.892904166867786, 0.892904166867786, 0.937038051489553,
0.937038051489553, 0.937038051489553, 0.937038051489553,
0.937038051489553]
# obj.aic = 86789.3241530713 # This is what R reports
obj.aic = 86789.32415307125484 # from Stata
obj.df_resid = 20180
obj.df_model = 9
# R conf_int: 1.96 * bse, not profile likelihood via R's confint()
obj.conf_int = [
# from Stata
[-.0698826, -.0460113],
[-.3122654, -.2233101],
[.0330781, .049334],
[-.0448006, -.0314748],
[.2102246, .3276069],
[.0352959, .0410316],
[-.0834356, -.0048321],
[-.0535908, .0880951],
[.0324115, .3235101],
[.6150055, .7121067],
# from Stata
[1.256989, 1.329947]
]
obj.bic = 86876.36652289562335 # stata
obj.llnull = -44199.27443563430279 # stata
obj.llr = 1631.224718197351 # stata
obj.llf = -43383.66207653563 # stata
obj.df_model = 9.0
obj.llr_pvalue = 0.0
return obj
negativebinomial_nb2_bfgs = negativebinomial_nb2_bfgs()
def negativebinomial_nb1_bfgs():
# Unpublished implementation intended for R's COUNT package. Sent by
# J.Hilbe (of Cambridge UP NBin book) and Andrew Robinson to Vincent
# Arel-Bundock on 2012-12-06.
# TODO: Does the from Stata comment below apply to the
# commented-out obj.params here or to everything below? If the
# latter, then where do the commented-out version come from?
# obj.params = [-0.065309744899923, -0.296016207412261,
# 0.0411724098925173, -0.0320460533573259, 0.19083354227553,
# 0.0318566232844115, -0.0331972813313092, -0.0484691550721231,
# 0.111971860837541, 0.757560143822609,
# 3.73086958562569]
# from Stata
obj = Namespace()
obj.nobs = 20190
obj.params = [-.065317260803762961, -.296023807893893376,
.041187021258044826, -.032028789543547605,
.19065933246421754, .031871625115758778,
-.033250849053302826, -.04850769174426571,
.111813637465757343, .757277086555503409,
3.731151380800305]
# lnalpha and lnalpha_std_err are from stata
obj.lnalpha = 1.316716867203
obj.lnalpha_std_err = .0168876692
obj.bse = [
0.00536019929563678,
0.0196998350459769, 0.00335779098766272, 0.00301145915122889,
0.0237984097096245, 0.00107360844112751, 0.0167174614755359,
0.0298037989274781, 0.0546838603596457, 0.0214703279904911,
0.0630011409376052]
obj.z = [
-12.1842008660173, -15.0263292419148,
12.2617548393554, -10.6413707601675, 8.0187518663633,
29.6724784046551, -1.98578482623631, -1.62627439508848,
2.04762173155154, 35.2840508145997,
# From R, this is alpha/bse(alpha)
59.2190796881069
# taken from Stata even though they don't report it
# lnalpha/bse(lnalpha)
# 77.968995
]
obj.conf_int = [
[-0.075815736, -0.0548037543],
[-0.334627884, -0.2574045307],
[0.034591140, 0.0477536802],
[-0.037948513, -0.0261435934],
[0.144188659, 0.2374784253],
[0.029752351, 0.0339608958],
[-0.065963506, -0.0004310568],
[-0.106884601, 0.0099462908],
[0.004791495, 0.2191522271],
[3.607387349, 3.8543518219],
[0.715478301, 0.7996419867]]
# from Stata
obj.llf = -43278.75612911823
obj.llnull = -44199.2744356343
obj.llr = 1841.036613032149
obj.aic = 86579.51225823645655
obj.bic = 86666.55462806082505
obj.llr_pvalue = 0.0
obj.df_model = 9.0
obj.df_resid = 20180.0
# Smoke tests TODO: check against other stats package
obj.pvalues = [
3.65557865e-034, 5.24431864e-051,
1.42921171e-034, 2.09797259e-026, 1.15949461e-015,
1.56785415e-193, 4.71746349e-002, 1.04731854e-001,
4.07534831e-002, 1.95504975e-272, 0.00000000e+000]
obj.conf_int = [
[-.0758236, -.054811],
[-.3346363, -.2574113],
[.0346053, .0477687],
[-.0379314, -.0261261],
[.1440119, .2373067],
[.0297667, .0339766],
[-.0660178, -.0004839],
[-.1069241, .0099087],
[.0046266, .2190007],
[.7151889, .7993652],
# from stata for alpha no lnalpha
[3.609675, 3.856716]]
# [1.28360034e+00, 1.34979803e+00]]
obj.fittedvalues = [
0.8487497, 0.8487497, 0.8487497, 0.8487497,
0.8487497, 0.88201746, 0.88201746, 0.88201746, 0.88201746,
0.88201746]
return obj
negativebinomial_nb1_bfgs = negativebinomial_nb1_bfgs()
def negativebinomial_geometric_bfgs():
# Smoke tests TODO: Cross check with other stats package
obj = Namespace()
obj.nobs = 20190
obj.params = [
-0.05768894, -0.26646696, 0.04088528, -0.03795503,
0.26885821, 0.03802523, -0.04308456, 0.01931675, 0.18051684,
0.66469896]
obj.bse = [
0.00553867, 0.02061988, 0.00375937, 0.0030924,
0.02701658, 0.00132201, 0.01821646, 0.03271784, 0.06666231,
0.02250053]
obj.pvalues = [
2.10310916e-025, 3.34666368e-038, 1.50697768e-027,
1.25468406e-034, 2.48155744e-023, 6.18745348e-182,
1.80230194e-002, 5.54919603e-001, 6.77044178e-003,
8.44913440e-192]
obj.z = [-10.41567024, -12.92281571, 10.8755779, -12.27364916,
9.95160202, 28.76323587, -2.36514487, 0.59040434,
2.70792943, 29.54148082]
obj.aic = 87101.159433012392 # old value 87101.160011780419
obj.bic = 87180.288860125467 # old value 87180.289438893495
obj.df_model = 9.0
obj.df_resid = 20180.0
obj.llf = -43540.58000589021
obj.llnull = -44586.650971362695 # old value -44199.27443567125
obj.llr = 2092.1425097129977 # old value 1317.3888595620811
obj.llr_pvalue = 0 # old value 5.4288002863296022e-278
obj.fittedvalues = [
0.89348994, 0.89348994, 0.89348994,
0.89348994, 0.89348994, 0.9365745, 0.9365745, 0.9365745,
0.9365745, 0.9365745]
obj.conf_int = [
[-0.06854453, -0.04683335],
[-0.30688118, -0.22605273],
[0.03351706, 0.04825351],
[-0.04401602, -0.03189404],
[0.21590669, 0.32180972],
[0.03543415, 0.04061632],
[-0.07878816, -0.00738096],
[-0.04480903, 0.08344253],
[0.04986111, 0.31117258],
[0.62059873, 0.70879919]]
return obj
negativebinomial_geometric_bfgs = negativebinomial_geometric_bfgs()
def generalizedpoisson_gp2():
# Stata gnpoisson function
obj = Namespace()
obj.nobs = 20190
obj.llf = -43326.42720093228
obj.params = [-0.0604495342, -0.277717228, 0.0438136144,
-0.0395811744, 0.273044906, 0.0399108677, -0.0552626543,
-0.001227569488, 0.151980519, 0.651125316, 0.448085318]
obj.lnalpha_std_err = 0.0125607
obj.lnalpha = -0.8027716
obj.bse = [0.00634704, 0.02381906, 0.00443871, 0.00355094,
0.0334247, 0.00166303, 0.02102142, 0.0390845,
0.087821, 0.02626823, 0.00562825]
obj.df_model = 9
obj.aic = 86674.854401865
obj.conf_int = [
[-0.07288951, -0.04800956],
[-0.32440173, -0.23103272],
[0.03511389, 0.05251333],
[-0.04654088, -0.03262147],
[0.20753371, 0.33855610],
[0.03665139, 0.04317034],
[-0.09646387, -0.01406144],
[-0.07783191, 0.07537652],
[-0.02014548, 0.32410651],
[0.59964053, 0.70261011],
[0.43718883, 0.45925338]
]
obj.bic = 86761.896771689
obj.wald_pvalue = 4.8795019354e-254
obj.wald_statistic = 1206.46339591254
return obj
generalizedpoisson_gp2 = generalizedpoisson_gp2()
def zero_inflated_poisson_logit():
obj = Namespace()
obj.nobs = 20190
obj.params = [.1033783, -1.045983, -.0821979, .0085692,
-.0267957, 1.482363]
obj.llf = -57005.72199826186
obj.bse = [0.0079912, 0.02235510, .0107145, 0.0018697,
0.0014121, 0.0085915]
obj.conf_int = [
[0.0877159, 0.1190408],
[-1.089798, -1.002167],
[-0.1031979, -0.061198],
[0.0049045, 0.0122338],
[-0.0295635, -0.024028],
[1.465524, 1.499202]]
obj.aic = 114023.444
obj.bic = 114070.9
return obj
zero_inflated_poisson_logit = zero_inflated_poisson_logit()
def zero_inflated_poisson_probit():
obj = Namespace()
obj.nobs = 20190
obj.params = [.0622534, -.6429324, -.0821788, .0085673,
-.0267952, 1.482369]
obj.llf = -57006.05
obj.bse = [.0048228, .0132516, .0107142, .0018697,
.0014121, .0085913]
obj.conf_int = [
[0.0528009, .0717058],
[-0.6689051, -.6169597],
[-0.1031783, -.0611793],
[0.0049027, .0122319],
[-0.0295629, -.0240275],
[1.46553, 1.499208]]
obj.aic = 114024.1
obj.bic = 114071.6
return obj
zero_inflated_poisson_probit = zero_inflated_poisson_probit()
def zero_inflated_poisson_offset():
obj = Namespace()
obj.nobs = 20190
obj.params = [.1052014, -1.082434, -.0922822, .0115868,
-.0283842, 1.347514]
obj.llf = -58207.67
obj.bse = [.0081836, .0230043, .0107788, .0018687,
.0014162, .0086309]
obj.conf_int = [
[.0891619, .1212409],
[-1.127522, -1.037347],
[-.1134082, -.0711561],
[.0079242, .0152494],
[-.0311599, -.0256085],
[1.330598, 1.36443]]
obj.aic = 116427.3
obj.bic = 116474.8
return obj
zero_inflated_poisson_offset = zero_inflated_poisson_offset()
def zero_inflated_generalized_poisson():
obj = Namespace()
obj.nobs = 20190
obj.params = [3.57337, -17.95797, -0.21380, 0.03847,
-0.05348, 1.15666, 1.36468]
obj.llf = -43630.6
obj.bse = [1.66109, 7.62052, 0.02066, 0.00339,
0.00289, 0.01680, 0.01606]
obj.aic = 87275
return obj
zero_inflated_generalized_poisson = zero_inflated_generalized_poisson()
def zero_inflated_negative_binomial():
obj = Namespace()
obj.params = [1.883859, -10.280888, -0.204769, 1.137985, 1.344457]
obj.llf = -44077.91
obj.bse = [0.3653, 1.6694, 0.02178, 0.01163, 0.0217496]
obj.aic = 88165.81
return obj
zero_inflated_negative_binomial = zero_inflated_negative_binomial()