In [1]: import numpy as np
In [2]: import statsmodels.api as sm
Create some data
In [3]: nsample = 50
In [4]: sig = 0.25
In [5]: x1 = np.linspace(0, 20, nsample)
In [6]: X = np.c_[x1, np.sin(x1), (x1-5)**2, np.ones(nsample)]
In [7]: beta = [0.5, 0.5, -0.02, 5.]
In [8]: y_true = np.dot(X, beta)
In [9]: y = y_true + sig * np.random.normal(size=nsample)
Set up and estimate the model
In [10]: olsmod = sm.OLS(y, X)
In [11]: olsres = olsmod.fit()
In [12]: print olsres.params
[ 0.50898072 0.51916098 -0.02158601 4.99563796]
In [13]: print olsres.bse
[ 0.01141961 0.04489185 0.00100265 0.07404521]
In-sample prediction
In [14]: ypred = olsres.predict(X)  # predictions on the same X the model was fitted with
Create a new sample of explanatory variables Xnew, predict and plot
In [15]: x1n = np.linspace(20.5,25, 10)  # 10 new points past the end of x1 (which stops at 20)
In [16]: Xnew = np.c_[x1n, np.sin(x1n), (x1n-5)**2, np.ones(len(x1n))]  # same column layout as X; constant sized from x1n
In [17]: ynewpred = olsres.predict(Xnew) # predict out of sample
In [18]: print(ypred)  # shows the in-sample predictions; ynewpred itself is not printed
[ 4.4559876 4.95431237 5.41158849 5.79952214 6.1000306
6.30821316 6.43315634 6.49644096 6.52859648 6.56408511
6.63563975 6.76888609 6.97813236 7.26401852 7.61341116
8.00156122 8.39617026 8.76269694 9.07003211 9.29560991
9.42911534 9.47417891 9.44778022 9.37745824 9.2967867
9.2398581 9.23568231 9.30342042 9.44923656 9.66528521
9.93100013 10.2164734 10.48736917 10.71056495 10.85959353
10.91899149 10.8868398 10.77507906 10.60754778 10.41606569
10.2352059 10.09661463 10.02381182 10.02832687 10.10780439
10.24639119 10.41734091 10.58740837 10.72231167 10.79236459]
In [19]: import matplotlib.pyplot as plt
In [20]: plt.figure()
Out[20]: <matplotlib.figure.Figure at 0xc64578c>
In [21]: plt.plot(x1, y, 'o', x1, y_true, 'b-')  # noisy observations as markers, true curve as a blue line
Out[21]:
[<matplotlib.lines.Line2D at 0xc65860c>,
<matplotlib.lines.Line2D at 0xc65822c>]
In [22]: plt.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)),'r')  # in-sample then out-of-sample predictions joined into one red line
Out[22]: [<matplotlib.lines.Line2D at 0xc66750c>]
In [23]: plt.title('OLS prediction, blue: true and data, fitted/predicted values:red')
Out[23]: <matplotlib.text.Text at 0xc674cec>