import numpy as np
import matplotlib.pyplot as plt

# Ground-truth parameters of the linear model y = a * x + b + noise.
a = 0.5  # coefficient (slope)
b = 3  # intercept
noiseVar = 5  # standard deviation of the noise (despite the "Var" in the name)

# Generate data: x takes the integer values 1..99, y is the noisy line.
x = np.arange(1, 100, 1)
y = a * x + b + noiseVar * np.random.randn(x.size)

# Draw a scatter plot of the data.
plt.plot(x, y, 'o', color='r')
plt.xlabel('Population')
plt.ylabel('House price')
可以得到以下散点图:
我们用bootstrap的方法来估计参数的可靠性(reliability)
# Bootstrap: refit the line on resampled (x, y) pairs to see how much the
# fitted parameters vary from one resample to the next.
nBoot = 1000  # number of bootstrap resamples
res = np.empty((nBoot, 2))  # row i holds [slope, intercept] of resample i

# The column of ones (intercept term) is the same for every resample.
intercept_col = np.ones(x.size)

for i in range(nBoot):
    # Sample data indices with replacement (same size as the data set).
    idx = np.random.choice(np.arange(y.size), y.size, replace=True)
    # Build the design matrix. Note that x must be indexed with the same
    # idx as y so that each (x, y) pair stays together.
    x2 = np.vstack((x[idx], intercept_col))
    # Least-squares fit of the resampled data.
    res[i, :] = np.linalg.lstsq(x2.T, y[idx], rcond=None)[0]
import numpy as np
import matplotlib.pyplot as plt

# Ground-truth parameters of the linear model y = a * x + b + noise.
a = 0.5  # coefficient (slope)
b = 3  # intercept
noiseVar = 5  # standard deviation of the noise (despite the "Var" in the name)

# Generate data: x runs from 1 up to (excluding) 1000 in steps of 0.1.
# NOTE(review): this gives roughly 9990 points versus 99 in the first
# example, while the surrounding text says "10x the data" -- confirm the
# intended range/step.
x = np.arange(1, 1000, 0.1)
y = a * x + b + noiseVar * np.random.randn(x.size)

# Draw a scatter plot of the data.
plt.plot(x, y, 'o', color='r')
得到的散点图为:
现在用原来10倍的数据,再用bootstrap来估计参数的可靠性(reliability)
# Bootstrap: refit the line on resampled (x, y) pairs to see how much the
# fitted parameters vary from one resample to the next.
nBoot = 1000  # number of bootstrap resamples
res = np.empty((nBoot, 2))  # row i holds [slope, intercept] of resample i

# The column of ones (intercept term) is the same for every resample.
intercept_col = np.ones(x.size)

for i in range(nBoot):
    # Sample data indices with replacement (same size as the data set).
    idx = np.random.choice(np.arange(y.size), y.size, replace=True)
    # Build the design matrix. Note that x must be indexed with the same
    # idx as y so that each (x, y) pair stays together.
    x2 = np.vstack((x[idx], intercept_col))
    # Least-squares fit of the resampled data.
    res[i, :] = np.linalg.lstsq(x2.T, y[idx], rcond=None)[0]
import numpy as np
import matplotlib.pyplot as plt

# Ground-truth parameters of the linear model y = a * x + b + noise.
a = 0.05  # coefficient (slope)
b = 3  # intercept
noiseVar = 5  # standard deviation of the noise (despite the "Var" in the name)

# Generate data: x runs from 1 up to (excluding) 100 in steps of 0.1.
x = np.arange(1, 100, 0.1)
y = a * x + b + noiseVar * np.random.randn(x.size)
# Permutation: refit the line after shuffling y relative to x, which breaks
# the pairing between each x and its y.
nBoot = 10000  # number of permutations
res2 = np.empty((nBoot, 2))  # row i holds [slope, intercept] of permutation i

# Only y is shuffled; x is NOT re-indexed with idx, so the design matrix is
# identical for every permutation and is built once outside the loop.
x2 = np.vstack((x, np.ones(x.size)))

for i in range(nBoot):
    # Sample all indices without replacement, i.e. a random reordering.
    idx = np.random.choice(np.arange(y.size), y.size, replace=False)
    # Least-squares fit of the shuffled y against the original x.
    res2[i, :] = np.linalg.lstsq(x2.T, y[idx], rcond=None)[0]