# Overfitting demo
# Compare the coefficient of determination (R^2) of an overfitted model on train data vs. test data
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn import svm
# 100 inputs as a column vector, rescaled from [0, 1) to [-1, 1)
x = 2 * np.random.rand(100, 1) - 1

# Targets follow the cubic y = 4x^3 - 3x^2 + 2x - 1 with standard-normal
# noise added to every sample.
y = 4 * x**3 - 3 * x**2 + 2 * x - 1 + np.random.randn(100, 1)

# The first 30 samples are used for training, the remaining 70 for testing.
x_train, x_test = x[:30], x[30:]
y_train, y_test = y[:30], y[30:]
# Disabled plotting code, kept inside a bare string literal so it never runs:
# three side-by-side scatter plots showing all data, the train split and the
# test split. Remove the surrounding triple quotes to enable it.
"""
plt.subplot(1,3,1)
plt.scatter(x,y,marker='+')
plt.title('all data')
plt.subplot(1,3,2)
plt.scatter(x_train, y_train, marker='o')
plt.title('train data')
plt.subplot(1,3,3)
plt.scatter(x_test, y_test, marker='x')
plt.title('test data')
plt.show()
"""
# Degree-9 polynomial design matrix for the training inputs: one column per
# power, ordered x**9, x**8, ..., x**1 from left to right. Using this many
# features for so few samples is what makes the model overfit.
X_TRAIN = np.hstack([x_train ** power for power in range(9, 0, -1)])

# Ordinary least-squares fit on the polynomial features.
# (Fixed: the class is `LinearRegression`; the previous spelling
# `LinearRegresson` raised AttributeError.)
model = linear_model.LinearRegression()
# Alternative estimator to experiment with:
# model = svm.SVR()
model.fit(X_TRAIN, y_train)

# Visual check on the training set: targets ('o') vs. model predictions ('x').
plt.scatter(x_train, y_train, marker='o')  # expected
plt.scatter(x_train, model.predict(X_TRAIN), marker='x')  # predicted
plt.show()

# Uncomment to inspect the fitted parameters:
# print(model.coef_)
# print(model.intercept_)

# Coefficient of determination on the same data the model was fitted to.
print("TRAIN data score = " + str(model.score(X_TRAIN, y_train)))
# Test design matrix: powers of x_test from 9 down to 1, one column each,
# laid out left to right in that order.
X_TEST = np.hstack([x_test ** exponent for exponent in range(9, 0, -1)])

# Score of the fitted model on the test samples.
print(f"TEST data score = {model.score(X_TEST, y_test)!s}")