def linear_regression(feature_arr, label_arr, *, learning_rate=0.01,
                      max_iter=1000, tol=1e-8):
    """
    Fit a linear regression model with stochastic gradient descent (SGD).

    The weights are updated after every single sample. Training stops when
    either ``max_iter`` passes over the data have been made or the summed
    squared error changes by no more than ``tol`` between two consecutive
    passes.

    :param feature_arr: the train feature, a 2-D array-like with one row per
        sample and one column per feature
    :param label_arr: the train label, one numeric value per sample
    :param learning_rate: step size applied to every per-sample update
    :param max_iter: maximum number of passes over the training data
    :param tol: convergence tolerance on the change of the summed squared
        error between consecutive passes
    :return: the learned weights as an ndarray of shape (feature_num, 1)
    :raises ValueError: if ``feature_arr`` is empty or not 2-D, or if the
        number of labels does not match the number of samples
    """
    features = np.asarray(feature_arr, dtype=float)
    labels = np.asarray(label_arr, dtype=float).ravel()
    if features.ndim != 2 or features.size == 0:
        raise ValueError("feature_arr must be a non-empty 2-D array-like")
    if features.shape[0] != labels.shape[0]:
        raise ValueError("feature_arr and label_arr must have the same length")

    feature_num = features.shape[1]
    weights = np.random.rand(feature_num)

    # Start from +inf so the first pass always runs and can never be
    # mistaken for convergence.
    last_loss = np.inf
    threshold = np.inf
    iteration = 0
    # Keep training while there is iteration budget left AND the loss is
    # still changing by more than the tolerance.
    while iteration < max_iter and threshold > tol:
        iteration += 1
        loss = 0.0
        for x, y in zip(features, labels):
            # The residual is computed once, before the update, and is used
            # for both the loss and the gradient step.
            residual = np.dot(x, weights) - y
            loss += residual ** 2
            weights -= learning_rate * residual * x
        threshold = abs(loss - last_loss)
        last_loss = loss

    # Callers expect a column vector.
    return weights.reshape(feature_num, 1)
def predict(feature_arr, para_vector):
    """
    Compute linear-model predictions for a batch of samples.

    :param feature_arr: 2-D array-like with one row per sample and one
        column per feature
    :param para_vector: model weights, either a flat vector of length
        feature_num or a (feature_num, 1) column vector
    :return: 1-D ndarray holding one predicted value per sample
    """
    features = np.asarray(feature_arr)
    # Flatten so a column vector broadcasts across feature columns the same
    # way a flat weight vector does.
    weights = np.ravel(para_vector)
    # np.sum is named explicitly: the builtin sum() does not accept `axis`.
    return np.sum(features * weights, axis=1)
# Fit the model on the training data.
weight = linear_regression(xArr, yArr)
# linear_regression returns a (feature_num, 1) column; flatten it without
# hard-coding the number of features.
weight = weight.reshape(-1)
# Column 1 of xArr is used as the plot's x-axis
# (presumably column 0 is a constant bias term — confirm against the data loader).
x_aix = np.array(xArr)[:, 1]
y_turly_value = np.array(yArr)
y_pre = predict(xArr, weight)
# Observed labels as black points, fitted values as a blue line.
plt.scatter(x_aix, y_turly_value, color='black')
plt.plot(x_aix, y_pre, color='blue', linewidth=3)
plt.show()