[机器学习]线性回归预测14年房价（含python代码动态展示梯度下降过程）

it2025-09-28 32

数据集如下图所示。原始数据中的 Year 为 2000—2013，但由于 Year 与 Price 的数量级相差过大，梯度下降难以快速收敛，因此先将 Year 减去 2000（变换为 0—13）再进行拟合。代码首先会展示两张动态图，分别展示梯度下降过程中代价函数的下降过程和直线的拟合过程。梯度下降收敛后，会再显示用闭式解（正规方程）拟合出的直线。两种方法的图中都标出了预测的 2014 年房价。

python代码：

import csv

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MultipleLocator


def loadDataSet(path='data.csv'):
    """Read the (year, price) samples from a CSV file.

    The first row is treated as a header and skipped. For every remaining
    non-empty row, column 0 becomes the single feature and column 1 the
    target value.

    Args:
        path: Location of the CSV file. Defaults to ``'data.csv'`` in the
            current working directory.

    Returns:
        A ``(data, label)`` tuple. ``data`` is a list of ``[1.0, x]`` rows
        (the leading 1.0 is the bias term) and ``label`` is the list of
        target values as floats.
    """
    data = []  # one feature per sample, plus the constant bias column
    label = []
    # utf-8-sig strips a leading BOM so the header skip and float() parsing
    # are not affected by it; the context manager closes the file.
    with open(path, encoding='utf-8-sig', newline='') as csvFile:
        reader = csv.reader(csvFile)
        next(reader, None)  # drop the header row
        for row in reader:
            if not row:  # tolerate blank lines (e.g. a trailing newline)
                continue
            data.append([1.0, float(row[0])])
            label.append(float(row[1]))
    return data, label


def closeFormSolution(data, label):
    """Fit the linear model with the normal equation.

    Solves ``(X^T X) theta = X^T y`` directly instead of forming an explicit
    inverse.

    Args:
        data: Sequence of feature rows (bias column included).
        label: Sequence of target values, one per row of ``data``.

    Returns:
        ``theta`` as an ``(m, 1)`` ``numpy.matrix``, where ``m`` is the
        number of columns in ``data``.

    Raises:
        numpy.linalg.LinAlgError: If ``X^T X`` is singular.
    """
    dataMat = np.asmatrix(data, dtype=float)
    labelMat = np.asmatrix(label, dtype=float).transpose()
    gram = dataMat.transpose() * dataMat
    moment = dataMat.transpose() * labelMat
    return np.asmatrix(np.linalg.solve(gram, moment))


def _halfSquaredError(error):
    """Return the cost 1/2 * error^T * error as a plain float."""
    return float((error.transpose() * error)[0, 0]) / 2


def gradientDescent(data, label, alpha=0.0001, maxCycle=10000,
                    epsilon=0.0002, animate=True):
    """Fit the linear model with batch gradient descent.

    When ``animate`` is true, figure 1 shows the cost curve growing one
    segment per iteration and figure 2 shows the current regression line.

    Args:
        data: Sequence of feature rows (bias column included).
        label: Sequence of target values, one per row of ``data``.
        alpha: Learning rate.
        maxCycle: Maximum number of iterations.
        epsilon: Stop once the cost changes by less than this amount
            between two consecutive iterations.
        animate: Draw the two live figures while iterating. Pass ``False``
            to run the optimisation without any plotting.

    Returns:
        ``theta`` as an ``(m, 1)`` ``numpy.matrix``.
    """
    dataMat = np.asmatrix(data, dtype=float)
    labelMat = np.asmatrix(label, dtype=float).transpose()
    featureCount = dataMat.shape[1]  # rows are samples, columns are features
    theta = np.asmatrix(np.ones((featureCount, 1)))  # initial guess
    error = dataMat * theta - labelMat
    precost = _halfSquaredError(error)

    if animate:
        plt.ion()
    # Previous point of the cost curve; each iteration draws one segment
    # from it to the new (iteration, cost) point.
    prevIteration, prevCost = 0, precost

    for k in range(maxCycle):
        theta = theta - alpha * (dataMat.transpose() * error)
        error = dataMat * theta - labelMat
        cost = _halfSquaredError(error)

        if animate:
            plt.figure(1)
            plt.title('costFunction', fontsize=14)
            plt.xlabel('num of iterations', fontsize=8)
            plt.ylabel('cost', fontsize=8)
            plt.plot([prevIteration, k], [prevCost, cost], color='red')
            plt.figure(2)
            plotRegression(data, label, theta, 'gradientDescent')
            plt.pause(0.1)
        prevIteration, prevCost = k, cost

        if abs(precost - cost) < epsilon:  # cost barely changes: converged
            break
        precost = cost
    return theta


def plotRegression(data, label, theta, title):
    """Draw the samples, the fitted line and the 2014 prediction.

    Clears the current figure first, so it can be called repeatedly to
    animate the fit.

    Args:
        data: Sequence of ``[1.0, x]`` rows as returned by ``loadDataSet``.
        label: Sequence of target values, one per row of ``data``.
        theta: Intercept and slope; any array-like that flattens to two
            values (for example a ``(2, 1)`` matrix).
        title: Title of the plot.
    """
    plt.clf()
    intercept, slope = np.asarray(theta, dtype=float).ravel()[:2]
    x = np.arange(0, 20)
    y = intercept + slope * x
    prediction = float(y[14])  # x == 14 corresponds to the year 2014

    ax = plt.subplot()
    plt.title(title, fontsize=14)
    plt.xlabel('year(+2000)', fontsize=8)
    # xticks() already puts one labelled tick on every integer. The x locator
    # is deliberately not replaced afterwards: the fixed labels are assigned
    # by tick index, so a different locator can shift them against the years.
    plt.xticks(x, [str(2000 + int(offset)) for offset in x])
    plt.ylabel('price', fontsize=8)
    ax.yaxis.set_major_locator(MultipleLocator(2))
    plt.xlim(0, 16)
    plt.ylim(0, 16)
    plt.plot([row[1] for row in data], label, "ob")
    plt.plot(14, prediction, 'om')
    plt.text(14, prediction + 0.8, '2014_prediction', color='b', fontsize=10)
    plt.text(14, prediction, '%.2f' % prediction, color='b', fontsize=10)
    plt.plot(x, y, color='red')


def main():
    """Run the animated gradient descent, then show the closed-form fit."""
    data, label = loadDataSet()
    theta = gradientDescent(data, label)
    print("梯度下降theta")
    print(theta)
    plt.ioff()
    plt.figure(3)
    theta1 = closeFormSolution(data, label)
    plotRegression(data, label, theta1, 'closeFormSolution')
    print("闭式解theta")
    print(theta1)
    plt.show()


if __name__ == '__main__':
    main()

最新回复(0)