吴恩达机器学习ex3 python实现

it2024-01-03 60

这个项目包含了吴恩达机器学习ex3的python实现，主要知识点为多类别逻辑回归、神经网络

1.多分类

这个部分需要你实现手写数字（0到9）的识别。你需要扩展之前的逻辑回归，并将其应用于一对多的分类。

1.1读取数据

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
from scipy.io import loadmat
from sklearn.metrics import classification_report

# Load the ex3 data set; loadmat returns a dict keyed by MATLAB variable name.
data = loadmat(
    r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex3\machine-learning-ex3\ex3\ex3data1.mat'
)
data
# Output:
# {'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
#  '__version__': '1.0',
#  '__globals__': [],
#  'X': array([[0., 0., 0., ..., 0., 0., 0.],
#         [0., 0., 0., ..., 0., 0., 0.],
#         [0., 0., 0., ..., 0., 0., 0.],
#         ...,
#         [0., 0., 0., ..., 0., 0., 0.],
#         [0., 0., 0., ..., 0., 0., 0.],
#         [0., 0., 0., ..., 0., 0., 0.]]),
#  'y': array([[10],
#         [10],
#         [10],
#         ...,
#         [ 9],
#         [ 9],
#         [ 9]], dtype=uint8)}

# Shapes of the feature matrix and the label column (not displayed mid-cell).
data['X'].shape, data['y'].shape
print(data['y'])
# Output:
# [[10]
#  [10]
#  [10]
#  ...
#  [ 9]
#  [ 9]
#  [ 9]]

1.2数据可视化

# Pick 100 distinct rows to display. np.random.choice samples WITH replacement
# by default, so the same image could be drawn more than once (index 620 occurs
# twice in the recorded output below); replace=False guarantees unique rows.
sample_idx = np.random.choice(np.arange(data['X'].shape[0]), 100, replace=False)
print(sample_idx)
sample_images = data['X'][sample_idx, :]
sample_images
# Output recorded from the original run (which sampled with replacement):
# [4104 3905 3710 3160 2934 2928 3564 2093 1751 2326 2314 3642 3020 2981
#   791 4597  158 2346  929  910  448 1866 2141 2041  501 4387  694 2070
#  3656 3075 1808  592 4226 1772 3231 2608 3786 1427 1765 4823  919 4628
#  3328 4337  620 1171 3258 2868  776 2603 4803  299 3174 1503 4149  620
#  3364 4578 2672 3010 1312 2127 2826 3632 4861 4683 1697  115 4043  869
#  4500 2089 4244 1806 1957 4211 1244 4427 3030  437 3805 2609  314 4338
#  1456  915  851  206 2436  992 2686 1482  881 3934 2274 4370 1702  572
#  2505 2799]
# array([[0., 0., 0., ..., 0., 0., 0.],
#        [0., 0., 0., ..., 0., 0., 0.],
#        [0., 0., 0., ..., 0., 0., 0.],
#        ...,
#        [0., 0., 0., ..., 0., 0., 0.],
#        [0., 0., 0., ..., 0., 0., 0.],
#        [0., 0., 0., ..., 0., 0., 0.]])

# Draw the samples on a 10 x 10 grid of subplots with shared axes.
fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(12, 12))
for r in range(10):
    for c in range(10):
        # matshow renders a matrix/array as an image; cmap selects the colour map.
        # Each row of X is reshaped to 20 x 20 and transposed before drawing.
        ax_array[r, c].matshow(
            np.array(sample_images[10 * r + c].reshape((20, 20))).T,
            cmap=matplotlib.cm.binary,
        )
        plt.xticks(np.array([]))  # set the ticks (an empty array removes them)
        plt.yticks(np.array([]))

1.3将逻辑回归向量化

定义sigmoid函数

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), applied element-wise.

    z may be a scalar or any array-like accepted by np.exp; the result has
    the same shape as z.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

定义cost函数

def cost(theta, X, y, learningRate):
    """Return the L2-regularised logistic-regression cost.

    Parameters
    ----------
    theta : array-like, n parameters (any shape that flattens to length n);
        theta[0] is the bias term and is excluded from the penalty.
    X : array-like of shape (m, n), one example per row, bias column included.
    y : array-like with m binary labels (0/1); a flat vector, a column or a
        row are all accepted because the labels are flattened first.
    learningRate : float. Despite its name, the code uses it as the
        regularisation strength: it scales the sum of squared theta[1:].

    Returns
    -------
    float : mean cross-entropy plus learningRate / (2 * m) * sum(theta[1:] ** 2).
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    # Flatten the labels so a 1-D y cannot broadcast against a column of
    # predictions into an (m, m) matrix and silently inflate the cost.
    y = np.asarray(y, dtype=float).ravel()
    m = X.shape[0]

    z = X @ theta
    # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) ==
    # logaddexp(0, z). Using these forms avoids log(0) = -inf (and the
    # resulting 0 * -inf = nan) when the sigmoid saturates at 0 or 1.
    per_example = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)

    reg = (learningRate / (2 * m)) * np.sum(theta[1:] ** 2)
    return np.sum(per_example) / m + reg

向量化梯度

def gradient(theta, X, y, learningRate):
    """Return the gradient of the L2-regularised logistic-regression cost.

    Parameters
    ----------
    theta : array-like, n parameters (flattened internally); theta[0] is the
        bias term and receives no regularisation.
    X : array-like of shape (m, n), one example per row, bias column included.
    y : array-like with m binary labels (0/1); flattened internally so a flat
        vector and an (m, 1) column give the same result.
    learningRate : float. Despite its name, the code uses it as the
        regularisation strength.

    Returns
    -------
    numpy.ndarray of shape (n,) : the partial derivatives, in theta order.
    """
    theta = np.asarray(theta, dtype=float).ravel()
    X = np.asarray(X, dtype=float)
    # Flatten the labels so a 1-D y cannot broadcast against a column of
    # predictions into an (m, m) error matrix.
    y = np.asarray(y, dtype=float).ravel()
    m = X.shape[0]

    error = np.asarray(sigmoid(X @ theta)).ravel() - y
    grad = (X.T @ error) / m
    # theta0 (the bias) is not regularised, so the penalty starts at index 1.
    grad[1:] += (learningRate / m) * theta[1:]
    return grad

1.4一对多分类器

定义训练模型

from scipy.optimize import minimize


def one_vs_all(X, y, num_labels, learning_rate):
    """Train one regularised logistic-regression classifier per label.

    X: 2-D array of examples WITHOUT the bias column (it is prepended here).
    y: labels, one per example; classifier i is trained on "label == i"
       for i = 1 .. num_labels.
    num_labels: number of classes.
    learning_rate: passed through to cost/gradient as their last argument.

    Returns an array of shape (num_labels, n_features + 1); row i - 1 holds
    the parameters found for label i.
    """
    n_examples = X.shape[0]
    n_features = X.shape[1]

    # One extra column because the bias term theta0 is inserted below.
    all_theta = np.zeros((num_labels, n_features + 1))
    X_bias = np.insert(X, 0, values=np.ones(n_examples), axis=1)

    for row, label_value in enumerate(range(1, num_labels + 1)):
        start = np.zeros(n_features + 1)
        # Binary target column: 1 where the example carries this label, else 0.
        target = np.where(np.ravel(y) == label_value, 1, 0)
        target = target.reshape((n_examples, 1))

        result = minimize(
            fun=cost,
            x0=start,
            args=(X_bias, target, learning_rate),
            method='TNC',
            jac=gradient,
        )
        all_theta[row, :] = result.x

    return all_theta

数据预处理，主要是处理y

# Scratch cell: build the same arrays one_vs_all builds, only to check shapes.
rows = data['X'].shape[0]
params = data['X'].shape[1]

all_theta = np.zeros((10, params + 1))
# Prepend a column of ones (the bias feature) to the feature matrix.
X = np.column_stack((np.ones(rows), data['X']))
theta = np.zeros(params + 1)

# NOTE: the labels run from 1 to 10 (see the np.unique output below), so
# "label == 0" never matches and y_0 is all zeros; only its shape is inspected.
y_0 = np.where(np.ravel(data['y']) == 0, 1, 0)
print(y_0.shape)
y_0 = y_0.reshape((rows, 1))
X.shape, y_0.shape, theta.shape, all_theta.shape,
# Output:
# (5000,)
# ((5000, 401), (5000, 1), (401,), (10, 401))

np.unique(data['y'])
# Output:
# array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint8)

利用模型训练theta

# Train the 10 one-vs-all classifiers on the raw features (one_vs_all adds the
# bias column itself); the last argument is passed as learning_rate.
all_theta = one_vs_all(data['X'], data['y'], num_labels=10, learning_rate=1)
all_theta
# Output:
# array([[-2.38318550e+00,  0.00000000e+00,  0.00000000e+00, ...,
#          1.30405699e-03, -6.70953714e-10,  0.00000000e+00],
#        [-3.18325496e+00,  0.00000000e+00,  0.00000000e+00, ...,
#          4.45659505e-03, -5.08109220e-04,  0.00000000e+00],
#        [-4.79627526e+00,  0.00000000e+00,  0.00000000e+00, ...,
#         -2.87789088e-05, -2.48011286e-07,  0.00000000e+00],
#        ...,
#        [-7.98901828e+00,  0.00000000e+00,  0.00000000e+00, ...,
#         -8.94589615e-05,  7.21263539e-06,  0.00000000e+00],
#        [-4.57343099e+00,  0.00000000e+00,  0.00000000e+00, ...,
#         -1.33555921e-03,  9.98155754e-05,  0.00000000e+00],
#        [-5.40070238e+00,  0.00000000e+00,  0.00000000e+00, ...,
#         -1.16422488e-04,  7.87937989e-06,  0.00000000e+00]])

定义预测函数

def predict_all(X, all_theta):
    """Predict a label for every row of X using the one-vs-all classifiers.

    X: 2-D array of examples WITHOUT the bias column (it is prepended here).
    all_theta: 2-D array with one row of parameters per classifier.

    Prints the shape of the prediction matrix, then returns an
    (n_examples, 1) np.matrix of predicted labels numbered from 1.
    """
    n_examples = X.shape[0]

    with_bias = np.matrix(np.insert(X, 0, values=np.ones(n_examples), axis=1))
    weights = np.matrix(all_theta)

    # Probability of each class for each example; the most probable one wins.
    probabilities = sigmoid(with_bias * weights.T)
    best_column = np.argmax(probabilities, axis=1)
    print(best_column.shape)

    # argmax yields column indices starting at 0, while labels start at 1.
    return best_column + 1


# Shape printed when the function is called on the 5000-example data set:
# (5000, 1)

预测结果

# Predict on the training set and summarise per-class precision/recall/F1.
y_pred = predict_all(data['X'], all_theta)
print(classification_report(y_true=data['y'], y_pred=y_pred))
# Output:
#               precision    recall  f1-score   support
#
#            1       0.95      0.99      0.97       500
#            2       0.95      0.92      0.93       500
#            3       0.95      0.91      0.93       500
#            4       0.95      0.95      0.95       500
#            5       0.92      0.92      0.92       500
#            6       0.97      0.98      0.97       500
#            7       0.95      0.95      0.95       500
#            8       0.93      0.92      0.92       500
#            9       0.92      0.92      0.92       500
#           10       0.97      0.99      0.98       500
#
#     accuracy                           0.94      5000
#    macro avg       0.94      0.94      0.94      5000
# weighted avg       0.94      0.94      0.94      5000

support表示每个类别在真实标签中出现的次数（即该类别的样本数）

2神经网络

2.1读取数据和参数

# Load the pre-trained network weights from the MATLAB .mat file.
weight = loadmat(
    r'C:\Users\xxx\Desktop\机器学习\machine-learning-ex3\machine-learning-ex3\ex3\ex3weights.mat'
)
theta1 = weight['Theta1']
theta2 = weight['Theta2']
theta1.shape, theta2.shape
# Output:
# ((25, 401), (10, 26))

# Prepend the bias column to the raw features. X (the bias-augmented matrix
# built in the preprocessing step) is used here only for its row count.
bias_column = np.ones(X.shape[0])
X2 = np.matrix(np.insert(data['X'], 0, values=bias_column, axis=1))
y2 = np.matrix(data['y'])
X2.shape, y2.shape
# Output:
# ((5000, 401), (5000, 1))

2.2 前馈神经网络和预测

# Forward propagation through the pre-trained network.
# X2 is an np.matrix, so "*" below is matrix multiplication.
a1 = X2
z2 = a1 * theta1.T
z2.shape
# Output:
# (5000, 25)

a2 = sigmoid(z2)
a2.shape
# Output:
# (5000, 25)

# Prepend the bias unit to the hidden-layer activations.
a2 = np.insert(a2, 0, values=np.ones(a2.shape[0]), axis=1)
z3 = a2 * theta2.T
z3.shape
# Output:
# (5000, 10)

a3 = sigmoid(z3)
a3
# Output:
# matrix([[1.12661530e-04, 1.74127856e-03, 2.52696959e-03, ...,
#          4.01468105e-04, 6.48072305e-03, 9.95734012e-01],
#         [4.79026796e-04, 2.41495958e-03, 3.44755685e-03, ...,
#          2.39107046e-03, 1.97025086e-03, 9.95696931e-01],
#         [8.85702310e-05, 3.24266731e-03, 2.55419797e-02, ...,
#          6.22892325e-02, 5.49803551e-03, 9.28008397e-01],
#         ...,
#         [5.17641791e-02, 3.81715020e-03, 2.96297510e-02, ...,
#          2.15667361e-03, 6.49826950e-01, 2.42384687e-05],
#         [8.30631310e-04, 6.22003774e-04, 3.14518512e-04, ...,
#          1.19366192e-02, 9.71410499e-01, 2.06173648e-04],
#         [4.81465717e-05, 4.58821829e-04, 2.15146201e-05, ...,
#          5.73434571e-03, 6.96288990e-01, 8.18576980e-02]])

# argmax yields column indices starting at 0, while labels start at 1.
y_pred2 = np.argmax(a3, axis=1) + 1
y_pred2.shape
# Output:
# (5000, 1)

# FIX: evaluate the network's own predictions (y_pred2). The original call
# passed y_pred, the one-vs-all logistic-regression predictions, so the report
# it printed (kept below for reference) is identical to the logistic-regression
# report and does not describe the network. Re-run this cell to obtain the
# network's scores.
print(classification_report(y2, y_pred2))
# Stale output from the original call classification_report(y2, y_pred):
#               precision    recall  f1-score   support
#
#            1       0.95      0.99      0.97       500
#            2       0.95      0.92      0.93       500
#            3       0.95      0.91      0.93       500
#            4       0.95      0.95      0.95       500
#            5       0.92      0.92      0.92       500
#            6       0.97      0.98      0.97       500
#            7       0.95      0.95      0.95       500
#            8       0.93      0.92      0.92       500
#            9       0.92      0.92      0.92       500
#           10       0.97      0.99      0.98       500
#
#     accuracy                           0.94      5000
#    macro avg       0.94      0.94      0.94      5000
# weighted avg       0.94      0.94      0.94      5000

总结

np.random.choice(arg1,arg2)函数从arg1中随机抽取arg2个元素，返回一个一维数组；默认是有放回抽样（replace=True），因此结果中可能出现重复元素；若arg2未指明，则只抽取一个元素并返回标量。plt.subplots函数中sharex、sharey：设置为 True 或者 'all' 时，所有子图共享 x 轴或者 y 轴；设置为 False 或者 'none' 时，所有子图的 x、y 轴均为独立。minimize中参数method：求解的算法，选择TNC则和fmin_tnc()类似，使用截断牛顿法，可求解带边界约束的多元函数最小化问题，并可通过jac参数提供梯度信息。宏平均 macro avg：对每个类别的精准率、召回率和F1分别求和后取平均。微平均 micro avg：不区分样本类别，计算整体的精准率、召回率和F1。加权平均 weighted avg：是对宏平均的一种改进，按每个类别样本数量在总样本中的占比进行加权。

最新回复(0)