Statistical Learning Methods Reading Notes 9: Naive Bayes Exercises


1. Textbook exercises (too many symbols to write out here)

2. Video assignment

This is essentially the same as the material on pages 63-64 of the textbook.
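For reference, the smoothed (Bayesian) estimates from the textbook, which the self-coded implementation below follows with λ = 0.2, are

$$P_\lambda(Y=c_k)=\frac{\sum_{i=1}^{N} I(y_i=c_k)+\lambda}{N+K\lambda}, \qquad
P_\lambda\big(X^{(j)}=a_{jl}\mid Y=c_k\big)=\frac{\sum_{i=1}^{N} I\big(x_i^{(j)}=a_{jl},\,y_i=c_k\big)+\lambda}{\sum_{i=1}^{N} I(y_i=c_k)+S_j\lambda},$$

where K is the number of classes and S_j is the number of possible values of the j-th feature; λ = 0 recovers maximum likelihood estimation and λ = 1 gives Laplace smoothing.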

Self-coded implementation:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: liujie
@software: PyCharm
@file: naives自编程实现.py
@time: 2020/10/22 10:03
"""
import numpy as np
import pandas as pd


# Naive Bayes classifier with lambda-smoothed (Bayesian) estimation
class NaiveBayes():
    def __init__(self, lambda_):
        self.lambda_ = lambda_            # smoothing parameter; 0 gives maximum likelihood estimation
        self.y_types_count = None         # count of each class label
        self.y_types_proba = None         # prior probability of each class label
        self.x_types_prob = dict()        # conditional probabilities keyed by (feature index, feature value, class)

    def fit(self, x_train, y_train):
        # all class labels, here [-1, 1]
        self.y_types = np.unique(y_train)
        # convert to DataFrame to make the counting easier
        x = pd.DataFrame(x_train)
        y = pd.DataFrame(y_train)
        # class counts via value_counts()
        self.y_types_count = y[0].value_counts()
        # smoothed prior probabilities
        self.y_types_proba = (self.y_types_count + self.lambda_) / (y.shape[0] + len(self.y_types) * self.lambda_)
        # smoothed conditional probabilities
        for idx in x.columns:                 # loop over the features x^(j)
            for j in self.y_types:            # loop over the class labels
                # counts of the idx-th feature's values among the samples with y == j
                p_x_y = x[(y == j).values][idx].value_counts()
                for i in p_x_y.index:
                    # dictionary key: (feature index, feature value, class label)
                    self.x_types_prob[(idx, i, j)] = (p_x_y[i] + self.lambda_) / \
                        (self.y_types_count[j] + p_x_y.shape[0] * self.lambda_)

    def predict(self, x_new):
        res = []
        # loop over the possible class labels
        for y in self.y_types:
            p_y = self.y_types_proba[y]
            p_xy = 1
            for idx, x in enumerate(x_new):
                p_xy *= self.x_types_prob[(idx, x, y)]
            res.append(p_y * p_xy)
        for i in range(len(self.y_types)):
            print('[{}] probability: {:.2%}'.format(self.y_types[i], res[i]))
        # return the y value with the maximum posterior probability
        return self.y_types[np.argmax(res)]


def main():
    X_train = np.array([
        [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
        [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
        [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "L"]
    ])
    y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    clf = NaiveBayes(lambda_=0.2)
    clf.fit(X_train, y_train)
    X_new = np.array([2, "S"])
    y_predict = clf.predict(X_new)
    print("{} is classified as: {}".format(X_new, y_predict))


if __name__ == '__main__':
    main()

Output:

[-1] probability: 6.51%
[1] probability: 2.49%
['2' 'S'] is classified as: -1
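Note that the two printed percentages are the unnormalized joint scores P(y) · ∏_j P(x^(j) | y), not posterior probabilities; dividing each by their sum gives the posteriors. As a quick hand check of the Y = -1 branch with λ = 0.2 (from the training data above: 6 of the 15 samples have y = -1, of which 2 have X^(1) = 2 and 3 have X^(2) = S, with 3 distinct values per feature):

$$\frac{6+0.2}{15+2\times 0.2}\cdot\frac{2+0.2}{6+3\times 0.2}\cdot\frac{3+0.2}{6+3\times 0.2}=\frac{6.2}{15.4}\cdot\frac{2.2}{6.6}\cdot\frac{3.2}{6.6}\approx 0.0651,$$

which matches the printed 6.51%. The analogous product for Y = 1 is about 0.0249, so the normalized posterior of Y = -1 is roughly 0.0651 / (0.0651 + 0.0249) ≈ 72%.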

Implementation with sklearn:

import numpy as np
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn import preprocessing   # preprocessing utilities


def main():
    X_train = np.array([
        [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
        [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
        [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "L"]
    ])
    y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    # preprocessing: one-hot encode the categorical features
    enc = preprocessing.OneHotEncoder(categories='auto')
    enc.fit(X_train)
    # toarray() converts the sparse result into a dense array
    X_train = enc.transform(X_train).toarray()
    print(X_train)
    clf = MultinomialNB(alpha=0.0000001)   # alpha close to 0, i.e. almost no smoothing
    clf.fit(X_train, y_train)
    X_new = np.array([[2, "S"]])
    X_new = enc.transform(X_new).toarray()
    y_predict = clf.predict(X_new)
    print("{} is classified as: {}".format(X_new, y_predict))
    print(clf.predict_proba(X_new))


if __name__ == "__main__":
    main()
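Because MultinomialNB is applied here to one-hot features with alpha close to zero, it essentially reproduces the maximum likelihood estimate rather than the λ-smoothed one. As a side note, here is a minimal sketch (assuming scikit-learn >= 0.22, which provides CategoricalNB) that fits the smoothed categorical model directly, without one-hot encoding:

import numpy as np
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import OrdinalEncoder

X_train = np.array([
    [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
    [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
    [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "L"]
])
y_train = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])

# CategoricalNB expects integer-coded categories, so ordinal-encode each feature
enc = OrdinalEncoder()
X_enc = enc.fit_transform(X_train)

# alpha plays the same role as lambda_ in the self-coded version above
# (note: CategoricalNB smooths only the conditionals, not the class prior)
clf = CategoricalNB(alpha=0.2)
clf.fit(X_enc, y_train)

X_new = enc.transform(np.array([[2, "S"]]))
print(clf.predict(X_new))          # expected: [-1]
print(clf.predict_proba(X_new))    # normalized posteriors, roughly [0.72, 0.28]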