文章目录
1.课本习题-符号太多了
2.视频作业
1.课本习题-符号太多了
2.视频作业
与课本P63-64页差不多
"""
@author: liujie
@software: PyCharm
@file: naives 自编程实现.py
@time: 2020/10/22 10:03
"""
import numpy
as np
import pandas
as pd
class NaiveBayes():
    """Naive Bayes classifier for categorical features with additive smoothing.

    Attributes set by ``fit``:
        y_types: array of distinct class labels (``np.unique`` order).
        y_types_count: pandas Series mapping class label -> sample count.
        y_types_proba: pandas Series mapping class label -> smoothed prior.
        x_types_prob: dict mapping ``(feature_position, feature_value, label)``
            to the smoothed conditional probability P(value | label).
    """

    def __init__(self, lambda_):
        """Store the smoothing constant.

        Args:
            lambda_: additive smoothing term added to every count
                (0 disables smoothing, 1 is Laplace smoothing).
        """
        self.lambda_ = lambda_
        self.y_types = None
        self.y_types_count = None
        self.y_types_proba = None
        self.x_types_prob = dict()

    def fit(self, x_train, y_train):
        """Estimate the smoothed priors and per-feature conditionals.

        Args:
            x_train: 2-D array-like of categorical feature values, one row
                per sample.
            y_train: 1-D array-like of class labels, one per row of x_train.
        """
        x = pd.DataFrame(x_train)
        y = pd.Series(np.asarray(y_train).ravel())

        self.y_types = np.unique(y_train)
        self.y_types_count = y.value_counts()
        self.y_types_proba = (self.y_types_count + self.lambda_) / (
            y.shape[0] + len(self.y_types) * self.lambda_
        )

        # Start from a clean table so a refit does not keep stale entries.
        self.x_types_prob = dict()

        # Row masks per class are loop-invariant across features.
        masks = {label: (y == label).to_numpy() for label in self.y_types}

        # Keys use the column *position* because predict() looks features up
        # by enumerate() position.
        for pos in range(x.shape[1]):
            column = x.iloc[:, pos]
            # Every distinct value of this feature in the training data.
            # The smoothing denominator must use this size, not the number
            # of values that happen to occur inside one class.
            values = column.dropna().unique()
            for label in self.y_types:
                # reindex adds the values this class never produced with a
                # count of 0, so they still receive a smoothed probability.
                counts = (
                    column[masks[label]]
                    .value_counts()
                    .reindex(values, fill_value=0)
                )
                denom = self.y_types_count.loc[label] + len(values) * self.lambda_
                for value, count in counts.items():
                    self.x_types_prob[(pos, value, label)] = (
                        count + self.lambda_
                    ) / denom

    def predict(self, x_new):
        """Return the class label with the largest joint score for one sample.

        Prints the (unnormalised) score prior * product of conditionals for
        every class before returning.

        Args:
            x_new: 1-D sequence of feature values, in training column order.

        Returns:
            The element of ``y_types`` whose score is largest.

        Raises:
            KeyError: if a feature value was never seen during ``fit``.
        """
        res = []
        for y in self.y_types:
            p_y = self.y_types_proba.loc[y]
            p_xy = 1
            for idx, x in enumerate(x_new):
                try:
                    p_xy *= self.x_types_prob[idx, x, y]
                except KeyError:
                    raise KeyError(
                        "feature {} value {!r} was not seen during fit".format(idx, x)
                    ) from None
            res.append(p_y * p_xy)

        for label, score in zip(self.y_types, res):
            print('[{}]对应的概率 : {:.2%}'.format(label, score))
        return self.y_types[np.argmax(res)]
def main():
    """Fit the hand-written NaiveBayes on the 15-sample table and classify (2, "S")."""
    # Mixed int/str rows: numpy stores every cell as a string.
    samples = np.array([
        [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
        [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
        [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "L"],
    ])
    labels = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])

    model = NaiveBayes(lambda_=0.2)
    model.fit(samples, labels)

    query = np.array([2, "S"])
    predicted = model.predict(query)
    print("{}被分类为:{}".format(query, predicted))


if __name__ == '__main__':
    main()
[-1]对应的概率 : 6.51%
[1]对应的概率 : 2.49%
['2' 'S']被分类为:-1
import numpy
as np
from sklearn
.naive_bayes
import GaussianNB
,BernoulliNB
,MultinomialNB
from sklearn
import preprocessing
def main():
    """Classify (2, "S") with scikit-learn's MultinomialNB on one-hot features."""
    # Mixed int/str rows: numpy stores every cell as a string.
    samples = np.array([
        [1, "S"], [1, "M"], [1, "M"], [1, "S"], [1, "S"],
        [2, "S"], [2, "M"], [2, "M"], [2, "L"], [2, "L"],
        [3, "L"], [3, "M"], [3, "M"], [3, "L"], [3, "L"],
    ])
    labels = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])

    # One-hot encode both categorical columns; the same fitted encoder is
    # reused for the query row below.
    encoder = preprocessing.OneHotEncoder(categories='auto')
    encoder.fit(samples)
    encoded = encoder.transform(samples).toarray()
    print(encoded)

    model = MultinomialNB(alpha=0.0000001)
    model.fit(encoded, labels)

    query = encoder.transform(np.array([[2, "S"]])).toarray()
    predicted = model.predict(query)
    print("{}被分类为:{}".format(query, predicted))
    print(model.predict_proba(query))


if __name__ == "__main__":
    main()
转载请注明原文地址: https://lol.8miu.com/read-26086.html