一、基本原理

贝叶斯公式：

$$P(C \mid W) = \frac{P(W \mid C)\,P(C)}{P(W)}$$

其中 $C$ 表示类别，$W$ 表示特征（例如文档中出现的词）。

二、在文本分类中的情况


sklearn实现
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB


def news_classification():
    """Classify the 20 Newsgroups posts with a multinomial naive Bayes model.

    Loads the full dataset (using ``c:/new`` as the scikit-learn data
    directory), splits it into train and test parts, turns the raw text into
    TF-IDF features, fits a ``MultinomialNB`` estimator and prints the
    predictions, an element-wise comparison with the true labels, and the
    accuracy on the test part.

    No ``random_state`` is passed to ``train_test_split``, so the split (and
    therefore the printed accuracy) can differ from run to run.

    :return: None
    """
    # 1. Load the data. ``data_home`` must be passed by keyword: the
    #    parameters of fetch_20newsgroups are keyword-only in current
    #    scikit-learn releases, so a positional path raises TypeError.
    news = fetch_20newsgroups(data_home="c:/new", subset="all")

    # 2. Split into training and test sets.
    x_train, x_test, y_train, y_test = train_test_split(news.data, news.target)

    # 3. Feature engineering: TF-IDF text feature extraction.
    #    The vocabulary is learned on the training set only and then reused
    #    for the test set, so both share the same feature columns.
    transfer = TfidfVectorizer()
    x_train = transfer.fit_transform(x_train)
    x_test = transfer.transform(x_test)

    # 4. Naive Bayes estimator.
    estimator = MultinomialNB()
    estimator.fit(x_train, y_train)

    # 5. Model evaluation.
    # Method 1: compare predicted labels with the true labels directly.
    y_predict = estimator.predict(x_test)
    print(y_predict)
    print("直接比对真实值和预测值:\n", y_predict == y_test)

    # Method 2: compute the accuracy directly.
    score = estimator.score(x_test, y_test)
    print("准确率为:", score)


if __name__ == "__main__":
    news_classification()
