3.特征提取
3.特征提取 本节将使用特征提取函数。该函数的代码与之前类似,具体如下(utils.py,折叠源码): # -*- coding: utf-8 -*- """ Created on Sun Sep 11 23:06:06 2016 @author: DIP """ from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer def build_feature_matrix(documents, feature_type = 'frequency' , ngram_range = ( 1 , 1 ), min_df = 0.0 , max_df = 1.0 ): feature_type = feature_type.lower().strip() if feature_type = = 'binary' : vectorizer = CountVectorizer(binary = True , min_df = min_df, max_df = max_df, ngram_range = ngram_range) elif feature_type = = 'frequency' : vectorizer = CountVectorizer(binary = False , min_df