Gensim学习笔记-2.主题与变换
# Rebuild the bag-of-words corpus from the raw text file using a previously
# saved gensim dictionary, then print it.
from pprint import pprint
import warnings

# The filter is installed before gensim is imported, as in the original
# statement order (presumably so UserWarnings raised while importing gensim
# are silenced as well).
warnings.filterwarnings(action='ignore', category=UserWarning, module='gensim')

from gensim import corpora

# Common words dropped from every document during tokenization.
stopWordsList = set('for a of the and to in'.split())

# One document per line of the file: lowercase, split on whitespace,
# and discard stop words.
with open('./Data/mycorpus.txt', encoding='utf-8') as f:
    texts = [
        [token for token in raw_line.lower().split() if token not in stopWordsList]
        for raw_line in f
    ]

# Load the saved token -> id mapping and convert every tokenized document
# with doc2bow.
dictionary = corpora.Dictionary.load('./Data/sampleDict.dict')
corpus = [dictionary.doc2bow(tokens) for tokens in texts]
pprint(corpus)

# Output recorded in the notes for the pprint call above (the listing is not
# complete on this line):
# [[(0, 1), (1, 1), (2, 1)], [(0, 1), (3, 1), (4, 1