I have a list of sentences:
text = [\'cant railway station\',\'citadel hotel\',\' police stn\'].
I need to form bigram pairs and store the
Just fixing Dan's code:
def get_bigrams(myString):
tokenizer = WordPunctTokenizer()
tokens = tokenizer.tokenize(myString)
stemmer = PorterStemmer()
bigram_finder = BigramCollocationFinder.from_words(tokens)
bigrams = bigram_finder.nbest(BigramAssocMeasures.chi_sq, 500)
for bigram_tuple in bigrams:
x = "%s %s" % bigram_tuple
tokens.append(x)
result = [' '.join([stemmer.stem(w).lower() for w in x.split()]) for x in tokens if x.lower() not in stopwords.words('english') and len(x) > 8]
return result