Page 31
"""Fetch a web page, tokenize its text, and print the 10 most common word stems."""
import urllib.request
from collections import Counter

import nltk  # required: nltk.word_tokenize is called below
from bs4 import BeautifulSoup
from nltk import LancasterStemmer
from nltk.corpus import stopwords

URL = input("Enter a website")

with urllib.request.urlopen(URL) as infile:
    # Explicit parser avoids bs4's "no parser was explicitly specified" warning
    # and makes the result reproducible across environments.
    soup = BeautifulSoup(infile, features="html.parser")

# Tokenize the visible text and normalize case before filtering.
tokens = nltk.word_tokenize(soup.text)
text = [w.lower() for w in tokens]

# Hoist loop invariants: one stemmer instance, and the stopword list as a
# set for O(1) membership tests (the original rebuilt both per token).
stemmer = LancasterStemmer()
stop_words = set(stopwords.words("english"))
words = [stemmer.stem(w) for w in text if w not in stop_words and w.isalnum()]

freqs = Counter(words)
print(freqs.most_common(10))
Page 139
"""Read a local text file, POS-tag its tokens, and index them into MySQL.

Each row inserted into `indexer1` is (word, 1-based position, POS tag).
"""
import nltk
import pymysql

conn = pymysql.connect(user="newuser", passwd="123456", db="dsdb")
cur = conn.cursor()

# Parameterized placeholders: the driver escapes values itself, which both
# prevents SQL injection and removes the manual escape_string() calls the
# original needed to keep quotes from breaking the statement.
QUERY = "INSERT INTO indexer1 (word,position,pos) VALUES (%s,%s,%s)"

with open(r"C:\Users\Dell\Desktop\words.txt") as infile:
    text = infile.read()

# Tokenize, tag, and enumerate so each token carries its 1-based position.
tagged = nltk.pos_tag(nltk.WordPunctTokenizer().tokenize(text))
rows = [(w, i + 1, pos) for i, (w, pos) in enumerate(tagged)]

if rows:  # skip the INSERT entirely for an empty file
    cur.executemany(QUERY, rows)
conn.commit()
conn.close()
"""Fetch a web page, POS-tag its text, and index the tokens into MySQL.

Each row inserted into `indexer` is (word, position starting at `offset`, POS tag).
"""
import urllib.request

import nltk
import pymysql
from bs4 import BeautifulSoup

URL = input("Enter the name of the file to index:")
conn = pymysql.connect(user="newuser", passwd="123456", db="dsdb")
cur = conn.cursor()

# Parameterized placeholders: the original interpolated raw tokens straight
# into the SQL string with no escaping, so any quote in the page text broke
# the statement (and allowed SQL injection). Let the driver escape values.
QUERY = "INSERT INTO indexer(word,position,pos) VALUES (%s,%s,%s)"
offset = 1

with urllib.request.urlopen(URL) as infile:
    soup = BeautifulSoup(infile, features="html.parser")

words = nltk.WordPunctTokenizer().tokenize(soup.text)
rows = [(w, i + offset, pos) for i, (w, pos) in enumerate(nltk.pos_tag(words))]

if rows:
    cur.executemany(QUERY, rows)
    # Advance the running position so a subsequent batch would continue
    # numbering where this one stopped (matches the original's bookkeeping).
    offset = offset + len(rows)
conn.commit()
conn.close()
Source: CSDN
Author: 啦啦啦mmm
Link: https://blog.csdn.net/qq_45593796/article/details/103816345