import nltk
import pymysql as pymysql
# Index a text file into MySQL: every token of the file is stored in the
# `indexer` table together with its running position in the file (1-based,
# counted across all lines) and its NLTK part-of-speech tag.
infilename = input("Enter the name of the file to index:")
conn = pymysql.connect(user="root", passwd="", db="dsbd")
QUERY = "insert into indexer (word, position, pos) values "
# One placeholder group per row.  The values are handed to the driver as
# parameters instead of being formatted into the SQL text, so quoting and
# escaping of arbitrary tokens (quotes, backslashes, ...) is done by PyMySQL.
INSERT_ROW = QUERY + "(%s, %s, %s)"
wpt = nltk.WordPunctTokenizer()
offset = 1  # position of the first token of the line about to be read
try:
    with conn.cursor() as cur, open(infilename) as infile:
        for text in infile:
            tagged = nltk.pos_tag(wpt.tokenize(text))
            rows = [
                (word, position, pos)
                for position, (word, pos) in enumerate(tagged, start=offset)
            ]
            if rows:  # blank lines yield no tokens; nothing to insert
                cur.executemany(INSERT_ROW, rows)
                offset += len(rows)
    # Commit once, only after the whole file was processed successfully.
    conn.commit()
finally:
    # Release the connection even if reading, tagging or inserting failed.
    conn.close()
连接数据库:
- 本人已经创建了数据库dsbd,因此在 show databases; 的结果中可以看到dsbd;若尚未创建,可使用命令 create database 数据库名称;(例如 create database dsbd;)
- 执行 use dsbd; 选择并使用该数据库
- 在数据库dsbd中建立一个名为indexer的数据表(包含 word、position、pos 三列),供上面的脚本写入