数据库字段设计
word_id:自动递增;其余字段为 word、oldword、pinyin、radicals、explanation(与下方 insert 语句中的列名一致)
Python 代码如下:
import requests
import pymysql
from bs4 import BeautifulSoup
def downloader(url, timeout=30):
    """
    Crawl one index page and store every character entry it links to.

    Fetches ``url``, collects every ``<a target="_blank">`` link on it,
    downloads each linked detail page and inserts the extracted fields into
    the ``word`` table. Pages are decoded as GBK, ignoring undecodable bytes.

    Args:
        url: Address of the index page to crawl.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Progress and failures are reported with ``print``; a page that
        cannot be fetched or parsed is skipped instead of aborting the crawl.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f'{url} is failed!', e)
        return
    if response.status_code != 200:
        print(f'{url} is failed!')
        return
    print(f'{url} is parsing')
    html = BeautifulSoup(response.content.decode('gbk', errors='ignore'), "lxml")
    prefix = 'http://www.zd9999.com'
    # Skip anchors without an href: prefix + None would raise TypeError.
    words = [
        prefix + a.get('href')
        for a in html.find_all('a', target="_blank")
        if a.get('href')
    ]
    if not words:
        # Nothing to store, so do not open a database connection at all.
        return

    # One connection per index page, closed in ``finally``, instead of one
    # leaked connection per word.
    conn = pymysql.connect(
        host='localhost',
        user='root',
        # Password
        password='******',
        # Database name
        db='*****',
        charset='utf8'
    )
    try:
        for word_url in words:
            try:
                response = requests.get(word_url, timeout=timeout)
            except requests.RequestException as e:
                print(f'{word_url} is failed!', e)
                continue
            # The list repr is kept so the log output matches the original.
            print(f'{[word_url]} is parsing')
            if response.status_code != 200:
                print(f'{word_url} is failed!')
                continue
            page = response.content.decode('gbk', errors='ignore')
            # Turn <br> tags into newlines so line breaks survive ``.text``.
            page = page.replace('<br/>', '\n').replace('<br>', '\n')
            param = _extract_word_fields(BeautifulSoup(page, "lxml"))
            if param is None:
                # Unexpected page layout: skip it rather than crash the crawl.
                print(f'{word_url} is failed!')
                continue
            _insert_word(conn, param)
    finally:
        conn.close()


def _extract_word_fields(soup):
    """Return (word, oldword, pinyin, radicals, explanation) or None if the layout differs."""
    tables = soup.find_all('table')
    if len(tables) < 5:
        return None
    td = tables[4].find_all('td')
    if len(td) < 13:
        return None
    # Fixed cell positions of the fifth table on a detail page.
    return tuple(td[i].text.strip() for i in (1, 4, 8, 10, 12))


def _insert_word(conn, param):
    """Insert one row into ``word``; commit on success, roll back and report on failure."""
    insert_sql = "insert into word(word,oldword,pinyin,radicals,explanation) values(%s,%s,%s,%s,%s)"
    try:
        # The connection is shared across a long crawl; reconnect if it dropped.
        conn.ping(reconnect=True)
        with conn.cursor() as cur:
            cur.execute(insert_sql, param)
        conn.commit()
        print("插入数据成功;")
    except Exception as e:
        # Best-effort: one bad row must not stop the remaining inserts.
        print("插入数据失败:", e)
        conn.rollback()
if __name__ == '__main__':
    # The first index page has no numeric suffix; the remaining pages are
    # numbered 2 through 101 and follow the index_{n}.htm pattern.
    index_pages = ['http://www.zd9999.com/zi/index.htm']
    index_pages.extend(
        f'http://www.zd9999.com/zi/index_{page}.htm' for page in range(2, 102)
    )
    for page_url in index_pages:
        downloader(page_url)
效果如下: