# -*- encoding:utf-8 -*-
import os  # os module: operating-system interaction (currently unused here)
import urllib.parse
import urllib.request

import lxml  # HTML parser backend used by BeautifulSoup below
import numpy as np
import pymysql
from bs4 import BeautifulSoup
# For every word in words.txt, fetch its dict.cn page and store the
# translation, synonyms/antonyms and example sentences into MySQL table shu2.
# NOTE(review): the original paste had lost all indentation (SyntaxError);
# the loop body is restored here. Expected schema (from the original notes):
#   translate VARCHAR(1000), synonymandantonymy VARCHAR(2000), sentence VARCHAR(2000)

# Open the connection ONCE, not once per word (the original leaked a
# connection + cursor on every iteration).
conn = pymysql.connect(host="localhost", user="root", password="6908",
                       database="danci", charset="utf8")
cursor = conn.cursor()
# Parameterized insert — safe against quoting problems in scraped text.
sql = 'insert into shu2(word,translate,synonymandantonymy,sentence) values(%s,%s,%s,%s) '
try:
    with open("D:/json/words.txt", encoding="utf-8") as words_file:
        for line in words_file:
            word = line.strip()
            if not word:
                continue  # skip blank lines — they would query dict.cn for ""
            # URL-encode the word: the original appended the raw line
            # (including its trailing newline) straight into the URL.
            url = 'https://dict.cn/search?q=' + urllib.parse.quote(word)
            # Fetch the page and parse it with the lxml backend.
            res = urllib.request.urlopen(url)
            html = res.read().decode('utf-8')
            soup = BeautifulSoup(html, 'lxml')
            # Basic translations
            result1 = str(soup.find_all('ul', class_='dict-basic-ul')).strip('[').strip(']')
            # Example sentences
            result2 = str(soup.find_all('div', class_='layout sort')).strip('[').strip(']')
            # Synonyms and antonyms
            result3 = str(soup.find_all('div', class_='layout nfo')).strip('[').strip(']')
            data = (word, result1, result3, result2)
            try:
                cursor.execute(sql, data)
                conn.commit()
                print("ok")
            except Exception as e:
                print('插入数据失败', e)
                conn.rollback()  # roll back the failed insert, keep looping
finally:
    # Always release DB resources, even if the scrape loop raises.
    cursor.close()
    conn.close()
# 爬虫抓取单词网页 — "web crawler that scrapes dictionary word pages" (article title
# left over from the page this script was copied from; kept as a comment).
# 最新推荐文章于 2024-07-14 20:20:34 发布 — CSDN page metadata ("latest recommended
# article published at ..."), not part of the script.