# coding=utf-8
import requests
import random
import xpinyin
from bs4 import BeautifulSoup
# Addresses of the site's listing pages to crawl (list_1.html through list_17.html).
# The pages differ only by their number, so the list is generated instead of spelled out.
urls = [
    "http://www.chengyudaquan.net/feisizichengyu/sizichengyu/list_{}.html".format(page)
    for page in range(1, 18)
]
# Output file that receives the scraped words, one per line.
# Opened with `with` so the handle is flushed and closed even when a request
# raises part-way through; UTF-8 is set explicitly so writing the Chinese text
# does not depend on the platform's default encoding.
with open('/tensorflow/py_aiplat_demo/data/ciyu.txt', 'w', encoding='utf-8') as w:
    for url in urls:
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        response = requests.get(url, timeout=30)
        # Abort on HTTP error statuses instead of parsing an error page.
        response.raise_for_status()
        # Decode the body using the encoding detected from its content.
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, 'lxml')
        for link in soup.find_all("span", class_="mainlia1 wzbtlist"):
            # Keep only entries whose text is exactly four characters long.
            if len(link.text) == 4:
                notext = link.text + '\n'
                w.write(notext)
# Reported once, after every page has been processed and the file is closed.
print("抓取数据成功!")
# Python 成语接龙1-爬取四字成语
# 最新推荐文章于 2024-02-13 03:19:40 发布