# -*- coding: utf-8 -*-
# !/usr/bin/env python
import requests
import re
import time,random
# Listing URL for one user's published articles; the page offset is appended.
u0 = 'http://jingyan.baidu.com/user/npublic?uid=d1b612bceb0dc22ba8ffe137&pn='
# Article links on the listing page are site-relative; this prefix makes them absolute.
ARTICLE_BASE = 'https://jingyan.baidu.com'
# Compiled once here instead of being re-looked-up inside the crawl loops.
ARTICLE_LINK_RE = re.compile(r'<a href="(/article/\w+\.html)" title="')
PARAGRAPH_RE = re.compile(r'<p>(.*?)</p>')
# NOTE(review): every article is requested this many times and its paragraphs
# are printed on each pass; the original loop index was never used. Confirm
# that the repetition is intended before changing it.
FETCHES_PER_ARTICLE = 20
# Seconds to wait for a response; without an explicit timeout a stalled
# connection would block the crawl indefinitely.
REQUEST_TIMEOUT = 10


def fetch_html(url):
    """Download *url* and return its body decoded as UTF-8.

    Network errors (including timeouts) raised by ``requests`` propagate to
    the caller.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Force UTF-8 rather than relying on the encoding requests guesses.
    response.encoding = 'utf-8'
    return response.text


def extract_article_paths(listing_html):
    """Return the site-relative ``/article/<id>.html`` links found in *listing_html*."""
    return ARTICLE_LINK_RE.findall(listing_html)


def extract_paragraphs(article_html):
    """Return the inner text of every single-line ``<p>...</p>`` in *article_html*."""
    return PARAGRAPH_RE.findall(article_html)


def main():
    """Walk the listing pages and print the paragraphs of every linked article.

    Listing pages are requested at offsets 0, 7, ..., 49. After each article
    request the crawler sleeps for a random 1-2 seconds.
    """
    for offset in range(0, 50, 7):
        listing_html = fetch_html(u0 + str(offset))
        for path in extract_article_paths(listing_html):
            article_url = ARTICLE_BASE + path
            for _ in range(FETCHES_PER_ARTICLE):
                for paragraph in extract_paragraphs(fetch_html(article_url)):
                    print(' ' + paragraph)
                # Pause between requests so the site is not hit back-to-back.
                time.sleep(random.uniform(1, 2))


if __name__ == '__main__':
    main()
# Baidu Jingyan (百度经验) article body crawler
# Source page note: latest recommended article published on 2020-12-06 09:14:47