# -*- coding: UTF-8 -*-
import urllib.request as urllib2
from time import sleep
from bs4 import BeautifulSoup
# Scrape a static novel chapter page and append its text to a local file.
# NOTE(review): the original comments said this scrapes Qiushibaike and
# "loops 13 times" — neither matches the code; the URL below is a chapter
# on book.zongheng.com and the loop runs over however many divs are found.
# Chapter page to scrape (a novel chapter on book.zongheng.com).
url = 'http://book.zongheng.com/chapter/791751/44852939.html'
# Browser-like User-Agent: the first step of getting past basic anti-scraper checks.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36'}


def _fetch_html(page_url, request_headers):
    """Download *page_url* and return the response body decoded as UTF-8.

    Raises urllib.error.URLError (or its subclass HTTPError) on failure.
    """
    req = urllib2.Request(url=page_url, headers=request_headers)
    # `with` closes the HTTP response even if read/decode fails.
    with urllib2.urlopen(req) as resp:
        return resp.read().decode('utf-8')


def _extract_contents(html):
    """Return the stripped text of every <div class="content"> in *html*."""
    soup = BeautifulSoup(html, 'html.parser')
    return [div.get_text().strip()
            for div in soup.find_all('div', {'class': 'content'})]


def main():
    """Fetch the chapter page, print each content block, and append it to a file."""
    try:
        plain_text = _fetch_html(url, headers)
    except urllib2.error.URLError as e:
        # Bug fix: the original `except` clause did not bind the exception
        # (`as e`), so these hasattr() checks raised NameError; it also fell
        # through and used the undefined `plain_text` below. Report and stop.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
        return

    contents = _extract_contents(plain_text)
    # Bug fix: open the output file once — the original reopened it on every
    # loop iteration and never closed it (a file-handle leak).
    with open("D:\Documents\Desktop/reotile.txt", 'a', encoding="utf-8") as f:
        for text in contents:
            print(text)
            f.write(text)


if __name__ == "__main__":
    main()
# Source note: adapted from a CSDN tutorial titled "python3 scraper:
# scraping Qiushibaike jokes", last updated 2021-02-12 11:47:32.
# (This pasted blog footer was plain text and made the file a SyntaxError;
# it is kept here as a comment so the script parses.)