使用爬虫爬取豆瓣图书 Top 250,并将结果保存到 .txt 文件
使用的工具:PyCharm
使用的包:requests,bs4,time
以下是本次爬取的代码
import requests
from bs4 import BeautifulSoup
import time
#发送请求
def request_dangdang(url, headers, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Despite its name, the function is not tied to a particular site: it
    requests whatever URL it is given.

    Args:
        url: Address of the page to download.
        headers: HTTP headers sent along with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as text when the server answers with status 200;
        ``None`` for any other status code or when the request fails.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # server. A timeout is raised as a RequestException subclass, so it
        # is handled by the same except clause as other request failures.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    # Any non-200 status is treated as "no page".
    return None
#解析文件
def parse_result(soup):
contents=soup.find(class_=‘article’).find_all(name=‘table’,width=‘100%’)#.get_text().replace(’ ‘, ‘’).replace(’\n’,’ ').strip()
for item in contents:
item_bookname=item.find(name='div',class_='pl2').find(name='a').get_text().replace('\n','').replace(' ','')
item_author=item.find(name='p',class_='pl').get_text()
item_pl=item.find(name='span&#