人生中第一个爬虫项目,嘻嘻。
使用 Python 的 requests 库爬取豆瓣图书 Top250 页面,再用 BeautifulSoup 解析页面内容。
具体代码如下,供大家参考:
# -*- coding: utf-8 -*-
"""
Created on Sat May 25 19:58:21 2019
@author: Administrator
"""
import requests
from bs4 import BeautifulSoup
#解析页面
def html_parse():
for url in get_page():
resp = requests.get('https://book.douban.com/top250?start=0')
#设置一个soup对象
soup = BeautifulSoup(resp.text, 'lxml')
#获取书名
alldiv = soup.find_all('div', class_='pl2')
names = [div.find('a')['title'] for div in alldiv]
#获取作者
allp = soup.find_all('p', class_='pl')
authors = [p.text.split('/')[0] for p in allp]
# 评分
starspan = soup.find_all('span', class_='rating_nums')
scores = [s.get_text() for s in starspan]
# 简介