下面以爬取简书首页的文章标题为例:
# coding:utf-8
import requests
from bs4 import BeautifulSoup
# 简书首页title爬取
class SoupSpider:
    """Scrape element text from a web page using requests and BeautifulSoup."""

    def __init__(self):
        # One shared session so every request made through this spider reuses
        # the same connection pool and cookie jar.
        self.session = requests.Session()

    def jian_shu_spider(self, url, headers, timeout=10):
        """Fetch ``url`` and print the text of every element with class "title".

        Args:
            url: Address of the page to download.
            headers: Mapping of HTTP headers sent with the request.
            timeout: Seconds to wait for the server before giving up;
                requests would otherwise wait indefinitely.

        Returns:
            A list holding the text of each matched element, in the order
            ``find_all`` returned them.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with a 4xx/5xx status.
        """
        # Go through the session created in __init__ instead of the
        # module-level requests.get, which ignored it entirely.
        response = self.session.get(url, headers=headers, timeout=timeout)
        # Fail loudly rather than parsing an error page as if it were the
        # requested document.
        response.raise_for_status()

        # Parse the downloaded HTML with the lxml parser.
        soup = BeautifulSoup(response.text, "lxml")

        # Collect the text of every element carrying class="title".
        titles = [tag.text for tag in soup.find_all(class_="title")]
        for title in titles:
            print("文章标题:{}".format(title))
        return titles
if __name__ == '__main__':
soup_spider = SoupSpider()
soup_spider.jian_shu_spider(
"http://www.jianshu.com",
{
"Referer": "https://www.jianshu.com/",
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML