# 以爬取新浪新闻为例
import re
import requests
from bs4 import BeautifulSoup
import json
from datetime import datetime
def getSoup(newsurl, timeout=10):
    """Download a page and return it parsed as a BeautifulSoup tree.

    Args:
        newsurl: URL of the page to fetch.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without it a stalled connection would
            block the caller indefinitely.

    Returns:
        A ``BeautifulSoup`` object built from the response text using
        the built-in ``html.parser``.
    """
    res = requests.get(newsurl, timeout=timeout)
    # Override the response encoding so res.text is decoded as UTF-8.
    res.encoding = 'utf-8'
    return BeautifulSoup(res.text, 'html.parser')
# newsurl 为新浪新闻 sh 首页上某则新闻的链接
# 可以先打印出 soup 来查看页面结构
# Take the first <title> element matched in the parsed page and keep its text.
title = soup.select('title')[0].get_text()
<