# 用 BeautifulSoup 库解析和提取新浪新闻数据
# 新浪新闻地址:http://news.sina.com.cn/china(国内新闻_新闻中心_新浪网)
import requests
from bs4 import BeautifulSoup
# Fetch the HTML source of the page at `url`.
headers = {
    # Browser-like User-Agent sent with the request in place of the
    # default one that requests would send.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
}
url = 'http://news.sina.com.cn/china'

# Without a timeout requests.get() may wait forever on an unresponsive server.
response = requests.get(url, headers=headers, timeout=10)
# Stop on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
# Force UTF-8 decoding of the body rather than relying on the encoding
# requests guessed from the response headers.
response.encoding = 'utf-8'
res = response.text

# Parse the markup and collect every <a> nested in an <li> beneath an
# element carrying the "left-content-1" class.
soup = BeautifulSoup(res, 'html.parser')
a = soup.select('.left-content-1 li a')

# Print each link as "<1-based index>.<link text>" followed by its target
# on the next line.
for index, link in enumerate(a, start=1):
    print(str(index) + '.' + link.text)
    # .get() keeps the listing going when an anchor has no href attribute
    # (indexing with ['href'] would raise KeyError); an empty line is
    # printed for such an anchor.
    print(link.get('href', ''))