Python 通过 urllib3 + requests + BeautifulSoup 抓取 CCTV 每日《新闻联播》的新闻标题
直接上代码
"""Fetch the CCTV Xinwen Lianbo page and print the text of each news link."""
import re

import requests
import urllib3
from bs4 import BeautifulSoup

# Silence urllib3 warnings through the urllib3 copy re-exported by requests.
requests.packages.urllib3.disable_warnings()

# A PoolManager instance issues the requests; it handles the connection
# pooling and thread-safety details.
http = urllib3.PoolManager()

# Browser-like User-Agent header sent with the request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
}

# CCTV Xinwen Lianbo page.
r = http.request('GET', 'https://tv.cctv.com/lm/xwlb/', headers=header)
soup = BeautifulSoup(r.data.decode('utf-8'), 'html.parser')

# find() returns None when no element has id="content"; stop with a clear
# message instead of failing later with an AttributeError on None.
item = soup.find(id="content")
if item is None:
    raise SystemExit('element with id="content" not found in the page')

# Keep only anchors whose href points at tv.cctv; the dot is escaped so it
# matches a literal "." rather than any character.
links = item.find_all('a', href=re.compile(r"tv\.cctv"))
for link in links:
    print(link.text)