# 使用bs4+re正则来爬取网页上需要的数据
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
from bs4 import BeautifulSoup as bs
import requests
# HTTP request headers; the only entry is a desktop Chrome 85 user-agent string.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
}

# Module-level list that starts out empty.
# NOTE(review): presumably filled with scraped results further down the file — confirm.
lists = []
# 发送网络请求的方法,传入url和消息头
def get_text(url, headers):
r = requests.get(url=url, headers=headers)
r.encoding = 'GBK'
soup = bs(r.text, 'html.parser')
# 寻找标签为a的内容,匹