目录
一、独立标签采集
import requests
from bs4 import BeautifulSoup
import re
import bs4
# Request headers handed to requests.get() by getHTMLText; they identify the
# client with a browser-style User-Agent string.
kv = {
    'User-Agent': 'Mozilla/5.0',
}
def getHTMLText(url):
    """Fetch ``url`` and return the response body decoded as text.

    The request is sent with the module-level ``kv`` headers and a
    30-second timeout.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string if the request fails
        (connection error, timeout, invalid URL, or an HTTP error status).
    """
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()  # raise HTTPError on a 4xx/5xx status code
        # Decode with the encoding detected from the body content instead of
        # the one taken from the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers receive "" on any request failure.
        # Only requests' own errors are caught so that KeyboardInterrupt,
        # SystemExit and genuine programming errors are not hidden.
        return ""
def fillList(ulist, html):