1.环境:PyCharm,Python 3.4
2.源码解析
import requests
import re
from bs4 import BeautifulSoup
#通过requests.get获取整个网页的数据
def getHtmlText(url, timeout=30):
    """Fetch a web page and return its body as decoded text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.
            Without a timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded page text, or the string ``"craw failed"`` if the
        request could not be completed (connection error, timeout,
        invalid URL, or a 4xx/5xx HTTP status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they take the
        # failure path below instead of returning an error page.
        r.raise_for_status()
        # Use the encoding detected from the body itself; the charset
        # from the HTTP headers is often missing or wrong.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-level failures are handled here; KeyboardInterrupt,
        # SystemExit and programming errors are no longer swallowed.
        return "craw failed"
#下图是网页中的内容:
#解析页面内容,通过find_all函数find所有的a标签的内容,返回一个list,
然后通过正则表达式匹配re.findall(r"[s