1. 使用requests库
import requests
# Address of the page to download.
url = "http://www.starbaby.cn/zhinan/609987"
# Without a timeout, requests waits indefinitely on an unresponsive server.
req = requests.get(url, timeout=10)
# Explicitly set the page encoding; in most cases this can be omitted.
req.encoding = 'utf-8'
print(req.text)
2. 使用BeautifulSoup
from bs4 import BeautifulSoup
from bs4 import UnicodeDammit
import requests
def run():
    """Download the hard-coded page and print link text found in its table.

    For every ``<td>`` inside every ``<tr>`` of the first ``<tbody>``
    element, the text of the cell's first ``<a>`` tag is printed on its
    own line. Cells without a link are skipped, and nothing is printed
    when the document has no ``<tbody>`` element.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.get('http://zy.upln.cn/gongshi2014/index.html', timeout=10)
    # Hand the raw bytes to BeautifulSoup so it can work out the charset
    # (gbk, utf-8, ...) itself. Re-encoding r.text with r.encoding raises
    # TypeError when r.encoding is None and at best reproduces r.content.
    soup = BeautifulSoup(r.content, 'html.parser')
    table_body = soup.find('tbody')
    if table_body is None:
        # find() returns None when no <tbody> tag exists; nothing to print.
        return
    for row in table_body.find_all('tr'):
        for cell in row.find_all('td'):
            link = cell.a
            if link is None:
                # This cell holds no <a> tag, so there is no link text.
                continue
            print(link.text)
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    run()