Python爬取表格的步骤:
1、检查url地址,用requests.get()请求页面,并用raise_for_status()检查响应状态码;
2、爬取资源,用BeautifulSoup()解析页面,并且用find_all('tr')抓取其中的表格行;
3、保存资源,用write()将表格保存到指定目录。
from bs4 import BeautifulSoup
import requests
import csv
import bs4
# Check the URL and download the page
def check_link(url, timeout=30):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Requests
            does not time out on its own, so without this a stalled server
            would block the call indefinitely.

    Returns:
        The decoded page text, or ``None`` when the request fails (connection
        error, timeout, invalid URL, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception so they take the same
        # failure path as connection errors.
        r.raise_for_status()
        # Decode using the encoding detected from the body rather than the
        # one announced in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request-related failures are handled here; KeyboardInterrupt
        # and programming errors are deliberately left to propagate.
        print('无法链接服务器!!!')
        return None
# Scrape the table rows
def get_contents(ulist, rurl):
    """Parse HTML text and append one list per ``<tr>`` element to *ulist*.

    Args:
        ulist: List that is extended in place; one new inner list is appended
            for every ``<tr>`` found in the document.
        rurl: HTML source text to parse (handed to BeautifulSoup with the
            ``lxml`` parser).

    Returns:
        None. Results are delivered through *ulist*.
    """
    document = BeautifulSoup(rurl, 'lxml')
    for row in document.find_all('tr'):
        # Walk every direct child of the row, not only the <td> tags, so any
        # text nodes sitting between cells keep their position in the list.
        # Each entry is the child's ``.string`` value.
        ulist.append([child.string for child in row])
#保存资源
def save_contents(urlist):