利用pandas
方法:
1、soup = BeautifulSoup(html.content, 'lxml', from_encoding='utf-8')
2、pd.read_html()
请求天气后报的一些表格数据的方法:
#!/usr/bin/env python
# encoding: utf-8
"""
@Time : 2020/2/15 22:08
@Author : jaden
@Desc : 利用pandas爬取表格数据 (这种方法只能抓取带有table标签的数据表)
"""
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Scrape the AQI table of each listed city from tianqihoubao.com, print it,
# and append its rows to results.csv.
city_list = ['baotou', 'beijing', 'shanghai']

# The request headers are the same for every city, so build them once.
headers = {
    'user-agent': '*********************************8'
}

for city_url in city_list:
    url = 'http://tianqihoubao.com/aqi/' + city_url + '.html'

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        html = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Network-level failure: report it and move on to the next city.
        print('输出失败')
        continue

    if html.status_code != 200:
        print('输出失败')
        continue

    soup = BeautifulSoup(html.content, 'lxml', from_encoding='utf-8')
    table = soup.find('table', class_='b')
    if table is None:
        # find() returns None when the page has no <table class="b">;
        # calling prettify() on it would raise AttributeError.
        print('输出失败')
        continue

    # prettify() re-serialises the tag as indented HTML. It is wrapped in
    # StringIO because newer pandas versions deprecate passing literal HTML
    # strings to read_html().
    tf = pd.read_html(StringIO(table.prettify()), header=0)
    print(tf[0])

    # mode='a' appends each city's rows to the same file; header=False keeps
    # the column names from being repeated for every city.
    tf[0].to_csv('results.csv', header=False, encoding='utf-8-sig', mode='a')