先上源码
这次用的是 BeautifulSoup 来解析 HTML,非常便捷
import datetime
import pandas as pd
import re
import requests
import time
from bs4 import BeautifulSoup
# Request headers passed to requests.get() in get_html; carries a
# desktop-browser User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/61.0.3163.100 Safari/537.36'
    ),
}
def get_html(url, *, max_retries=None, timeout=10):
    """Fetch *url* repeatedly until the response text contains 'table'.

    The site's anti-scraping protection usually blocks the first request,
    so the request is retried (one second apart) until a response whose
    text contains the substring 'table' comes back.

    Args:
        url: Address to request with the module-level ``headers``.
        max_retries: Maximum number of attempts, or ``None`` (default) to
            keep retrying forever, as the original implementation did.
            At least one attempt is always made.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The raw response body (``r.content``) of the first accepted
        response.

    Raises:
        RuntimeError: If ``max_retries`` is set and every attempt failed.
    """
    attempt = 0
    while True:
        attempt += 1
        print('从', url, '获取数据')
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout):
            # Transient network failure: treat it like a blocked request
            # and retry instead of aborting the whole run.
            r = None
        if r is not None and 'table' in r.text:
            print('成功获取数据')
            return r.content
        print('甘霖凉鸡掰,不给我数据')
        if max_retries is not None and attempt >= max_retries:
            raise RuntimeError(
                'no table found at {} after {} attempt(s)'.format(url, attempt)
            )
        # Back off briefly before asking again.
        time.sleep(1)
def parse_html(page_content):
soup = BeautifulSoup(page_content, features='lxml')
table = soup.find('table')
item_list = table.find_all('tr')
month = []
for i in range(1, len(ite