爬虫:爬取有table标签的表格数据
简单使用方法:在谷歌浏览器中右键目标表格 -> 审查元素 -> Copy selector;如果用完整的选择器取不到数据,就只保留选择器的前半部分(即表格的上层容器)。
select:
# Scrape every <table> found inside a known container element of each page
# and print the result as a list of pandas DataFrames.
import sys
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Pages to scrape; add more URLs here to process several pages in one run.
city_list = ['http://s.askci.com/stock/a/?reportTime=2017-12-31&pageNum=1']

# The User-Agent value is a masked placeholder; replace it with a real
# browser User-Agent string before running.
headers = {
    'User-Agent': '*********************8'
}

# How to get the selector: Chrome -> Inspect element -> Copy selector.
# If the full selector matches nothing, keep only its leading part (an outer
# container) and let pandas locate the tables inside it.
# Full selector, kept for reference:
#   body > div.content > div.right_frame > div:nth-child(2) > div.public_table_box.mg_tone > div:nth-child(1) > div.public_ta_b_l_com.mg_tone > table
table_selector = 'body > div.content > div.right_frame > div:nth-child(2) > div.public_table_box.mg_tone'

for url in city_list:
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    # Pass the raw bytes: from_encoding is ignored (with a warning) when the
    # markup is an already-decoded str such as response.text.
    soup = BeautifulSoup(response.content, 'html', from_encoding='utf-8')

    matches = soup.select(table_selector)
    if not matches:
        # Nothing matched (page layout changed or request was blocked);
        # report it and move on to the next URL rather than raising IndexError.
        print(f'No element matches the table selector on {url}', file=sys.stderr)
        continue
    content = matches[0]

    # prettify() re-serializes the matched element as formatted HTML.
    # read_html returns a list with one DataFrame per <table> it finds; the
    # markup is wrapped in StringIO because passing literal HTML strings is
    # deprecated in recent pandas versions.
    # Original author's note: "#myTable04" (presumably the id of the target
    # table; TODO confirm).
    tf = pd.read_html(StringIO(content.prettify()), header=0)
    print(tf)