import re
from selenium import webdriver
import time
from io import StringIO

import pandas as pd

# Open the inquiry-letter listing page in Chrome and give it time to load.
# NOTE: the browser is never closed here; call browser.quit() when finished.
browser = webdriver.Chrome()
url = 'http://www.sse.com.cn//disclosure/credibility/supervision/inquiries/'
browser.get(url)
time.sleep(3)
data = browser.page_source

# Captures the href of every link cell (<td><a href=...>) in the page HTML.
p_href = r'<td><a href="(.*?)" target="_blank">.*?</a></td>'

# One DataFrame per visited page; kept in a list so that the pages already
# scraped remain available if a later page fails.
page_tables = []
for i in range(10):
    # Type the 1-based page number into the "jump to page" box.  send_keys
    # appends to whatever the box already holds, so clear it first.
    page_box = browser.find_element('xpath', '//*[@id="ht_codeinput"]')
    page_box.clear()
    page_box.send_keys(str(i + 1))
    browser.find_element('xpath', '//*[@id="pagebutton"]').click()
    time.sleep(3)  # fixed wait for the requested page to load

    data = browser.page_source
    href = re.findall(p_href, data)
    # read_html expects a file-like object; the first table on the page is used.
    table = pd.read_html(StringIO(data))[0]
    # NOTE: pandas raises ValueError here when the number of matched links
    # differs from the number of rows in the table.
    table['网址'] = href
    page_tables.append(table)

# Stack all pages; each page keeps its own row index, as DataFrame.append did.
table_all = pd.concat(page_tables)
table_all
- 报错如下
添加匹配标题的正则表达式(p_title),解决该问题
import re
from selenium import webdriver
import time
from io import StringIO

import pandas as pd

# Open the inquiry-letter listing page in Chrome and give it time to load.
# NOTE: the browser is never closed here; call browser.quit() when finished.
browser = webdriver.Chrome()
url = 'http://www.sse.com.cn//disclosure/credibility/supervision/inquiries/'
browser.get(url)
time.sleep(3)
data = browser.page_source

# Both patterns match the same link cells (<td><a href=...>text</a></td>):
# p_title captures the link text, p_href captures the link target.
p_title = r'<td><a href=".*?" target="_blank">(.*?)</a></td>'
p_href = r'<td><a href="(.*?)" target="_blank">.*?</a></td>'

# One DataFrame per visited page; kept in a list so that the pages already
# scraped remain available if a later page fails.
page_tables = []
for i in range(10):
    # Type the 1-based page number into the "jump to page" box.  send_keys
    # appends to whatever the box already holds, so clear it first.
    page_box = browser.find_element('xpath', '//*[@id="ht_codeinput"]')
    page_box.clear()
    page_box.send_keys(str(i + 1))
    browser.find_element('xpath', '//*[@id="pagebutton"]').click()
    time.sleep(3)  # fixed wait for the requested page to load

    data = browser.page_source
    # title is collected but not stored in the table within this block.
    title = re.findall(p_title, data)
    href = re.findall(p_href, data)
    # read_html expects a file-like object; the first table on the page is used.
    table = pd.read_html(StringIO(data))[0]
    # NOTE: pandas raises ValueError here when the number of matched links
    # differs from the number of rows in the table.
    table['网址'] = href
    page_tables.append(table)

# Stack all pages; each page keeps its own row index, as DataFrame.append did.
table_all = pd.concat(page_tables)
table_all