功能介绍
打开本地 HTML 文件,用 XPath 定位到特定内容,然后把该内容对应的 HTML 代码保存到 txt 文件中。
代码
from selenium import webdriver
from tqdm import tqdm
def write_txt(txt_list, txt_name):
    """Append every string in ``txt_list`` to the text file ``txt_name``.

    Each entry is written followed by two newlines, so consecutive entries
    are separated by a blank line. The file is opened in append mode with
    UTF-8 encoding; it is created if it does not exist and existing content
    is kept.

    Args:
        txt_list: Iterable of strings to write.
        txt_name: Path of the output file.
    """
    # Append-only: nothing is read back, so plain 'a' is sufficient.
    with open(txt_name, 'a', encoding='utf-8') as f:
        f.writelines(txt + '\n\n' for txt in txt_list)
def login(web_address, headless=False):
    """Start a Chrome WebDriver session and load ``web_address``.

    Args:
        web_address: Address of the page to open (e.g. a locally saved
            HTML file).
        headless: When true, start Chrome with the ``--headless`` argument
            so no browser window is shown. Defaults to False, which opens
            a visible window.

    Returns:
        The Chrome WebDriver instance with ``web_address`` loaded. The
        caller is responsible for calling ``quit()`` on it.
    """
    if headless:
        # Run without opening a browser window.
        option = webdriver.ChromeOptions()
        option.add_argument('--headless')
        driver = webdriver.Chrome(options=option)
    else:
        # Open a visible browser window so the process can be watched.
        driver = webdriver.Chrome()
    try:
        # Open the page.
        driver.get(web_address)
    except Exception:
        # Do not leave an orphaned browser process behind if loading fails.
        driver.quit()
        raise
    return driver
def change_web(web_address, driver=None):
    """Navigate an existing WebDriver session to ``web_address``.

    Args:
        web_address: Address of the page to open.
        driver: The WebDriver to reuse. When omitted, the module-level
            ``driver`` variable is used, which keeps existing one-argument
            calls working.

    Returns:
        The same driver object, now pointing at ``web_address``.

    Raises:
        RuntimeError: If no driver is passed and no module-level ``driver``
            exists.
    """
    if driver is None:
        # Backward compatibility: fall back to the module-global driver.
        driver = globals().get('driver')
        if driver is None:
            raise RuntimeError(
                "change_web() needs a driver: pass one explicitly or create "
                "it with login() first"
            )
    driver.get(web_address)
    return driver
def save_keyinfo(driver, xpath, txt_name):
    """Save the outer HTML of every element matching ``xpath`` to a file.

    The elements are looked up on the page currently loaded in ``driver``;
    the ``outerHTML`` attribute of each match is collected and the results
    are appended to ``txt_name`` via ``write_txt``.

    This is best-effort: any failure (lookup, attribute read, file write)
    is reported on stdout together with the page address, and the function
    returns normally so the caller can continue with the next page.

    Args:
        driver: WebDriver with the target page already loaded.
        xpath: XPath expression locating the elements to save.
        txt_name: Path of the output text file.
    """
    try:
        # The find_elements_by_xpath helper was removed in Selenium 4.3;
        # find_elements(by, value) is the supported form, and 'xpath' is
        # the locator strategy string for XPath lookups.
        table_list = driver.find_elements('xpath', xpath)
        keyinfo = [
            table.get_attribute('outerHTML') for table in tqdm(table_list)
        ]
        # Write everything collected from this page to the text file.
        write_txt(keyinfo, txt_name)
    except Exception as exc:
        # Record which page failed instead of silently discarding the error.
        try:
            failed_address = driver.current_url
        except Exception:
            failed_address = 'unknown address'
        print("Failed to save key info from {}: {!r}".format(
            failed_address, exc))
if __name__ == '__main__':
    # Addresses of all pages to read.
    web_address_list = [""]
    # Name of the txt file the extracted content is saved to.
    txt_name = 'xxx.txt'
    # XPath used to locate the key information on each page.
    xpath = ''

    # Kept at module level because change_web() reads this name.
    driver = None
    try:
        for i, web_address in enumerate(web_address_list):
            print("【{}】:{}".format(i, web_address))
            # Open the page: create the webdriver on first use, then
            # reuse the same browser session for the remaining pages.
            if driver is None:
                driver = login(web_address)
            else:
                driver = change_web(web_address)
            # Extract the content to keep from this page and append it
            # to the txt file.
            save_keyinfo(driver, xpath, txt_name)
    finally:
        # Always close the browser, even if a page fails part-way through.
        if driver is not None:
            driver.quit()