HTML 链接与 Python:使用 Python 从现有的 HTML 链接创建新的 HTML 文档

from selenium import webdriver

import urllib.request,os,datetime

from bs4 import BeautifulSoup

# Location of the ChromeDriver binary handed to Selenium.
CHROMEDRIVER_PATH = r'C:\chromedriver_win32\chromedriver.exe'

# Index page that lists the PublicInfoServlet links to process.
START_URL = "https://mylink"

# Change path to reflect file location
OUTPUT_PREFIX = 'mypath'


def record_id_from_href(href):
    """Return the value of the first query parameter in *href*.

    This is the text between the first ``=`` and the following ``&`` (or the
    end of the string). Returns ``None`` when *href* is empty/``None`` or
    carries no such value, so callers can skip the link instead of crashing.
    """
    if not href:
        return None
    value = href.partition("=")[2].partition("&")[0]
    return value or None


def timestamp_suffix(now=None):
    """Return *now* formatted as ``YYYY-MM-DDHHMMSS`` (current time if omitted).

    The clock is read once, so the date and time parts can never straddle
    midnight the way two separate reads could.
    """
    if now is None:
        now = datetime.datetime.now()
    return now.strftime('%Y-%m-%d%H%M%S')


def save_fragment(record_id, markup, now=None):
    """Write *markup* to ``<prefix><id>.html`` and rename it with a timestamp.

    Returns the final, timestamped path.
    """
    filename = OUTPUT_PREFIX + str(record_id) + '.html'  # saving the new doc at the required location.
    # Explicit encoding so non-ASCII page content does not depend on the
    # platform's default code page.
    with open(filename, 'w', encoding='utf-8') as out:
        out.write(markup)

    stamped = OUTPUT_PREFIX + str(record_id) + timestamp_suffix(now) + '.html'
    os.rename(filename, stamped)
    return stamped


def main():
    """Fetch every PublicInfoServlet link and save its ``SpanPrint`` div."""
    options = webdriver.ChromeOptions()
    # NOTE(review): `executable_path`, `chrome_options` and
    # `find_elements_by_css_selector` are the Selenium 3 spellings used by the
    # original script; confirm the installed Selenium version before upgrading.
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, chrome_options=options)
    try:
        driver.implicitly_wait(10)
        driver.get(START_URL)

        # finding the weblinks (html docs) that need to be edited into new html docs
        elems = driver.find_elements_by_css_selector("[href*=PublicInfoServlet]")

        # iterate through all the html weblinks found on the main webpage
        for elem in elems:
            href = elem.get_attribute("href")
            print(href)

            record_id = record_id_from_href(href)
            if record_id is None:
                print("skipping link without an id value:", href)
                continue

            # BeautifulSoup consumes the response inside the constructor, so
            # the connection can be closed as soon as parsing returns.
            with urllib.request.urlopen(href) as page:
                soup = BeautifulSoup(page, 'html.parser')

            # identify the html tag that is used to create the required html document
            fragment = soup.find("div", {"id": "SpanPrint"})
            if fragment is None:
                # Without this guard the literal text "None" would be saved.
                print("skipping page without a SpanPrint div:", href)
                continue

            save_fragment(record_id, str(fragment))
    finally:
        # Always shut the browser down, even if a page fails to load or parse.
        driver.quit()


if __name__ == "__main__":
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值