import sys
sys.path.append('lib/')
import time
import redis
import xlwt
from bs4 import BeautifulSoup
from selenium import webdriver
if __name__ == '__main__':
    try:
        # Redis acts as a one-key page cache so repeated runs can skip the
        # slow Selenium fetch entirely.
        r = redis.StrictRedis(host='localhost', port=6379, password='102400', db=0)
        html = r.get("html")

        # Prepare the output workbook with a header row
        # (columns: tender title, tender code, deadline).
        work_book = xlwt.Workbook(encoding='utf-8')
        sheet = work_book.add_sheet('sheet表名')
        sheet.write(0, 0, '招标内容')
        sheet.write(0, 1, '招标编码')
        sheet.write(0, 2, '截止时间')

        if not html:
            print("new")
            target = 'https://tang.cdt-ec.com/notice//moreController/toMore?globleType=0'
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--headless')
            chrome_options.add_argument('--disable-gpu')
            # Selenium 4: options are passed via the `options` kwarg; the old
            # `chrome_options` kwarg was deprecated in 3.x and removed in 4.x.
            browser = webdriver.Chrome(options=chrome_options)
            try:
                browser.get(url=target)
                # Filter the notice list by purchasing unit, then click the
                # first search button to refresh the table.
                browser.find_element("id", "purchase_unit").send_keys("潮州")
                browser.find_elements("css selector", ".layui-btn-primary")[0].click()
                time.sleep(2)  # crude wait for the AJAX table to re-render
                html = browser.page_source
            finally:
                # Always release the browser, or chromedriver/chrome processes
                # leak on every failed run.
                browser.quit()
            r.set('html', html)
        else:
            print("cache")

        # One spreadsheet row per table row: title, tender code, deadline.
        bf = BeautifulSoup(html, "html.parser")
        row = 1
        for tr in bf.select(".layui-table-main .layui-table tr"):
            title = tr.find("a").get_text()
            code = tr.select("ul li:nth-of-type(1) span")[0].get_text()
            # Reformat "YYYY-MM-DD HH:MM:SS" as "YYYY/MM/DD HH:MM".
            endTime = time.strftime(
                "%Y/%m/%d %H:%M",
                time.strptime(tr.select("ul li:last-child span")[0].get_text(),
                              "%Y-%m-%d %H:%M:%S"))
            sheet.write(row, 0, title)
            sheet.write(row, 1, code)
            sheet.write(row, 2, endTime)
            row += 1
        work_book.save('招标公告.xls')
    except Exception as e:
        # Top-level boundary: report to stderr and exit non-zero so shells and
        # cron can detect the failure (the original exited with status 0).
        print(e, file=sys.stderr)
        sys.exit(1)
# Example of a Python web scraper (python爬虫例子).
# Originally published 2024-05-03 11:07:56.