# -*- coding: utf-8 -*-
import datetime
import os.path, configparser
import time
import random
from selenium import webdriver
# Default resume mark; it stays 0 unless a load_mark value is read from load_db.ini later on.
s_dateTime_mark = 0
# -*- coding: utf-8 -*-
import datetime
import os.path, configparser
import time
import random
from selenium import webdriver
# Default resume mark; it stays 0 unless a load_mark value is read from load_db.ini later on.
s_dateTime_mark = 0
# Index (first field) of the most recently handled link line, kept as text.
_sindex = '0'
w_time = datetime.datetime.now()
# Output files use a fixed base name; a timestamp-based name
# (w_time formatted as '%Y%m%d_%H%M%S') is left disabled.
w_time1 = "context"
# Working directory with forward slashes and a trailing slash.
pathfilename = os.getcwd().replace('\\', '/') + '/'
filename_list = '{0}{1}.txt'.format(pathfilename, w_time1)
filename_log = '{0}{1}log.txt'.format(pathfilename, w_time1)
config_fileName = '{0}load_db.ini'.format(pathfilename)
print('config_fileName' + config_fileName)
# Make sure load_db.ini exists, then load it into `config`.
# When the file cannot be opened it is created with [Mark] load_mark = 0.
config = configparser.ConfigParser()
try:
    # Opening for reading is only an existence/readability probe.
    with open(config_fileName, 'r'):
        pass
    print("INI存在")
except IOError:
    print("INI不存在")
    config.add_section("Mark")
    config.set("Mark", "load_mark", '0')
    # A single managed handle: the file is flushed and closed before it is
    # re-read below.
    with open(config_fileName, 'w') as ini_file:
        config.write(ini_file)
    print("INI建立成功")
config.read(config_fileName)
# Open the result file and the run log in append mode. Both handles are left
# open on purpose: the scraping loop and the final cleanup below use them.
file_object = open(filename_list, 'a', encoding='utf-8')
file_object_log = open(filename_log, 'a', encoding='utf-8')
try:
    s_dateTime_mark = int(config.get("Mark", "load_mark"), 10)
except (configparser.Error, ValueError):
    # Missing section/option or a non-numeric value: s_dateTime_mark keeps its
    # earlier value and no header is written.
    print("异常")
else:
    print(s_dateTime_mark)
    if s_dateTime_mark == 0:
        # A mark of 0 means nothing has been scraped yet, so emit the header row.
        file_object.write('序號,原ID,地區,單位名稱,項目名稱,金額(萬元),時間' + '\n')
file_object.flush()
file_object_log.write('開始時間:' + str(datetime.datetime.now()) + '\n')
# Load the list of links to visit and start the Chrome driver.
# NOTE(review): both paths are machine-specific absolute Windows paths.
log_link1 = r"E:\PYTHON\test1\log_link.txt"
# Read the file once and release the handle right away; `read_file` stays
# bound (closed) so a later read_file.close() remains harmless.
with open(log_link1, 'r', encoding='utf-8') as read_file:
    date_line = read_file.readlines()
countx = len(date_line)
path_driver = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe"
browser = webdriver.Chrome(path_driver)
# Visit every link after the saved mark and append the extracted rows to the
# result file. Each input line is read as "<index>,<id>,<district>,<url>".
# A failure on one page is recorded as an error row and the loop moves on.
browser.implicitly_wait(20)  # loop-invariant, so it is set once up front
for line in range(s_dateTime_mark + 1, countx):
    date_line1 = date_line[line]
    # Strip the line terminator so it does not leak into the URL field.
    data_list = date_line1.rstrip('\r\n').split(",")
    if len(data_list) < 4:
        # Blank or malformed line: there is no URL to fetch.
        print('跳過格式不正確的行,行號為:', line)
        continue
    _sindex = data_list[0]
    _id = data_list[1]
    _districtName = data_list[2]
    _url = data_list[3]
    # Leading columns shared by every row written for this link.
    row_prefix = str(_sindex) + ',' + str(_id) + ',' + _districtName + ','
    try:
        browser.get(_url)
        browser.switch_to_frame('iframe')
        # Resolve the page header once instead of re-walking the DOM per field.
        page_header = browser.find_element_by_tag_name("body").find_element_by_tag_name("header")
        biaoti = page_header.find_element_by_tag_name("h1").text
        info_spans = page_header.find_element_by_class_name(
            "detail-info").find_elements_by_tag_name("span")
        laiyuan = info_spans[0].text
        fabushijian = info_spans[1].text
        liulancishu = info_spans[2].text
        laiyuan = laiyuan.replace('来源:', '')
        # = Method 1: cells identified by their "code-*" class names ==========
        try:
            print('試用第1種sectionNo控件名找數據,序號為:', _sindex)
            table_body = browser.find_element_by_id("template-center-mark").find_element_by_class_name(
                "template-bookmark").find_element_by_tag_name("tbody")
            xuhao = table_body.find_elements_by_class_name("code-sectionNo")
            purchaseProjectName = table_body.find_elements_by_class_name("code-purchaseProjectName")
            # Collected but not part of the output row.
            caigouxuqiugaikuang = table_body.find_elements_by_class_name(
                "code-purchaseRequirementDetail")
            budgetPrice = table_body.find_elements_by_class_name("code-budgetPrice")
            estimatedPurchaseTime = table_body.find_elements_by_class_name("code-estimatedPurchaseTime")
            # Collected but not part of the output row.
            beizhu = table_body.find_elements_by_class_name("code-remark")
            # Build all rows before writing any, so a failure part-way through
            # does not leave partial rows that the fallback would then repeat.
            # Indexing (not zip) keeps a length mismatch an error.
            pending_rows = []
            for each in range(len(xuhao)):
                pending_rows.append(
                    row_prefix + laiyuan + ',' + purchaseProjectName[each].text + ',' +
                    budgetPrice[each].text + ',' + estimatedPurchaseTime[each].text + '\n')
            for value_input in pending_rows:
                file_object.write(value_input)
                print(value_input)
        except Exception:
            print('第1種sectionNo控件名沒有找到')
            # = Method 2: read the plain table row by row =====================
            try:
                print('試用第2種表格名的方式找數據,序號為:', _sindex)
                rowsList = browser.find_element_by_class_name(
                    "form-panel-input-cls").find_element_by_tag_name("tbody").find_elements_by_tag_name("tr")
                allContext = [
                    [eachCol.text for eachCol in eachRow.find_elements_by_tag_name("td")]
                    for eachRow in rowsList
                ]
                for cells in allContext:
                    # Skip the header row and rows too short to hold columns 1, 3 and 4.
                    if len(cells) < 5 or cells[0] == "序号":
                        continue
                    value_input = row_prefix + laiyuan + ',' + \
                        cells[1] + ',' + cells[3] + ',' + cells[4] + '\n'
                    file_object.write(value_input)
                    print(value_input)
            except Exception as e:
                print('試用2種方式沒找到數據,序號為:', _sindex, e)
                # Collapse whitespace so a multi-line message stays on one row.
                value_input = row_prefix + _url + ',' + ' '.join(str(e).split()) + '\n'
                file_object.write(value_input)
    except Exception as e:
        print('沒有載入網頁,序號為:', _sindex)
        # Collapse whitespace so a multi-line message stays on one row.
        value_input = row_prefix + _url + ',' + ' '.join(str(e).split()) + '\n'
        file_object.write(value_input)
    file_object_log.write('序号:' + str(_sindex) + ' 開始時間:' + str(datetime.datetime.now()) + '\n')
    file_object_log.flush()
    file_object.flush()
    # Random pause between requests.
    ret = random.uniform(2, 6)
    time.sleep(ret)
# Shut down the browser, persist the resume mark and release the open files.
# quit() ends the whole WebDriver session; close() would only close the window.
browser.quit()
try:
    if int(_sindex, 10) > 0:
        if not config.has_section("Mark"):
            config.add_section("Mark")
        config.set("Mark", "load_mark", str(_sindex))
        with open(config_fileName, "w") as ini_file:
            config.write(ini_file)
finally:
    # Close the files even if saving the mark fails.
    file_object.close()
    file_object_log.close()
    read_file.close()
# Index (first field) of the most recently handled link line, kept as text.
_sindex = '0'
w_time = datetime.datetime.now()
# Output files use a fixed base name; a timestamp-based name
# (w_time formatted as '%Y%m%d_%H%M%S') is left disabled.
w_time1 = "context"
# Working directory with forward slashes and a trailing slash.
pathfilename = os.getcwd().replace('\\', '/') + '/'
filename_list = '{0}{1}.txt'.format(pathfilename, w_time1)
filename_log = '{0}{1}log.txt'.format(pathfilename, w_time1)
config_fileName = '{0}load_db.ini'.format(pathfilename)
print('config_fileName' + config_fileName)
# Make sure load_db.ini exists, then load it into `config`.
# When the file cannot be opened it is created with [Mark] load_mark = 0.
config = configparser.ConfigParser()
try:
    # Opening for reading is only an existence/readability probe.
    with open(config_fileName, 'r'):
        pass
    print("INI存在")
except IOError:
    print("INI不存在")
    config.add_section("Mark")
    config.set("Mark", "load_mark", '0')
    # A single managed handle: the file is flushed and closed before it is
    # re-read below.
    with open(config_fileName, 'w') as ini_file:
        config.write(ini_file)
    print("INI建立成功")
config.read(config_fileName)
# Open the result file and the run log in append mode. Both handles are left
# open on purpose: the scraping loop and the final cleanup below use them.
file_object = open(filename_list, 'a', encoding='utf-8')
file_object_log = open(filename_log, 'a', encoding='utf-8')
try:
    s_dateTime_mark = int(config.get("Mark", "load_mark"), 10)
except (configparser.Error, ValueError):
    # Missing section/option or a non-numeric value: s_dateTime_mark keeps its
    # earlier value and no header is written.
    print("异常")
else:
    print(s_dateTime_mark)
    if s_dateTime_mark == 0:
        # A mark of 0 means nothing has been scraped yet, so emit the header row.
        file_object.write('序號,原ID,地區,單位名稱,項目名稱,金額(萬元),時間' + '\n')
file_object.flush()
file_object_log.write('開始時間:' + str(datetime.datetime.now()) + '\n')
# Load the list of links to visit and start the Chrome driver.
# NOTE(review): both paths are machine-specific absolute Windows paths.
log_link1 = r"E:\PYTHON\test1\log_link.txt"
# Read the file once and release the handle right away; `read_file` stays
# bound (closed) so a later read_file.close() remains harmless.
with open(log_link1, 'r', encoding='utf-8') as read_file:
    date_line = read_file.readlines()
countx = len(date_line)
path_driver = "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chromedriver.exe"
browser = webdriver.Chrome(path_driver)
# Visit every link after the saved mark and append the extracted rows to the
# result file. Each input line is read as "<index>,<id>,<district>,<url>".
# A failure on one page is recorded as an error row and the loop moves on.
browser.implicitly_wait(20)  # loop-invariant, so it is set once up front
for line in range(s_dateTime_mark + 1, countx):
    date_line1 = date_line[line]
    # Strip the line terminator so it does not leak into the URL field.
    data_list = date_line1.rstrip('\r\n').split(",")
    if len(data_list) < 4:
        # Blank or malformed line: there is no URL to fetch.
        print('跳過格式不正確的行,行號為:', line)
        continue
    _sindex = data_list[0]
    _id = data_list[1]
    _districtName = data_list[2]
    _url = data_list[3]
    # Leading columns shared by every row written for this link.
    row_prefix = str(_sindex) + ',' + str(_id) + ',' + _districtName + ','
    try:
        browser.get(_url)
        browser.switch_to_frame('iframe')
        # Resolve the page header once instead of re-walking the DOM per field.
        page_header = browser.find_element_by_tag_name("body").find_element_by_tag_name("header")
        biaoti = page_header.find_element_by_tag_name("h1").text
        info_spans = page_header.find_element_by_class_name(
            "detail-info").find_elements_by_tag_name("span")
        laiyuan = info_spans[0].text
        fabushijian = info_spans[1].text
        liulancishu = info_spans[2].text
        laiyuan = laiyuan.replace('来源:', '')
        # = Method 1: cells identified by their "code-*" class names ==========
        try:
            print('試用第1種sectionNo控件名找數據,序號為:', _sindex)
            table_body = browser.find_element_by_id("template-center-mark").find_element_by_class_name(
                "template-bookmark").find_element_by_tag_name("tbody")
            xuhao = table_body.find_elements_by_class_name("code-sectionNo")
            purchaseProjectName = table_body.find_elements_by_class_name("code-purchaseProjectName")
            # Collected but not part of the output row.
            caigouxuqiugaikuang = table_body.find_elements_by_class_name(
                "code-purchaseRequirementDetail")
            budgetPrice = table_body.find_elements_by_class_name("code-budgetPrice")
            estimatedPurchaseTime = table_body.find_elements_by_class_name("code-estimatedPurchaseTime")
            # Collected but not part of the output row.
            beizhu = table_body.find_elements_by_class_name("code-remark")
            # Build all rows before writing any, so a failure part-way through
            # does not leave partial rows that the fallback would then repeat.
            # Indexing (not zip) keeps a length mismatch an error.
            pending_rows = []
            for each in range(len(xuhao)):
                pending_rows.append(
                    row_prefix + laiyuan + ',' + purchaseProjectName[each].text + ',' +
                    budgetPrice[each].text + ',' + estimatedPurchaseTime[each].text + '\n')
            for value_input in pending_rows:
                file_object.write(value_input)
                print(value_input)
        except Exception:
            print('第1種sectionNo控件名沒有找到')
            # = Method 2: read the plain table row by row =====================
            try:
                print('試用第2種表格名的方式找數據,序號為:', _sindex)
                rowsList = browser.find_element_by_class_name(
                    "form-panel-input-cls").find_element_by_tag_name("tbody").find_elements_by_tag_name("tr")
                allContext = [
                    [eachCol.text for eachCol in eachRow.find_elements_by_tag_name("td")]
                    for eachRow in rowsList
                ]
                for cells in allContext:
                    # Skip the header row and rows too short to hold columns 1, 3 and 4.
                    if len(cells) < 5 or cells[0] == "序号":
                        continue
                    value_input = row_prefix + laiyuan + ',' + \
                        cells[1] + ',' + cells[3] + ',' + cells[4] + '\n'
                    file_object.write(value_input)
                    print(value_input)
            except Exception as e:
                print('試用2種方式沒找到數據,序號為:', _sindex, e)
                # Collapse whitespace so a multi-line message stays on one row.
                value_input = row_prefix + _url + ',' + ' '.join(str(e).split()) + '\n'
                file_object.write(value_input)
    except Exception as e:
        print('沒有載入網頁,序號為:', _sindex)
        # Collapse whitespace so a multi-line message stays on one row.
        value_input = row_prefix + _url + ',' + ' '.join(str(e).split()) + '\n'
        file_object.write(value_input)
    file_object_log.write('序号:' + str(_sindex) + ' 開始時間:' + str(datetime.datetime.now()) + '\n')
    file_object_log.flush()
    file_object.flush()
    # Random pause between requests.
    ret = random.uniform(2, 6)
    time.sleep(ret)
# Shut down the browser, persist the resume mark and release the open files.
# quit() ends the whole WebDriver session; close() would only close the window.
browser.quit()
try:
    if int(_sindex, 10) > 0:
        if not config.has_section("Mark"):
            config.add_section("Mark")
        config.set("Mark", "load_mark", str(_sindex))
        with open(config_fileName, "w") as ini_file:
            config.write(ini_file)
finally:
    # Close the files even if saving the mark fails.
    file_object.close()
    file_object_log.close()
    read_file.close()