代码如下
import random
import time
from tkinter import filedialog
import tkinter as tk
import xlrd
import os
import datetime
import csv
from selenium import webdriver
from selenium.webdriver import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def chooseFile():
    """Ask the user to pick the company-list spreadsheet and return its path.

    A hidden Tk root window hosts the native "open file" dialog. When a file
    is chosen, it is opened once with xlrd and its first sheet is accessed,
    so an unreadable workbook fails here instead of later in the run.

    Returns:
        The selected file path, or '' when the dialog was cancelled.

    Raises:
        Any exception raised by ``xlrd.open_workbook`` / ``sheet_by_index``
        when the chosen file cannot be read as a workbook.
    """
    root = tk.Tk()
    root.withdraw()
    try:
        f_path = filedialog.askopenfilename(
            title='选择公司列表文件',
            filetypes=[('xlsx', '*.XLSX'), ('xls', '*.xls')],
        )
    finally:
        # The root only exists to host the dialog; destroy it so no hidden
        # window (and its Tcl interpreter) lingers for the rest of the run.
        root.destroy()

    if not f_path:
        # Cancelling may yield '' or an empty tuple depending on the Tk
        # build; normalise to '' because callers compare against ''.
        return ''

    print('\n获取的文件地址:', f_path)
    # NOTE(review): xlrd 2.x reads only .xls files - confirm the installed
    # version, since the dialog filter also offers .xlsx.
    with xlrd.open_workbook(f_path, formatting_info=False) as workbook:
        # Access the first sheet as the original did; the value is not used.
        workbook.sheet_by_index(0)
    return f_path
def DList(tagList):
    """Return a new three-element list built from the start of ``tagList``.

    The first three entries of ``tagList`` are copied in order. Missing
    positions are filled with a single-space string, so the result always
    has exactly three elements (an empty input gives three " " entries).

    Args:
        tagList: A sequence supporting ``len()`` and integer indexing.

    Returns:
        A new list of length 3.
    """
    wanted = 3
    # Only len() and indexing are used, matching what the original required
    # of tagList; entries past the third are ignored.
    head = [tagList[i] for i in range(min(len(tagList), wanted))]
    # Pad short inputs instead of raising IndexError on 1- or 2-item lists.
    return head + [" "] * (wanted - len(head))
def main():
xlsFile = chooseFile()
if xlsFile == '':
return
workbook = xlrd.open_workbook(xlsFile, formatting_info=False)
sheet1 = workbook.sheet_by_index(0)
iIndex = 0
allUrls = []
while iIndex < sheet1.nrows:
commodityId = str(sheet1.cell_value(iIndex, 0))
allUrls.append([commodityId])
iIndex = iIndex + 1
currentDir = os.getcwd()
dirName = currentDir + "//ouput//"
curr_time = datetime.datetime.now()
strNow = datetime.datetime.strftime(curr_time, '%Y%m%d%H%M%S')
dirName = dirName + strNow
os.makedirs(dirName)
outCsv = dirName + "/out.csv"
userAgent = [
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0",
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 SE 2.X MetaSr 1.0",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 SLBrowser/9.0.3.1311 SLBChan/128",
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.5735.289 Safari/537.36",
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0"]
chrome_location = r'Chrome-bin\chrome.exe'
options = webdriver.ChromeOptions()
options.binary_location = chrome_location
options.add_experimental_option('detach', True)
options.add_experimental_option('excludeSwitches', ['enable-automation']