业务需求:给定一个包含店铺名称的 Excel 文件,在酒仙网逐一搜索这些店铺,检查店铺是否已关店、商品是否已下架。
# -*- coding: UTF-8 -*-
import requests
from tkinter import *
from tkinter import filedialog, messagebox, ttk
import xlrd
import xlwt
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor
class ExcelEntity:
    """One spreadsheet row: the first six cells of a worksheet line.

    The fields are named after their zero-based column position plus one:
    ``name1`` and ``name2`` hold columns 0 and 1, ``key3`` and ``key4``
    hold columns 2 and 3, and ``state5`` / ``state6`` hold columns 4 and 5.
    Every field defaults to the empty string and is accessed through the
    ``get_*`` / ``set_*`` pairs below.
    """

    def __init__(self):
        # Per-instance state; each field starts out as an empty string so a
        # row with fewer than six columns still yields writable values.
        self.__name1 = ''
        self.__name2 = ''
        self.__key3 = ''
        self.__key4 = ''
        self.__state5 = ''
        self.__state6 = ''

    def __repr__(self):
        return '{}({!r}, {!r}, {!r}, {!r}, {!r}, {!r})'.format(
            type(self).__name__,
            self.__name1,
            self.__name2,
            self.__key3,
            self.__key4,
            self.__state5,
            self.__state6,
        )

    def set_name1(self, name1):
        """Store the value of column 0."""
        self.__name1 = name1

    def get_name1(self):
        """Return the value of column 0."""
        return self.__name1

    def set_name2(self, name2):
        """Store the value of column 1."""
        self.__name2 = name2

    def get_name2(self):
        """Return the value of column 1."""
        return self.__name2

    def set_key3(self, key3):
        """Store the value of column 2."""
        self.__key3 = key3

    def get_key3(self):
        """Return the value of column 2."""
        return self.__key3

    def set_key4(self, key4):
        """Store the value of column 3."""
        self.__key4 = key4

    def get_key4(self):
        """Return the value of column 3."""
        return self.__key4

    def get_state5(self):
        """Return the value of column 4."""
        return self.__state5

    def set_state5(self, state5):
        """Store the value of column 4."""
        self.__state5 = state5

    def get_state6(self):
        """Return the value of column 5."""
        return self.__state6

    def set_state6(self, state6):
        """Store the value of column 5."""
        self.__state6 = state6
# Look up one shop on jiuxian.com and record the outcome on the row.
def readHtml(b):
    """Fill in the two status fields of ``b`` and return ``b``.

    ``b`` must provide ``get_name2()``, ``set_state5()`` and ``set_state6()``.

    * If the name contains '店铺名称' the row is treated as the header row
      and receives the column titles '备注' / '状态'; no request is made.
    * Otherwise the search page is fetched with the name as the ``key``
      query parameter. A ``div`` with class ``notic_box`` in the response
      marks the row '已下架' / '关店'; its absence marks it '未下架' / '在开'.
    * If the request fails (connection error, timeout, HTTP error status)
      the row is marked '查询失败' / '未知' instead of raising, so one bad
      lookup does not abort a whole batch.
    """
    name = b.get_name2()
    if not isinstance(name, str):
        # Spreadsheet cells are not guaranteed to be text (numeric cells
        # come back as numbers), and the checks below need a string.
        name = str(name)

    if '店铺名称' in name:
        b.set_state5('备注')
        b.set_state6('状态')
        return b

    try:
        # Passing the name through ``params`` lets requests percent-encode
        # it, so characters such as '&', '#' or spaces cannot break the
        # query string. The timeout keeps a stalled server from hanging a
        # worker forever.
        response = requests.get(
            'http://list.jiuxian.com/search.htm',
            params={'key': name, 'area': 2},
            timeout=10,
        )
        # An error page must not be misread as "shop is open".
        response.raise_for_status()
    except requests.RequestException:
        b.set_state5('查询失败')
        b.set_state6('未知')
        return b

    html = BeautifulSoup(response.text, features="lxml")
    if html.find_all('div', class_='notic_box'):
        b.set_state5('已下架')
        b.set_state6('关店')
    else:
        b.set_state5('未下架')
        b.set_state6('在开')
    return b
# Read the workbook and look up every row concurrently.
def readExcel(name, max_workers=100):
    """Load the first sheet of ``name`` and resolve each row's status.

    One ``ExcelEntity`` is built per worksheet row from its first six
    columns (further columns are ignored). Every entity is then passed to
    ``readHtml`` on a thread pool while the module-level progress widgets
    (``strVar2``, ``strVar3``, ``progressbarOne``, ``root``) are refreshed
    after each finished row.

    Args:
        name: Path of the workbook to open with xlrd.
        max_workers: Size of the thread pool used for the lookups.

    Returns:
        The list of entities, in worksheet row order.
    """
    sheet = xlrd.open_workbook(name).sheet_by_index(0)

    # Column position -> setter; zip() below stops at whichever is shorter,
    # so narrow sheets and sheets wider than six columns both work.
    setters = (
        ExcelEntity.set_name1,
        ExcelEntity.set_name2,
        ExcelEntity.set_key3,
        ExcelEntity.set_key4,
        ExcelEntity.set_state5,
        ExcelEntity.set_state6,
    )

    rows = []
    for row_index in range(sheet.nrows):
        bean = ExcelEntity()
        for col_index, setter in zip(range(sheet.ncols), setters):
            value = sheet.cell_value(row_index, col_index)
            setter(bean, '' if value is None else value)
        rows.append(bean)

    total = len(rows)
    strVar2.set('解析html中...')
    # The context manager shuts the pool down even if a lookup raises.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in input order, so a running counter
        # gives the same position a list search would, without the O(n) scan.
        for done, data in enumerate(executor.map(readHtml, rows), start=1):
            progressbarOne['value'] = done * 100 / total
            strVar3.set('{}/{}'.format(done, total))
            # Repaint the window; the lookups run inside a Tk callback, so
            # the main loop is not processing events on its own.
            root.update()
            print(data.get_name2())
    return rows
# Create a new workbook and write the results into it.
def writeExcel(readList):
    """Write every entity in ``readList`` to './店铺数量New.xls'.

    Each entity becomes one row of a sheet named 'sheet1'; its six fields
    go into columns 0-5 in the order name1, name2, key3, key4, state5,
    state6. The module-level progress widgets (``strVar2``, ``strVar3``,
    ``progressbarOne``, ``root``) are refreshed after every row.

    Args:
        readList: Sequence of objects exposing the six ``get_*`` accessors.
    """
    strVar2.set('写入excel表格中...')
    book = xlwt.Workbook()
    sheet = book.add_sheet('sheet1')

    total = len(readList)
    for row_index, b in enumerate(readList):
        done = row_index + 1
        progressbarOne['value'] = done * 100 / total
        strVar3.set('{}/{}'.format(done, total))
        # Repaint the window so the progress bar moves while writing.
        root.update()

        cells = (
            b.get_name1(),
            b.get_name2(),
            b.get_key3(),
            b.get_key4(),
            b.get_state5(),
            b.get_state6(),
        )
        for col_index, value in enumerate(cells):
            sheet.write(row_index, col_index, value)

    book.save('./店铺数量New.xls')
    strVar2.set('写入完成!在该应用所在的目录下找到【店铺数量New.xls】文件')
# Button callback: pick a workbook and process it.
def funOpen():
    """Ask the user for an Excel file, then run the read/lookup/write pipeline.

    Nothing happens if the dialog is cancelled. If processing fails, the
    error is shown in a message box and the status label is updated before
    the exception is re-raised, so the failure is visible even when the
    program runs without a console.
    """
    fileName = filedialog.askopenfilename(
        title='选择文件',
        filetypes=[("Excel文件", "*.xlsx"), ("Excel文件", "*.xls")],
        initialdir='./'
    )
    # A cancelled dialog yields an empty value; there is nothing to process.
    if not fileName:
        return

    strVar1.set(fileName)
    try:
        writeExcel(readExcel(fileName))
    except Exception as exc:
        # Top-level GUI boundary: surface the problem to the user, then let
        # Tk's own callback error reporting still see the exception.
        strVar2.set('处理失败')
        messagebox.showerror('处理失败', str(exc))
        raise
if __name__ == '__main__':
    # Build the window. The names root, strVar1-3 and progressbarOne are
    # module-level globals that the callbacks above read and update.
    root = Tk()
    root.title('酒仙网店铺爬虫')
    root.minsize(400, 300)

    # One stretchable column; rows 1-3 share the vertical space 2:1:2.
    root.columnconfigure(1, weight=1)
    root.rowconfigure(1, weight=2)
    root.rowconfigure(2, weight=1)
    root.rowconfigure(3, weight=2)

    strVar1 = StringVar()
    strVar2 = StringVar()
    strVar3 = StringVar()
    strVar1.set('将要显示的文件名')
    strVar2.set('进度说明')
    strVar3.set('0/0')

    # Create and place widgets in separate steps: grid() returns None, so
    # chaining it onto the constructor would lose the widget reference.
    btnOpen = Button(root, text='选择Excel文件', command=funOpen)
    btnOpen.grid(row=1, column=1)
    labelName = Label(root, textvariable=strVar1)
    labelName.grid(row=2, column=1)

    f1 = Frame(root)
    labelProgress = Label(f1, textvariable=strVar2)
    labelProgress.grid(row=1, column=1)
    labelProgressIndex = Label(f1, textvariable=strVar3)
    labelProgressIndex.grid(row=2, column=2)

    # Progress bar running from 0 to 100 (percent of rows handled).
    progressbarOne = ttk.Progressbar(f1)
    progressbarOne['maximum'] = 100
    progressbarOne['value'] = 0
    progressbarOne.grid(row=2, column=1)
    f1.grid(row=3, column=1)

    root.mainloop()
打包前需要先安装 pyinstaller(执行 pip install pyinstaller),然后运行命令 pyinstaller -F xxx.py --noconsole,即可将脚本打包成 exe 可执行程序。