Python 简单爬虫代码

业务需求:给定一个包含店铺名称的 Excel 文件,在酒仙网逐个搜索这些店铺,检查店铺是否已关店、商品是否已下架。

在这里插入图片描述

# -*- coding: UTF-8 -*-

import requests
from tkinter import *
from tkinter import filedialog, messagebox, ttk
import xlrd
import xlwt
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor


class ExcelEntity:
    """One spreadsheet row: four input columns plus two result columns.

    Columns 0-3 of the sheet are stored as ``name1``, ``name2``, ``key3``
    and ``key4``; columns 4-5 are stored as ``state5`` and ``state6``.
    ``name2`` is the value the crawler searches for, and ``state5`` /
    ``state6`` receive the crawl result.

    Every field defaults to the empty string, so ``ExcelEntity()`` keeps
    working; initial values may optionally be passed as keyword arguments.
    """

    def __init__(self, name1='', name2='', key3='', key4='',
                 state5='', state6=''):
        # Per-instance state, so each row owns its values explicitly instead
        # of falling back to shared class-level defaults.
        self.__name1 = name1
        self.__name2 = name2
        self.__key3 = key3
        self.__key4 = key4
        self.__state5 = state5
        self.__state6 = state6

    def __repr__(self):
        return (
            f"{type(self).__name__}("
            f"name1={self.__name1!r}, name2={self.__name2!r}, "
            f"key3={self.__key3!r}, key4={self.__key4!r}, "
            f"state5={self.__state5!r}, state6={self.__state6!r})"
        )

    def set_name1(self, name1):
        """Store the value of column 0."""
        self.__name1 = name1

    def get_name1(self):
        """Return the value of column 0."""
        return self.__name1

    def set_name2(self, name2):
        """Store the value of column 1 (the search key)."""
        self.__name2 = name2

    def get_name2(self):
        """Return the value of column 1 (the search key)."""
        return self.__name2

    def set_key3(self, key3):
        """Store the value of column 2."""
        self.__key3 = key3

    def get_key3(self):
        """Return the value of column 2."""
        return self.__key3

    def set_key4(self, key4):
        """Store the value of column 3."""
        self.__key4 = key4

    def get_key4(self):
        """Return the value of column 3."""
        return self.__key4

    def get_state5(self):
        """Return the value of column 4."""
        return self.__state5

    def set_state5(self, state5):
        """Store the value of column 4."""
        self.__state5 = state5

    def get_state6(self):
        """Return the value of column 5."""
        return self.__state6

    def set_state6(self, state6):
        """Store the value of column 5."""
        self.__state6 = state6


# Fetch and parse the search-result page for one row
def readHtml(b):
    """Fill in the two state columns of ``b`` and return the same object.

    The header row (the one whose shop-name cell contains '店铺名称') gets
    the column titles. Every other row triggers a search request; the row
    is marked as closed / delisted when the result page contains a
    ``div.notic_box`` element, and as open / listed otherwise.

    A row whose request fails (connection error, timeout, ...) is marked
    '请求失败' / '未知' instead of raising, so one bad request does not
    abort the whole batch.

    :param b: row object exposing ``get_name2``, ``set_state5`` and
        ``set_state6``.
    :return: ``b`` itself, updated in place.
    """
    # Numeric cells arrive as floats from the spreadsheet reader; coerce so
    # the substring test and the query parameter both work.
    shop_name = str(b.get_name2())

    if '店铺名称' in shop_name:
        b.set_state5('备注')
        b.set_state6('状态')
        return b

    try:
        # `params` lets requests percent-encode the shop name, so names
        # containing '&', '#' or spaces no longer corrupt the query string.
        response = requests.get(
            'http://list.jiuxian.com/search.htm',
            params={'key': shop_name, 'area': '2'},
            timeout=10,
        )
    except requests.RequestException:
        b.set_state5('请求失败')
        b.set_state6('未知')
        return b

    # Parse the HTML text with BeautifulSoup
    html = BeautifulSoup(response.text, features="lxml")
    if html.find_all('div', class_='notic_box'):
        b.set_state5('已下架')
        b.set_state6('关店')
    else:
        b.set_state5('未下架')
        b.set_state6('在开')
    return b


# Read the workbook and crawl every row
def readExcel(name):
    """Load the first sheet of ``name`` and crawl the status of every row.

    Each sheet row becomes an ``ExcelEntity`` holding its first six
    columns (further columns are ignored). All rows are then passed to
    ``readHtml`` through a thread pool while the progress bar and counter
    label are updated.

    :param name: path of the workbook to open with ``xlrd``.
    :return: list of ``ExcelEntity`` objects in sheet order, with their
        state columns filled in by ``readHtml``.
    """
    # NOTE(review): recent xlrd releases only read the legacy .xls format;
    # confirm the installed version if .xlsx input has to be supported.
    sheet = xlrd.open_workbook(name).sheet_by_index(0)

    # Setter for each supported column, indexed by column number.
    column_setters = (
        'set_name1', 'set_name2', 'set_key3',
        'set_key4', 'set_state5', 'set_state6',
    )
    used_cols = min(sheet.ncols, len(column_setters))

    rows = []
    for row_index in range(sheet.nrows):
        bean = ExcelEntity()
        for col_index in range(used_cols):
            value = sheet.cell_value(row_index, col_index)
            if value is None:
                value = ''
            getattr(bean, column_setters[col_index])(value)
        rows.append(bean)

    total = len(rows)
    strVar2.set('解析html中...')
    # Thread pool; the context manager shuts the workers down when done.
    with ThreadPoolExecutor(max_workers=100) as executor:
        # map() yields results in input order, so the running count is the
        # 1-based position of the row that has just been received.
        for done, data in enumerate(executor.map(readHtml, rows), start=1):
            progressbarOne['value'] = done * 100 / total
            strVar3.set(str(done) + '/' + str(total))
            # Refresh the window so the progress is visible
            root.update()
            print(data.get_name2())
    return rows


# Create a new workbook and write the results
def writeExcel(readList):
    """Write every row of ``readList`` to './店铺数量New.xls'.

    Row ``i`` of the list is written to sheet row ``i``, with the six
    entity fields in columns 0-5. The progress bar and counter label are
    refreshed after each row, and the status label reports completion.

    :param readList: sequence of row objects exposing ``get_name1``,
        ``get_name2``, ``get_key3``, ``get_key4``, ``get_state5`` and
        ``get_state6``.
    """
    strVar2.set('写入excel表格中...')
    book = xlwt.Workbook()
    sheet = book.add_sheet('sheet1')
    total = len(readList)

    # enumerate() gives the row position directly instead of searching the
    # list for each element on every iteration.
    for row_index, b in enumerate(readList):
        done = row_index + 1
        progressbarOne['value'] = done * 100 / total
        strVar3.set(str(done) + '/' + str(total))
        root.update()

        values = (
            b.get_name1(), b.get_name2(), b.get_key3(),
            b.get_key4(), b.get_state5(), b.get_state6(),
        )
        for col_index, value in enumerate(values):
            sheet.write(row_index, col_index, value)

    book.save('./店铺数量New.xls')
    strVar2.set('写入完成!在该应用所在的目录下找到【店铺数量New.xls】文件')


# Choose the input file and run the whole pipeline
def funOpen():
    """Ask for an Excel file, crawl its rows and write the result workbook.

    Does nothing when the dialog is cancelled. Any error raised while
    reading, crawling or writing is shown in an error dialog, because an
    exception raised inside a Tk callback is otherwise only printed to
    stderr, which is not visible when the app runs without a console.
    """
    fileName = filedialog.askopenfilename(
        title='选择文件',
        filetypes=[("Excel文件", "*.xlsx"), ("Excel文件", "*.xls")],
        initialdir='./'
    )
    # The dialog returns an empty value when the user cancels.
    if not fileName:
        return

    strVar1.set(fileName)
    try:
        writeExcel(readExcel(fileName))
    except Exception as err:  # UI boundary: report instead of failing silently
        strVar2.set('处理失败')
        messagebox.showerror('错误', str(err))

if __name__ == '__main__':
    # Build the window only when run as a script. The names below stay
    # module-level globals because the functions above read `root`,
    # `strVar1`..`strVar3` and `progressbarOne` directly.
    root = Tk()
    root.title('酒仙网店铺爬虫')
    root.minsize(400, 300)
    root.columnconfigure(1, weight=1)
    root.rowconfigure(1, weight=2)
    root.rowconfigure(2, weight=1)
    root.rowconfigure(3, weight=2)

    # Text shown by the labels: selected file, status message, "done/total".
    strVar1 = StringVar()
    strVar2 = StringVar()
    strVar3 = StringVar()
    strVar1.set('将要显示的文件名')
    strVar2.set('进度说明')
    strVar3.set('0/0')

    # .grid() returns None, so the widgets are placed without binding names.
    Button(root, text='选择Excel文件', command=funOpen).grid(row=1, column=1)
    Label(root, textvariable=strVar1).grid(row=2, column=1)

    f1 = Frame(root)
    Label(f1, textvariable=strVar2).grid(row=1, column=1)
    Label(f1, textvariable=strVar3).grid(row=2, column=2)

    # Progress bar
    progressbarOne = ttk.Progressbar(f1)
    # Maximum progress value
    progressbarOne['maximum'] = 100
    # Initial progress value
    progressbarOne['value'] = 0
    progressbarOne.grid(row=2, column=1)
    f1.grid(row=3, column=1)

    root.mainloop()

打包方法:先通过 pip install pyinstaller 安装 pyinstaller,再执行命令 pyinstaller -F xxx.py --noconsole,即可将脚本打包成 exe 可执行程序。

  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值