爬虫11111

# -*- coding: utf-8 -*-

import ttkbootstrap as ttk
from ttkbootstrap.constants import *
from ttkbootstrap.dialogs import Messagebox
import tkinter.messagebox
import threading
#import dy_comment_service
import time
import pandas as pd

import requests

def download(pageNo, shopUUid):
    """Fetch one page of shop reviews from the Dianping mobile endpoint.

    Args:
        pageNo: Page index, sent as the ``pageCount`` query parameter.
        shopUUid: Shop identifier, sent as the ``shopUuid`` query parameter.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body cannot be decoded as JSON.
    """
    # NOTE(review): these headers look like they were copied from a recorded
    # mobile-browser request. "Referer" embeds one fixed shop id and "mtgsig"
    # embeds fixed timestamps, so the server may reject them once stale —
    # confirm before relying on this for other shops.
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://m.dianping.com/shop/la0ac8FgUTV4G4Wu?from=shoplist&shoplistqueryid=16ee7baa-3b3b-4831-9289-e35b398b780a",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36 Edg/100.0.1185.50",
        "channel": "H5",
        "channelversion": "undefined",
        "minaname": "dianping-wxapp",
        "minaversion": "8.9.3",
        # The value is itself a JSON document, hence the single-quoted literal.
        "mtgsig": '{"a1":"1.0","a2":1651202621659,"a3":"1651202593614KKWKKQSfd79fef3d01d5e9aadc18ccd4d0c95072025","a4":"79e54980f3f683788049e5797883f6f35bc9bd0a234a4bf9","a5":"4xuHoD7Kw7dO6NdfhKFxjzSPJU0f5wgeS9BkHRnKXOT0RPdCkZTOreCRJSHAeQ4DSJc5kqI=","a6":"h1.0AmS3UOXli8Mp23WhHc1yaFffC+Iq/LcDxPp4NUWS55zjxyQ+fru7iZfXbya9w1FAZHgg6gKGdGq7GM2yZfddvLQ8ij7tqdVdhYPMWKW/CDbbbOdpCHb2fkVDjHEntfSG7H8kp1jQW/6bAW1eRSBiAOeJVegcRNlMl7f8pSo0ykMmFffdJ7My1CbjhzBAy3ptGPpVpugoS/JtofBIduzuOI8ma6+bQu0moJhMi6exfopCnKR+dNndCWv6i5+VG95mrPDI0Vl7q2R5zNe6qKirR+/qTVHeSW1gROUviIiRf7iIM52UOFowV/h1hSl0EP3lPDg1vu35jULNKDwrl2ss18UsvT3K4rmIZ0PEBjxwcBkXR6OMJ15ccIiN0T43Vby9kEFu2lBZbScg49W2O2S4kze8J0gkDekJfU+vExQxN0KcEVcr7c1HcGiVO18rhy4PNpGnxIDYcLcTeahdZ1Zt2OPqtdziRaTfVpbsaax/9H6Z7DgxO579Y9CyHbWtZtvZjM4VY12/+qeNeHoT0lqLB1RyRq0gmeuA2bMfNaULQrMIPgmhkarvZ3ZP++9LCog0","x0":4,"d1":"553cf1cf2d9f6eb7ba963f6188f87f4e"}',
        "platform": "undefined",
        "platformversion": "undefined",
        "sdkversion": "undefined",
        "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="100", "Microsoft Edge";v="100"',
        "sec-ch-ua-mobile": "?1",
        "sec-ch-ua-platform": '"Android"',
        "token": "null",
        "wechatversion": "undefined",
    }

    url = "https://m.dianping.com/ugc/review/shop/shopreview"
    params = {
        "pageSize": "10",
        "pageCount": pageNo,
        "isNeedNewReview": "1",
        "shopUuid": shopUUid,
        "device_system": "ANDROID",
    }

    # A timeout keeps the worker thread from hanging forever on a stalled
    # connection; raise_for_status surfaces HTTP errors instead of trying to
    # decode an error page as JSON.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    response.raise_for_status()

    return response.json()

def parse(reviewBody):
    """Join the text fragments of a review body into a single string.

    Children that carry a ``"name"`` key are skipped (presumably non-text
    nodes such as images or emoji — confirm against the API payload), as are
    children that have no ``"text"`` key.

    Args:
        reviewBody: Mapping with a ``"children"`` list of node dicts. May be
            ``None`` or lack ``"children"``.

    Returns:
        The ``"text"`` values of the remaining children, each converted with
        ``str`` and joined by ``"\\r"``. An empty string when there is
        nothing to join.
    """
    if not reviewBody:
        return ""

    fragments = [
        str(child["text"])
        for child in (reviewBody.get("children") or [])
        if "name" not in child and "text" in child
    ]
    return "\r".join(fragments)

def crawler(pageNo, shopUUid):
    """Download one page of reviews and flatten each review into a row dict.

    Args:
        pageNo: Page index forwarded to ``download``.
        shopUUid: Shop identifier; when falsy (e.g. empty string) a built-in
            default shop id is used instead.

    Returns:
        A list of dicts, one per review, whose keys are the Chinese column
        labels used for display and export. Key insertion order matches the
        column order.

    Raises:
        KeyError: If the response lacks ``shopReviewInfo``/``reviewList`` or a
            review lacks one of the expected fields.
    """
    if not shopUUid:
        shopUUid = "la0ac8FgUTV4G4Wu"

    js = download(pageNo, shopUUid)

    # (output column label, key in the review dict), in column order.
    field_map = (
        ("点评人id", "userId"),
        ("点评帖id", "reviewId"),
        ("用户头像", "userPhoto"),
        ("用户昵称", "userNickName"),
        ("用户简介", "authorDesc"),
        ("vip等级", "vipLevel"),
        ("上次发布时间", "lastTime"),
        ("点评最大点赞数", "flowerTotal"),
        ("点评浏览数", "browseCount"),
    )

    rows = []
    for review in js["shopReviewInfo"]["reviewList"]:
        row = {label: review[key] for label, key in field_map}
        # The review text is a node tree; flatten it last so it is the final column.
        row["点评内容"] = parse(review["reviewBody"])
        rows.append(row)

    return rows

class MyThread(threading.Thread):
    """Daemon thread that starts itself on construction and runs ``func(*args)``.

    Args:
        func: Callable executed in the new thread.
        *args: Positional arguments passed to ``func``.
    """

    def __init__(self, func, *args):
        # daemon=True so a still-running worker does not keep the process alive.
        super().__init__(daemon=True)

        self.func = func
        self.args = args

        # The thread starts immediately; callers only need to construct it.
        self.start()

    def run(self):
        """Invoke the stored callable with the stored arguments."""
        return self.func(*self.args)

# Rows from the most recent crawl: filled by submit(), read and cleared by export_data().
results = []

def export_data():
    """Write the collected rows in ``results`` to an Excel file and clear them.

    The file is written to the fixed path ``D://点评评论.xlsx``. After writing,
    ``results`` is emptied and an information dialog is shown.

    NOTE(review): this is invoked from a worker thread by the export button;
    showing a tkinter dialog off the main thread may not be safe — confirm.
    """
    df = pd.DataFrame(results)
    # Replace empty (NaN) cells with empty strings.
    df.fillna('', inplace=True)
    # No ``encoding`` argument: pandas 2.0 removed it from to_excel.
    df.to_excel("D://点评评论.xlsx", index=False)
    print("导出完毕")
    results.clear()
    tkinter.messagebox.showinfo('提示', '导出完毕,导出文件目录为 D://点评评论.xlsx')

def delButton(tv):
    """Delete every item returned by ``tv.get_children()`` from the tree view.

    Args:
        tv: Tree view widget exposing ``get_children()`` and ``delete(item)``.
    """
    for item in tv.get_children():
        tv.delete(item)

def submit(input, tv, pageNo=4):
    """Crawl one page of reviews for the entered shop id and show them in ``tv``.

    Clears the previously collected rows and the tree view, crawls the page,
    stores each row in the module-level ``results`` list, inserts it into the
    tree view, and finally shows an information dialog.

    Args:
        input: Entry-like widget; ``input.get()`` supplies the shop id.
        tv: Tree view that receives one row per review.
        pageNo: Page index to crawl. Defaults to 4, the value that was
            previously hard-coded.
    """
    shopUUid = input.get()

    results.clear()
    delButton(tv)
    # NOTE(review): the original comment here said only purely numeric input
    # would be processed, but no such check exists in this function.
    for row in crawler(pageNo, shopUUid):
        results.append(row)
        # Pass a real tuple: tkinter only expands list/tuple values into
        # separate cells, not a dict_values view.
        tv.insert("", END, values=tuple(row.values()))

    tkinter.messagebox.showinfo('提示', '采集完毕,请导出')

def setup_demo(master):
    """Build the scraper window contents inside ``master`` and return the root frame.

    Sets the window geometry, then creates a title row, a shop-id entry with
    submit and export buttons, and a ten-column results table. The returned
    frame is not packed; the caller is expected to do that.

    Args:
        master: The top-level window that owns the widgets.

    Returns:
        The ``ttk.Frame`` containing all created widgets.
    """
    master.geometry("800x450+200+20")
    root = ttk.Frame(master, padding=10)

    # Title row.
    theme_selection = ttk.Frame(root, padding=(10, 10, 10, 0), height=4)
    theme_selection.pack(fill=X, expand=YES)

    theme_selected = ttk.Label(
        master=theme_selection, text="山东景区点评评论采集", font="-size 24 -weight bold"
    )
    theme_selected.pack(side=LEFT)

    ttk.Separator(root).pack(fill=X, pady=10, padx=10)

    lframe = ttk.Frame(root, padding=5)
    lframe.pack(side=LEFT, fill=BOTH, expand=YES)

    rframe = ttk.Frame(root, padding=5)
    rframe.pack(side=RIGHT, fill=BOTH, expand=YES)

    # Input group: shop id entry plus the two action buttons.
    color_group = ttk.Labelframe(
        master=lframe, text="请输入点评shopUuid", padding=10
    )
    color_group.pack(fill=X, side=TOP)

    input_dy = ttk.Entry(color_group, text="shopUuid", width=60)
    input_dy.pack(side=LEFT, padx=5, fill=X)

    ttframe = ttk.Frame(lframe)
    ttframe.pack(pady=5, fill=X, side=TOP)

    # Results table; column ids are the integer positions 0..9.
    column_titles = (
        "点评人id",
        "点评帖id",
        "用户头像",
        "用户昵称",
        "用户简介",
        "vip等级",
        "上次发布时间",
        "点评点赞数",
        "点评浏览数",
        "点评内容",
    )
    tv = ttk.Treeview(master=ttframe, columns=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], show=HEADINGS, height=10)

    for index, title in enumerate(column_titles):
        tv.heading(index, text=title)

    last_index = len(column_titles) - 1
    for index in range(len(column_titles)):
        if index == last_index:
            # The final (review text) column keeps its default width.
            tv.column(index, anchor=CENTER)
        else:
            tv.column(index, anchor=CENTER, width="80")

    tv.pack(side=LEFT, anchor=NE, fill=X)

    # Both actions run in a background thread so the UI stays responsive.
    export = ttk.Button(color_group, text="导出", bootstyle="success", width=10,
                        command=lambda: MyThread(export_data))
    export.pack(side=RIGHT, expand=YES, padx=5, fill=X)

    cb = ttk.Button(color_group, text="提交", bootstyle="success", width=10,
                    command=lambda: MyThread(submit, input_dy, tv))
    cb.pack(side=RIGHT, expand=YES, padx=5, fill=X)

    return root

# Script entry point: create the window, build the UI, and run the event loop.
if __name__ == "__main__":
    app = ttk.Window("点评评论采集")
    bagel = setup_demo(app)
    bagel.pack(fill=BOTH, expand=YES)
    app.mainloop()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值