# 在这里插入代码片 (blog-editor placeholder, "insert code snippet here"; not part of the program)
# -*- coding: utf-8 -*-
import ttkbootstrap as ttk
from ttkbootstrap.constants import *
from ttkbootstrap.dialogs import Messagebox
import tkinter.messagebox
import threading
#import dy_comment_service
import time
import pandas as pd
import requests
def download(pageNo, shopUUid):
    """Fetch one page of shop reviews from the Dianping mobile review endpoint.

    Args:
        pageNo: Value sent as the ``pageCount`` query parameter.
        shopUUid: Value sent as the ``shopUuid`` query parameter.

    Returns:
        The decoded JSON body of the HTTP response.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    # NOTE(review): "mtgsig" and "Referer" look like values captured from a
    # single browser session (the signature embeds a timestamp); they may
    # expire or be shop-specific -- confirm before relying on them.
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://m.dianping.com/shop/la0ac8FgUTV4G4Wu?from=shoplist&shoplistqueryid=16ee7baa-3b3b-4831-9289-e35b398b780a",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36 Edg/100.0.1185.50",
        "channel": "H5",
        "channelversion": "undefined",
        "minaname": "dianping-wxapp",
        "minaversion": "8.9.3",
        "mtgsig": "{\"a1\":\"1.0\",\"a2\":1651202621659,\"a3\":\"1651202593614KKWKKQSfd79fef3d01d5e9aadc18ccd4d0c95072025\",\"a4\":\"79e54980f3f683788049e5797883f6f35bc9bd0a234a4bf9\",\"a5\":\"4xuHoD7Kw7dO6NdfhKFxjzSPJU0f5wgeS9BkHRnKXOT0RPdCkZTOreCRJSHAeQ4DSJc5kqI=\",\"a6\":\"h1.0AmS3UOXli8Mp23WhHc1yaFffC+Iq/LcDxPp4NUWS55zjxyQ+fru7iZfXbya9w1FAZHgg6gKGdGq7GM2yZfddvLQ8ij7tqdVdhYPMWKW/CDbbbOdpCHb2fkVDjHEntfSG7H8kp1jQW/6bAW1eRSBiAOeJVegcRNlMl7f8pSo0ykMmFffdJ7My1CbjhzBAy3ptGPpVpugoS/JtofBIduzuOI8ma6+bQu0moJhMi6exfopCnKR+dNndCWv6i5+VG95mrPDI0Vl7q2R5zNe6qKirR+/qTVHeSW1gROUviIiRf7iIM52UOFowV/h1hSl0EP3lPDg1vu35jULNKDwrl2ss18UsvT3K4rmIZ0PEBjxwcBkXR6OMJ15ccIiN0T43Vby9kEFu2lBZbScg49W2O2S4kze8J0gkDekJfU+vExQxN0KcEVcr7c1HcGiVO18rhy4PNpGnxIDYcLcTeahdZ1Zt2OPqtdziRaTfVpbsaax/9H6Z7DgxO579Y9CyHbWtZtvZjM4VY12/+qeNeHoT0lqLB1RyRq0gmeuA2bMfNaULQrMIPgmhkarvZ3ZP++9LCog0\",\"x0\":4,\"d1\":\"553cf1cf2d9f6eb7ba963f6188f87f4e\"}",
        "platform": "undefined",
        "platformversion": "undefined",
        "sdkversion": "undefined",
        "sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"100\", \"Microsoft Edge\";v=\"100\"",
        "sec-ch-ua-mobile": "?1",
        "sec-ch-ua-platform": "\"Android\"",
        "token": "null",
        "wechatversion": "undefined"
    }
    url = "https://m.dianping.com/ugc/review/shop/shopreview"
    # The "mtsiReferrer" query parameter is intentionally not sent.
    params = {
        "pageSize": "10",
        "pageCount": pageNo,
        "isNeedNewReview": "1",
        "shopUuid": shopUUid,
        "device_system": "ANDROID"
    }
    # A timeout keeps the worker thread from hanging forever on a stalled
    # connection; requests has no default timeout.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    # Surface HTTP errors here instead of as a confusing KeyError downstream.
    response.raise_for_status()
    # Decode the body once and reuse it for both the debug print and the return.
    payload = response.json()
    print(payload)
    return payload
def parse(reviewBody):
    """Flatten a review body into plain text.

    Reads ``reviewBody["children"]`` and keeps the ``"text"`` value of every
    child that has no ``"name"`` key; children carrying a ``"name"`` key are
    skipped. The kept texts are converted with ``str`` and joined with
    carriage returns.

    Args:
        reviewBody: Mapping with an optional ``"children"`` list of mappings.
            ``None``, a missing ``"children"`` key, or an empty list yield "".

    Returns:
        The kept text fragments joined with ``"\\r"``.
    """
    children = (reviewBody or {}).get("children") or []
    return "\r".join(
        str(child["text"])
        for child in children
        # Children without a "text" key are skipped rather than aborting
        # the whole scrape with a KeyError.
        if "name" not in child and "text" in child
    )
def crawler(pageNo, shopUUid):
    """Download one page of reviews and convert it into flat row dicts.

    Args:
        pageNo: Page value forwarded to ``download``.
        shopUUid: Shop identifier forwarded to ``download``; a falsy value
            falls back to the built-in default shop id.

    Returns:
        A list with one dict per review. Keys are the Chinese column names
        used for display and export, in a fixed insertion order with the
        review text ("点评内容") last. Fields missing from a review become "".
        An empty list is returned when the response carries no review list.
    """
    if not shopUUid:
        shopUUid = "la0ac8FgUTV4G4Wu"

    # (output column, key in the API review object), in output order.
    field_map = (
        ("点评人id", "userId"),
        ("点评帖id", "reviewId"),
        ("用户头像", "userPhoto"),
        ("用户昵称", "userNickName"),
        ("用户简介", "authorDesc"),
        ("vip等级", "vipLevel"),
        ("上次发布时间", "lastTime"),
        ("点评最大点赞数", "flowerTotal"),
        ("点评浏览数", "browseCount"),
    )

    js = download(pageNo, shopUUid)
    # Tolerate responses that lack the review section instead of raising
    # KeyError inside the worker thread.
    reviewList = (js.get("shopReviewInfo") or {}).get("reviewList") or []

    results = []
    for review in reviewList:
        result = {column: review.get(key, "") for column, key in field_map}
        # Always hand parse() a mapping with a "children" list.
        result["点评内容"] = parse(review.get("reviewBody") or {"children": []})
        results.append(result)
    return results
class MyThread(threading.Thread):
    """Daemon thread that starts itself on construction and runs ``func(*args)``.

    Args:
        func: Callable executed in the new thread.
        *args: Positional arguments passed to ``func``.
    """

    def __init__(self, func, *args):
        # Thread.setDaemon() is deprecated since Python 3.10; pass the flag
        # to the constructor instead.
        super().__init__(daemon=True)
        self.func = func
        self.args = args
        # The thread is started right here, so callers only need to
        # instantiate the class.
        self.start()

    def run(self):
        """Invoke the stored callable with the stored arguments."""
        return self.func(*self.args)
# Module-level buffer of scraped review rows; filled by submit() and
# drained by export_data().
results = []
def export_data():
    """Write the collected rows in ``results`` to an Excel file and clear them.

    The file is written to the fixed path ``D://点评评论.xlsx``. After a
    successful export the module-level ``results`` list is emptied and an
    information dialog is shown.
    """
    # Replace empty (NaN) cells with empty strings.
    df = pd.DataFrame(results).fillna('')
    # The ``encoding`` argument of to_excel was removed in pandas 2.0 and
    # raises TypeError there, so it is no longer passed.
    df.to_excel("D://点评评论.xlsx", index=False)
    print("导出完毕")
    results.clear()
    # NOTE(review): setup_demo runs this function through MyThread, so this
    # dialog is opened from a worker thread -- confirm Tk tolerates that on
    # the target platform.
    tkinter.messagebox.showinfo('提示', '导出完毕,导出文件目录为 D://点评评论.xlsx')
def delButton(tv):
    """Remove every top-level item from the given tree view.

    Args:
        tv: Widget exposing ``get_children()`` and ``delete(*items)``
            (a ttk ``Treeview`` in this file).
    """
    items = tv.get_children()
    print(items)
    # One call deletes all items instead of one round-trip per item.
    tv.delete(*items)
def submit(input, tv, pageNo=4):
    """Scrape one page of reviews for the entered shop and show them in the table.

    Args:
        input: Widget whose ``get()`` returns the shop UUID typed by the user.
        tv: Tree view that displays the scraped rows; it is emptied first.
        pageNo: Page passed to ``crawler``. Defaults to 4, the value that was
            previously hard-coded.
    """
    shopUUid = input.get()
    results.clear()
    delButton(tv)
    # NOTE(review): the original comment here mentioned only running for
    # purely numeric input, but no such validation exists in the code.
    for row in crawler(pageNo, shopUUid):
        results.append(row)
        # Pass a real list: a dict_values view is not a list/tuple, so Tk
        # would receive its string representation instead of a sequence.
        tv.insert("", END, values=list(row.values()))
    tkinter.messagebox.showinfo('提示', '采集完毕,请导出')
def setup_demo(master):
    """Build the scraper UI inside ``master`` and return its root frame.

    The layout consists of a title row, a separator, an input group holding
    the shop UUID entry plus the submit/export buttons, and a ten-column
    table for the scraped reviews. The returned frame is not packed here;
    the caller does that.

    Args:
        master: The top-level window; its geometry is set to 800x450+200+20.

    Returns:
        The ``ttk.Frame`` containing the whole UI.
    """
    master.geometry("800x450+200+20")
    root = ttk.Frame(master, padding=10)

    # Title row.
    theme_selection = ttk.Frame(root, padding=(10, 10, 10, 0), height=4)
    theme_selection.pack(fill=X, expand=YES)
    theme_selected = ttk.Label(
        master=theme_selection, text="山东景区点评评论采集", font="-size 24 -weight bold"
    )
    theme_selected.pack(side=LEFT)
    ttk.Separator(root).pack(fill=X, pady=10, padx=10)

    # Left and right panes; only the left one receives widgets here.
    lframe = ttk.Frame(root, padding=5)
    lframe.pack(side=LEFT, fill=BOTH, expand=YES)
    rframe = ttk.Frame(root, padding=5)
    rframe.pack(side=RIGHT, fill=BOTH, expand=YES)

    # Input group: shop UUID entry (the buttons are packed into it below).
    color_group = ttk.Labelframe(
        master=lframe, text="请输入点评shopUuid", padding=10
    )
    color_group.pack(fill=X, side=TOP)
    input_dy = ttk.Entry(color_group, text="shopUuid", width=60)
    input_dy.pack(side=LEFT, padx=5, fill=X)

    # Result table.
    ttframe = ttk.Frame(lframe)
    ttframe.pack(pady=5, fill=X, side=TOP)
    headings = (
        "点评人id",
        "点评帖id",
        "用户头像",
        "用户昵称",
        "用户简介",
        "vip等级",
        "上次发布时间",
        "点评点赞数",
        "点评浏览数",
        "点评内容",
    )
    tv = ttk.Treeview(
        master=ttframe, columns=list(range(len(headings))), show=HEADINGS, height=10
    )
    last_index = len(headings) - 1
    for index, title in enumerate(headings):
        tv.heading(index, text=title)
        if index == last_index:
            # The last column (review text) gets no explicit width.
            tv.column(index, anchor=CENTER)
        else:
            tv.column(index, anchor=CENTER, width="80")
    tv.pack(side=LEFT, anchor=NE, fill=X)

    # Buttons run their handlers on MyThread worker threads.
    export = ttk.Button(color_group, text="导出", bootstyle="success", width=10,
                        command=lambda: MyThread(export_data))
    export.pack(side=RIGHT, expand=YES, padx=5, fill=X)
    cb = ttk.Button(color_group, text="提交", bootstyle="success", width=10,
                    command=lambda: MyThread(submit, input_dy, tv))
    cb.pack(side=RIGHT, expand=YES, padx=5, fill=X)
    return root
if __name__ == "__main__":
    # Create the main window, mount the UI frame and enter the Tk event loop.
    app = ttk.Window("点评评论采集")
    bagel = setup_demo(app)
    bagel.pack(fill=BOTH, expand=YES)
    app.mainloop()
# 1222222222
# 最新推荐文章于 2024-04-07 11:05:38 发布 (blog page residue, not part of the program)