接上次的代码继续完善。这次添加了爬虫翻页的功能,所有采集到的数据都显示在同一个 Treeview 表格中。后面还会继续完善这个框架,等完善之后可能会开源。
先看结果图。
窗口代码:self.tree = Treeview(root, show="headings") 是用来显示结果的表格控件。创建时不需要传入 columns=('serial','id','singer', 'name', 'url') 这样的标题参数,列标题放到后面各个搜索函数里,按需要再设置。
import tkinter as tk
from tkinter import PhotoImage,Entry,Button
from tkinter.ttk import LabelFrame,Label,Treeview,Style,Scrollbar
class Slider_tk:
    """Main window of the song-search tool.

    The constructor creates every widget (frames, labels, entries, buttons
    and the result ``Treeview``) but does not place any of them; it only
    configures the window itself, applies the ttk theme and centres the
    window on screen.

    Args:
        root: The Tk root window to build on. When ``None`` a new
            ``tk.Tk()`` instance is created.
    """

    # The window is never smaller than this, so it is also the size used for centring.
    MIN_WIDTH = 1000
    MIN_HEIGHT = 490
    ICON_PATH = './Image/ico_o.ico'

    def __init__(self, root=None):
        if root is None:
            root = tk.Tk()
        self.root = root
        self.root.title("slider_kz歌曲搜索")
        self.root.minsize(self.MIN_WIDTH, self.MIN_HEIGHT)
        self._set_icon(self.ICON_PATH)

        # Section frames; the unnamed ones are used as thin separators.
        self.Labe_lFram3 = LabelFrame(self.root, text='告示')
        self.Labe_lFrame = LabelFrame(self.root, text='操作界面')
        self.Labe_lFram2 = LabelFrame(self.root, text='数据界面')
        self.Labe_lFram4 = LabelFrame(self.root)
        self.Labe_lFram5 = LabelFrame(self.root)
        self.Labe_lFram6 = LabelFrame(self.root)

        bold = 'Arial 10 bold'

        # Notice shown at the top of the window.
        self.label1 = Label(
            self.root,
            text='双击为下载,下载完毕有提示!请勿填写数字和空,歌曲下载慢,是服务器的原因,要是没有返回结果,需要等一段时间!DJ舞曲搜索请舞曲编号或关键字',
        )

        # First search row.
        self.label = Label(self.root, text='zk查找:')
        self.entry = Entry(self.root, font=bold, width=33)
        self.submit_button = Button(self.root, text="搜索", width=6, height=1, font=bold)

        # Second search row.
        self.label2 = Label(self.root, text='网易查找:')
        self.entry2 = Entry(self.root, font=bold, width=33)
        self.submit_button2 = Button(self.root, text="搜索", width=6, height=1, font=bold)

        # DJ search row and its page selector.
        self.label_dj = Label(self.root, text='DJ舞曲查找:')
        self.label_pa = Label(self.root, text='输入页数:')
        self.entry2_dj = Entry(self.root, font=bold, width=25)
        self.entry2_pa = Entry(self.root, font=bold, width=25)
        self.submit_button_dj = Button(self.root, text="搜索", width=6, height=1, font=bold)
        self.submit_button_pe = Button(self.root, text="翻页", width=6, height=1, font=bold)

        # Result table. Columns are deliberately not set here: each search
        # configures the column layout it needs before inserting rows.
        self.tree = Treeview(self.root, show="headings")

        style = Style()
        style.theme_use("default")

        # Centre using the real (minimum) window size; a smaller size would
        # be overridden by minsize and leave the window off-centre.
        self.center_window(self.MIN_WIDTH, self.MIN_HEIGHT)

    def _set_icon(self, path):
        """Set the window icon from *path* without failing when it cannot be loaded.

        ``PhotoImage`` does not read ICO data, so ``iconbitmap`` is tried
        first and ``iconphoto`` is kept as a fallback for files that are
        really PNG/GIF images. The icon is cosmetic, so a missing or
        unreadable file must not prevent the window from opening.
        """
        try:
            self.root.iconbitmap(path)
            return
        except tk.TclError:
            pass
        try:
            self.root.iconphoto(True, PhotoImage(file=path))
        except tk.TclError:
            # Icon is optional; keep the default one.
            pass

    def center_window(self, width, height):
        """Resize the window to *width* x *height* pixels and centre it on screen."""
        x = (self.root.winfo_screenwidth() - width) // 2
        y = (self.root.winfo_screenheight() - height) // 2
        self.root.geometry(f'{width}x{height}+{x}+{y}')
这里是窗口布局和最后处理爬虫的逻辑。
"""
# @当前时间 :2024/8/19 13:08
# @Author : TS
# @Email : TS@gmail.com
# @File : ts2.py
# @Software: PyCharm
"""
import math
from concurrent.futures import ThreadPoolExecutor
from tkinter import filedialog, messagebox
from slider_tk import Slider_tk
from slider_kz import Slide_kz
from slider_kz import Fart_pi
from dj_zk import Yj_youyou
class Slider_subassembly(Slider_tk):
    """Places the widgets created by ``Slider_tk`` and wires them to the scrapers.

    All three searches write into the same ``Treeview``; each search first
    installs the column layout it needs and then inserts one row per result.
    Double-clicking a row offers to download the track behind it.
    """

    # Column layouts used for the shared result table.
    SONG_COLUMNS = ('serial', 'id', 'singer', 'name', 'url')
    DJ_COLUMNS = ('serial', 'id', 'name', 'time', '页数', 'url')
    # Divisor that turns the DJ hit count into a page count
    # (presumably the site's page size -- confirm against the site).
    DJ_PAGE_SIZE = 20

    def __init__(self, root=None):
        super().__init__(root=root)
        self.kz = Slide_kz()
        self.fp = Fart_pi()
        self.dj = Yj_youyou()
        # Layout currently installed in the tree; used to find the name column on download.
        self._columns = ()
        # Every search button goes through a handler that clears the table
        # first, so rows from a previous search (possibly with a different
        # column layout) never remain visible.
        self.submit_button_dj.config(command=self.dj_submt)
        self.submit_button.config(command=self.on_submit)
        self.submit_button2.config(command=self.on_submt)
        self.submit_button_pe.config(command=self.Page)
        # The on-screen notice promises "double-click to download".
        self.tree.bind("<Double-1>", self.on_tree_click)
        self.show()

    def show(self):
        """Place every widget at its fixed pixel position."""
        self.label1.place(x=10, y=20)
        self.Labe_lFram6.place(x=2, y=190, width=400, height=5)
        self.Labe_lFram5.place(x=2, y=110, width=400, height=5)
        self.Labe_lFram4.place(x=2, y=150, width=400, height=5)
        self.Labe_lFram3.place(x=2, y=1, width=997, height=50)
        self.Labe_lFrame.place(x=2, y=50, width=400, height=448)
        self.Labe_lFram2.place(x=400, y=50, width=600, height=440)
        self.label.place(x=4, y=80)
        self.entry.place(x=100, y=80)
        self.submit_button.place(x=340, y=78)
        self.label2.place(x=4, y=125)
        self.entry2.place(x=100, y=125)
        self.submit_button2.place(x=340, y=120)
        self.tree.place(x=404, y=68, height=418, width=593)
        self.label_dj.place(x=4, y=165)
        self.label_pa.place(x=4, y=210)
        self.entry2_dj.place(x=80, y=165)
        self.entry2_pa.place(x=80, y=210)
        self.submit_button_dj.place(x=270, y=160)
        self.submit_button_pe.place(x=340, y=210)

    def _set_columns(self, columns):
        """Install *columns* as the table layout, using each id as its heading text."""
        self._columns = tuple(columns)
        self.tree['columns'] = self._columns
        for col in self._columns:
            self.tree.heading(col, text=col)
            self.tree.column(col, width=2)

    def search_(self):
        """Run the first search with the text of ``self.entry`` and list the results."""
        self._set_columns(self.SONG_COLUMNS)
        response = self.kz.requests(self.entry.get())
        for serial, song in enumerate(self.kz.pase(response)):
            self.tree.insert(
                "", "end",
                values=(serial, song['id'], song['singer'], song['song_name'], song['url']),
            )

    def Page(self):
        """Clear the table and show the DJ result page typed into the page entry."""
        # An empty entry means "no page given" rather than an empty page value.
        page = self.entry2_pa.get().strip() or None
        self.tree.delete(*self.tree.get_children())
        self.dj_searcj(page)

    def dj_searcj(self, page=None):
        """Run the DJ search with the text of ``self.entry2_dj`` and list the results.

        Args:
            page: Result page to request; ``None`` leaves the choice to the scraper.
        """
        self._set_columns(self.DJ_COLUMNS)
        response = self.dj.requests(self.entry2_dj.get(), page)
        for serial, track in enumerate(self.dj.url_dq(response)):
            # 'Recording' is the total hit count; the table shows the page count.
            page_count = math.ceil(int(track['Recording']) / self.DJ_PAGE_SIZE)
            self.tree.insert(
                "", "end",
                values=(serial, track['id_st'], track['name'], track['time_s'],
                        page_count, track['li_url']),
            )

    def on_tree_cli(self):
        """Run the second search with the text of ``self.entry2`` and list the results."""
        self._set_columns(self.SONG_COLUMNS)
        response = self.fp.res(self.entry2.get())
        for serial, song in enumerate(self.fp.pase(response)):
            # 'title' goes to the name column and 'author' to the singer
            # column (going by the key names; the two were swapped before).
            self.tree.insert(
                "", "end",
                values=(serial, song['songid'], song['author'], song['title'],
                        song['Location']),
            )

    def on_tree_click(self, event):
        """Offer to download the track of the row that was double-clicked.

        The URL is always the last value of the row; the file name is taken
        from the ``name`` column of whichever layout is currently installed.
        """
        if event.widget != self.tree:
            return
        item = self.tree.identify_row(event.y)
        if not item:
            # Click landed on the heading or on empty space.
            return
        values = self.tree.item(item, "values")
        if not values:
            return
        url = values[-1]
        name_index = self._columns.index('name') if 'name' in self._columns else 2
        song_name = values[name_index]
        download_message = "是否下载"
        if messagebox.askyesno("Download", download_message):
            # NOTE(review): every source is downloaded through self.fp; confirm
            # this is also right for DJ rows.
            self.fp.save_mp3(url, song_name)
            # Completion is a notice, not a question.
            messagebox.showinfo("下载完毕", song_name)

    def on_submit(self):
        """Clear the table, then run the first search."""
        self.tree.delete(*self.tree.get_children())
        self.search_()

    def on_submt(self):
        """Clear the table, then run the second search."""
        self.tree.delete(*self.tree.get_children())
        self.on_tree_cli()

    def dj_submt(self):
        """Clear the table, then run the DJ search."""
        self.tree.delete(*self.tree.get_children())
        self.dj_searcj()
这里是布局代码
事件初始化调用
Treeview 列表的标题在这里处理。想展示哪个爬虫的数据,就自己定义对应的代码,可以写多个这样的函数来调用。
下面是其中一个爬虫的代码,里面的 URL 做了脱敏处理(用 Base64 编码,使用时需要先解码)。
import base64
import os
import re

import requests

from slider_tk import Slider_tk
class Yj_youyou:
    """Scraper for the DJ music site: search, per-track detail lookup and download.

    The site URLs are kept Base64-encoded, as the author published them, and
    are decoded at request time.
    """

    # Base64-encoded endpoints; decode with ``_decode`` before use.
    _SEARCH_URL_B64 = 'aHR0cHM6Ly93d3cuZGp1dS5jb20vc2VhcmNo'
    _SITE_URL_B64 = 'aHR0cHM6Ly93d3cuZGp1dS5jb20v'
    _REFERER_B64 = 'aHR0cHM6Ly93d3cuZGp1dS5jb20vc2VhcmNoP211c2ljbmFtZT0lRTQlQkMlQTQlRTYlODQlOUYmbGlzdD0yJmNpZD0wJnBhZ2U9Mw=='

    # Seconds to wait for the server before giving up, so a dead
    # connection cannot block the caller forever.
    _TIMEOUT = 15

    _SAVE_DIR = 'Song_saving'

    # Search-page patterns.
    _RE_RESULT_LINK = re.compile(r'\<a href\=\"(.*?)" target\=\"_Pt\" title\=\".*?"\>\<img ')
    _RE_HIT_COUNT = re.compile(
        r'\<div style\=\"line\-height\:24px\; text\-indent\: 10px\; color\:\#c7c7c7 \"\> 共搜索到 \<span class\=\"fbold\" style\=\"color\: \#de651d\"\>(.*?)\<\/span\> 个关于\‘\<span id\=\".*?"\>\<em\>\<\/em\>\<\/span\>\’的记录\. \<\/div\>'
    )
    _RE_DURATION = re.compile(r'\<\/a\>\<\/span\>\<span class\=\"sc_2\"\>TIME (.*?)\<\/span\>')

    # Detail-page patterns.
    _RE_TITLE = re.compile(r'\<h1\>(.*?)<\/h1\>')
    _RE_FILE = re.compile(
        r"var music \= \{id\: .*?\, type\: \'.*?\'\, name\: \'.*?'\, file\: \'(.*?)\'\, .*?}\,"
    )
    _RE_TRACK_ID = re.compile(
        r"var music \= \{id\: (.*?)\, type\: \'.*?\'\, name\: \'.*?'\, file\: \'.*?\'\, .*?}\,"
    )

    def __init__(self):
        pass

    @staticmethod
    def _decode(encoded):
        """Return the text hidden in the Base64 string *encoded*."""
        return base64.b64decode(encoded).decode('utf-8')

    @staticmethod
    def _safe_filename(name):
        """Return *name* with characters that are illegal in file names replaced by ``_``."""
        cleaned = re.sub(r'[\\/:*?"<>|]', '_', name).strip()
        return cleaned or 'untitled'

    def cookies(self):
        """Return the cookies sent with every request."""
        return {
            'Hm_lvt_93e672d9487d1b71d59dffcaaca8cf4d': '1724142971',
            'HMACCOUNT': 'E543AEA66C42B296',
            'Hm_lvt_1602614188207057874070b514c435ac': '1724142971',
            'musicls': '%7C247911%7C%2C',
            'djuu_mlog': '%7C247911%7C%2C',
            'PLAYSTYLE': '0',
            'bf': '1',
            'PHPSESSID': '47q8h5jpk33r19fuop0dq59sik',
            'search_log': '%2C%u4F24%u611F',
            'Hm_lpvt_93e672d9487d1b71d59dffcaaca8cf4d': '1724154335',
            'Hm_lpvt_1602614188207057874070b514c435ac': '1724154335',
        }

    def headers(self):
        """Return the HTTP headers sent with every request."""
        return {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Referer': self._decode(self._REFERER_B64),
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-User': '?1',
            'Upgrade-Insecure-Requests': '1',
            # Placeholder left by the author: put your own User-Agent string here.
            'User-Agent': '你自己的U-a',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
        }

    def params(self):
        """Return the default query parameters of a search request."""
        return {
            'musicname': '伤感',
            'list': '2',
            'cid': '0',
            'page': '1',
        }

    @classmethod
    def _parse_search(cls, html):
        """Extract detail links, total hit count and durations from a search page.

        Returns a one-element list holding a dict with ``res_url`` (list of
        relative links), ``Recording`` (hit count as text, ``'0'`` when the
        page does not report one) and ``time_s`` (list of durations).
        """
        hit_counts = cls._RE_HIT_COUNT.findall(html)
        return [{
            'res_url': cls._RE_RESULT_LINK.findall(html),
            'Recording': hit_counts[0] if hit_counts else '0',
            'time_s': cls._RE_DURATION.findall(html),
        }]

    @classmethod
    def _parse_detail(cls, html):
        """Extract name, media URL and id from a track page, or ``None`` if any is missing."""
        title = cls._RE_TITLE.search(html)
        media = cls._RE_FILE.search(html)
        track_id = cls._RE_TRACK_ID.search(html)
        if not (title and media and track_id):
            return None
        return {
            'name': title.group(1),
            'li_url': f'https://mp4.djuu.com/{media.group(1)}.m4a',
            'id_st': track_id.group(1),
        }

    def requests(self, musicname, page=None):
        """Search the site for *musicname* and return the parsed result page.

        Args:
            musicname: Search keyword.
            page: Result page to fetch; ``None`` or an empty string keeps
                the default first page.

        Returns:
            The structure described in ``_parse_search``.
        """
        data = self.params()
        data['musicname'] = musicname
        if page not in (None, ''):
            data['page'] = page
        html = requests.get(
            self._decode(self._SEARCH_URL_B64),
            params=data,
            cookies=self.cookies(),
            headers=self.headers(),
            timeout=self._TIMEOUT,
        ).text
        return self._parse_search(html)

    def url_dq(self, musicname):
        """Fetch the detail page of every search hit and yield one dict per track.

        Args:
            musicname: The value returned by ``requests`` (despite the name,
                this is the parsed search result, not a keyword).

        Yields:
            A fresh dict per track with the keys ``Recording``, ``name``,
            ``li_url``, ``id_st`` and ``time_s``. Tracks whose detail page
            cannot be parsed are skipped.
        """
        site = self._decode(self._SITE_URL_B64)
        for batch in musicname:
            for link, duration in zip(batch['res_url'], batch['time_s']):
                html = requests.get(
                    url=site + link,
                    cookies=self.cookies(),
                    headers=self.headers(),
                    timeout=self._TIMEOUT,
                ).text
                track = self._parse_detail(html)
                if track is None:
                    continue
                track['Recording'] = batch['Recording']
                track['time_s'] = duration
                # A new dict is yielded each time so that callers may safely
                # collect the results in a list.
                yield track

    def save_mp3(self, url, song_name):
        """Download *url* and store it as ``Song_saving/<song_name>.mp3``.

        The target folder is created when missing and characters that are
        not allowed in file names are replaced in *song_name*.
        """
        content = requests.get(
            url=url,
            cookies=self.cookies(),
            headers=self.headers(),
            timeout=self._TIMEOUT,
        ).content
        os.makedirs(self._SAVE_DIR, exist_ok=True)
        target = os.path.join(self._SAVE_DIR, self._safe_filename(song_name) + '.mp3')
        with open(target, mode='wb') as f:
            f.write(content)
爬虫写得不是很好,只是比较简单的实现。
https://blog.csdn.net/qq_69920603/article/details/141323031?
完整的代码可以私信联系我。
新手刚写这个,写得不是很好!转载请注明出处,原创不易。