# -*- coding: utf-8 -*-

# Star import retained for any code that still relies on bare Tk names
# (e.g. ``W``, ``StringVar``); it comes first so explicit imports below win.
from tkinter import *  # noqa: F401,F403

import os
import tkinter as tk
from tkinter import ttk
from tkinter.filedialog import askdirectory

import bs4
import requests
from bs4 import BeautifulSoup
from requests.exceptions import RequestException

class DB():
    """Tkinter front end that downloads category images from dbmeinv.com.

    The window offers a target-directory picker, a category combobox, a page
    count entry, a text box that shows progress, and buttons to pick the
    directory, start the crawl and clear the text box.

    Images are stored as ``<target>/pics/<category>/<alt text>q.jpg``.
    """

    # Listing page; ``cid`` and ``pager_offset`` are appended as query values.
    BASE_URL = 'https://www.dbmeinv.com/index.htm?'

    # (combobox label, site category id) pairs in display order. This is the
    # single source of truth for the combobox values, the directory names
    # and the ``cid`` lookup.
    CATEGORIES = (
        ('大胸妹', 2),
        ('小翘臀', 6),
        ('黑丝袜', 7),
        ('美腿控', 3),
        ('有颜值', 4),
        ('大杂烩', 5),
    )

    # Seconds to wait on a request; without it a stalled server would block
    # the GUI callback forever.
    REQUEST_TIMEOUT = 10

    # Browser-like headers sent with the listing-page request.
    # 'Accept-Encoding' is deliberately not overridden: requests advertises
    # only the encodings it can decode, whereas forcing 'br' without a brotli
    # decoder installed would yield undecodable page text.
    REQUEST_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q = 0.9, '
                  'image/webp,image/apng,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Host': 'www.dbmeinv.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0(WindowsNT6.1;Win64;x64) '
                      'AppleWebKit/537.36(KHTML, likeGecko) Chrome/'
                      '70.0.3538.102Safari/537.36',
    }

    def __init__(self):
        """Create the main window and all widgets (layout is done separately)."""
        self.window = tk.Tk()
        self.window.title("Crawler Pics")

        self.menu = ttk.Combobox(self.window, width=6)
        self.path = tk.StringVar()
        self.lab1 = tk.Label(self.window, text="目标路径:")
        self.lab2 = tk.Label(self.window, text="选择分类:")
        self.lab3 = tk.Label(self.window, text="爬取页数:")
        self.page = tk.Entry(self.window, width=5)
        # Entry bound to ``self.path``; shows the directory images go to.
        self.input = tk.Entry(self.window, textvariable=self.path, width=80)
        # Text box that receives progress messages.
        self.info = tk.Text(self.window, height=20)

        self.menu['values'] = tuple(label for label, _ in self.CATEGORIES)
        self.menu.current(0)

        # Button: choose the directory the images are saved to.
        self.t_button = tk.Button(
            self.window, text='选择路径', relief=tk.RAISED,
            width=8, height=1, command=self.select_Path)
        # Button: start crawling.
        self.t_button1 = tk.Button(
            self.window, text='爬取', relief=tk.RAISED,
            width=8, height=1, command=self.download)
        # Button: clear the progress text box.
        self.c_button2 = tk.Button(
            self.window, text='清空输出', relief=tk.RAISED,
            width=8, height=1, command=self.cle)

    def gui_arrang(self):
        """Place every widget on the window's grid."""
        self.lab1.grid(row=0, column=0)
        self.lab2.grid(row=1, column=0)
        self.menu.grid(row=1, column=1, sticky=tk.W)
        self.lab3.grid(row=2, column=0, padx=5, pady=5, sticky=tk.W)
        self.page.grid(row=2, column=1, sticky=tk.W)
        self.input.grid(row=0, column=1)
        self.info.grid(row=3, rowspan=5, column=0, columnspan=3,
                       padx=15, pady=15)
        self.t_button.grid(row=0, column=2, padx=5, pady=5, sticky=tk.W)
        self.t_button1.grid(row=1, column=2)
        self.c_button2.grid(row=0, column=3, padx=5, pady=5, sticky=tk.W)

    def get_cid(self):
        """Return the site category id for the combobox text.

        Returns ``None`` when the text is not one of the known labels (the
        combobox is editable, so arbitrary text is possible).
        """
        return dict(self.CATEGORIES).get(self.menu.get())

    def select_Path(self):
        """Ask for a local directory and store it as the target path."""
        chosen = askdirectory()
        # A cancelled dialog yields an empty value; keep the previous path
        # instead of wiping it.
        if chosen:
            self.path.set(chosen)

    def get_html(self, url, header=None):
        """Fetch ``url`` and return the response text.

        ``header`` is an optional dict of HTTP headers. Returns ``None`` when
        the request raises ``RequestException`` (including a timeout) or the
        status code is not 200.
        """
        try:
            # The call itself must sit inside the ``try``: connection errors
            # are raised here, not when the response is inspected.
            response = requests.get(url, headers=header,
                                    timeout=self.REQUEST_TIMEOUT)
        except RequestException:
            print("请求失败")
            return None
        if response.status_code == 200:
            return response.text
        return None

    def parse_html(self, html, list_data):
        """Collect ``alt``/``src`` of every ``<img>`` in ``html``.

        Each image is appended to ``list_data`` as ``[alt, src]`` (the list is
        mutated in place) and ``dict(list_data)`` is returned, so images that
        share an ``alt`` text collapse to the last one seen. Missing
        attributes appear as ``None``.
        """
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup.find_all('img'):
            if isinstance(tag, bs4.element.Tag):
                list_data.append([tag.get('alt'), tag.get('src')])
        return dict(list_data)

    def get_image_content(self, url):
        """Download ``url`` and return its bytes, or ``None`` on failure.

        The URL is echoed to stdout and to the progress text box first.
        Failure means a ``RequestException`` (including a timeout) or a
        status code other than 200.
        """
        print("正在下载", url)
        self.info.insert('end', "正在下载:" + url + ' ')
        try:
            r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        except RequestException:
            return None
        if r.status_code == 200:
            return r.content
        return None

    @staticmethod
    def _safe_name(name):
        """Return ``name`` with path separators replaced by ``_``.

        The name comes from remote ``alt`` text; without this a value such as
        ``../../x`` would place the file outside the category directory.
        """
        return name.replace('/', '_').replace('\\', '_')

    def _report(self, message):
        """Append ``message`` as its own line to the progress text box."""
        self.info.insert('end', message + '\n')

    def download(self):
        """Crawl the requested number of listing pages and save new images.

        Reads the page count, target directory and category from the widgets.
        Invalid input (non-numeric or non-positive page count, empty target
        directory, unknown category) is reported in the progress text box and
        nothing is downloaded. Images whose file already exists are skipped;
        pages or images that fail to download are skipped as well.
        """
        try:
            page_count = int(self.page.get())
        except ValueError:
            page_count = 0
        if page_count < 1:
            self._report('爬取页数必须是正整数')
            return

        root_dir = self.input.get()
        if not root_dir:
            # An empty path would otherwise resolve to '/pics'.
            self._report('请先选择目标路径')
            return

        category = self.menu.get()
        cid = self.get_cid()
        if cid is None:
            self._report('未知分类:' + category)
            return

        # Create the directory of every category once, up front.
        pics_dir = os.path.join(root_dir, 'pics')
        try:
            for label, _ in self.CATEGORIES:
                os.makedirs(os.path.join(pics_dir, label), exist_ok=True)
        except OSError:
            self._report('无法创建目录:' + pics_dir)
            return
        save_dir = os.path.join(pics_dir, category)

        for page in range(1, page_count + 1):
            url = (self.BASE_URL + 'cid=' + str(cid)
                   + '&' + 'pager_offset=' + str(page))
            html = self.get_html(url, self.REQUEST_HEADERS)
            if html is None:
                self._report('页面请求失败:' + url)
                continue

            for name, img_src in self.parse_html(html, []).items():
                # Images lacking alt text or a source cannot be named/fetched.
                if name is None or img_src is None:
                    continue
                file_path = os.path.join(
                    save_dir, self._safe_name(name) + 'q' + '.jpg')
                if os.path.exists(file_path):
                    continue
                # Fetch before opening the file so a failed download does not
                # leave an empty file that later runs would treat as done.
                content = self.get_image_content(img_src)
                if content is None:
                    continue
                try:
                    with open(file_path, 'wb') as f:
                        f.write(content)
                except OSError:
                    # e.g. a name the file system rejects; skip this image.
                    continue
                print('文件保存成功')

    def cle(self):
        """Clear the progress text box from its first line to the end."""
        self.info.delete('1.0', "end")
236
237
def main():
    """Build the crawler window, lay out its widgets and run the Tk event loop."""
    app = DB()
    app.gui_arrang()
    tk.mainloop()


if __name__ == '__main__':
    main()