python爬虫程序-使用python做一个爬虫GUI程序

1 #-*- coding:utf-8 -*-

2

3 importrequests4 from requests.exceptions importRequestException5 importtkinter as tk6 from tkinter importttk7 from bs4 importBeautifulSoup8 importbs49 from tkinter import *

10 from tkinter.filedialog importaskdirectory11 importos12

class DB():
    """Tkinter GUI for crawling image galleries from dbmeinv.com.

    The window offers a category picker, a page-count entry, a target
    directory chooser, a crawl button, and a text box that logs progress.
    Images are saved under ``<chosen path>/pics/<category>/``.
    """

    # Maps the user-visible category label to the site's ``cid`` query value.
    _CID_BY_LABEL = {
        '大胸妹': 2,
        '小翘臀': 6,
        '黑丝袜': 7,
        '美腿控': 3,
        '有颜值': 4,
        '大杂烩': 5,
    }

    def __init__(self):
        # Main window.
        self.window = tk.Tk()
        self.window.title("Crawler Pics")
        # self.window.resizable(0, 0)  # uncomment to forbid window resizing

        # Input widgets: category combobox, save-path entry, page-count entry.
        self.menu = ttk.Combobox(self.window, width=6)
        self.path = StringVar()
        self.lab1 = tk.Label(self.window, text="目标路径:")
        self.lab2 = tk.Label(self.window, text="选择分类:")
        self.lab3 = tk.Label(self.window, text="爬取页数:")
        self.page = tk.Entry(self.window, width=5)
        # Entry showing where images will be stored (bound to self.path).
        self.input = tk.Entry(self.window, textvariable=self.path, width=80)
        # Text box that logs download progress.
        self.info = tk.Text(self.window, height=20)

        self.menu['value'] = ('大胸妹', '小翘臀', '黑丝袜', '美腿控', '有颜值', '大杂烩')
        self.menu.current(0)

        # Buttons: choose save directory, start the crawl, clear the log box.
        self.t_button = tk.Button(self.window, text='选择路径', relief=tk.RAISED,
                                  width=8, height=1, command=self.select_Path)
        self.t_button1 = tk.Button(self.window, text='爬取', relief=tk.RAISED,
                                   width=8, height=1, command=self.download)
        self.c_button2 = tk.Button(self.window, text='清空输出', relief=tk.RAISED,
                                   width=8, height=1, command=self.cle)

    def gui_arrang(self):
        """Lay out all widgets on the window grid."""
        self.lab1.grid(row=0, column=0)
        self.lab2.grid(row=1, column=0)
        self.menu.grid(row=1, column=1, sticky=W)
        self.lab3.grid(row=2, column=0, padx=5, pady=5, sticky=tk.W)
        self.page.grid(row=2, column=1, sticky=W)
        self.input.grid(row=0, column=1)
        self.info.grid(row=3, rowspan=5, column=0, columnspan=3, padx=15, pady=15)
        self.t_button.grid(row=0, column=2, padx=5, pady=5, sticky=tk.W)
        self.t_button1.grid(row=1, column=2)
        self.c_button2.grid(row=0, column=3, padx=5, pady=5, sticky=tk.W)

    def get_cid(self):
        """Return the site cid for the selected category, or None if unknown."""
        return self._CID_BY_LABEL.get(self.menu.get())

    def select_Path(self):
        """Ask the user for a directory and show it in the path entry."""
        self.path.set(askdirectory())

    def get_html(self, url, header=None):
        """GET *url* and return the response text, or None on any failure.

        BUG FIX: ``requests.get`` now sits *inside* the try block — in the
        original it ran before it, so connection errors escaped the
        RequestException handler as unhandled exceptions.
        """
        try:
            response = requests.get(url, headers=header)
            if response.status_code == 200:
                return response.text
            return None
        except RequestException:
            print("请求失败")
            return None

    def parse_html(self, html, list_data):
        """Append every <img> tag's (alt, src) pair to *list_data*.

        Returns the collected pairs as a dict mapping alt text -> image URL
        (a later duplicate alt overwrites an earlier one, as in the original).
        """
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup.find_all('img'):
            if isinstance(tag, bs4.element.Tag):
                list_data.append([tag.get('alt'), tag.get('src')])
        return dict(list_data)

    def get_image_content(self, url):
        """Download one image URL and return its bytes, or None on failure."""
        print("正在下载", url)
        self.info.insert('end', "正在下载:" + url + ' ')
        try:
            r = requests.get(url)
            if r.status_code == 200:
                return r.content
            return None
        except RequestException:
            return None

    def download(self):
        """Crawl the chosen number of listing pages and save their images.

        BUG FIXES vs. the original: the request headers were built on every
        iteration but never passed to ``get_html`` (now hoisted and sent);
        a failed page fetch handed ``html=None`` to BeautifulSoup (now
        skipped); and six copy-pasted per-category branches are collapsed
        into ``_prepare_dirs`` + ``_save_images``.
        """
        headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q = 0.9, image/webp,image/apng,*/*;q='
                      '0.8',
            'Accept-Encoding': 'gzip,deflate,br',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Cache-Control': 'max-age=0',
            'Connection': 'keep-alive',
            'Host': 'www.dbmeinv.com',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0(WindowsNT6.1;Win64;x64) AppleWebKit/537.36(KHTML, likeGecko) Chrome/'
                          '70.0.3538.102Safari/537.36'
        }
        base_url = 'https://www.dbmeinv.com/index.htm?'
        save_path = self._prepare_dirs()
        for page in range(1, int(self.page.get()) + 1):
            url = base_url + 'cid=' + str(self.get_cid()) + '&' + 'pager_offset=' + str(page)
            html = self.get_html(url, headers)
            if html is None:
                continue  # page fetch failed; don't feed None to the parser
            images = self.parse_html(html, [])
            if save_path:
                self._save_images(images, save_path)

    def _prepare_dirs(self):
        """Create <root>/pics/<category> for every category.

        Returns the directory for the currently selected category, or None
        when the combobox value is not a known category.
        """
        root_dir = self.input.get()
        for label in self._CID_BY_LABEL:
            cat_dir = os.path.join(root_dir, 'pics', label)
            if not os.path.exists(cat_dir):
                os.makedirs(cat_dir)
        selected = self.menu.get()
        if selected in self._CID_BY_LABEL:
            return os.path.join(root_dir, 'pics', selected)
        return None

    def _save_images(self, images, save_path):
        """Write each image in *images* (alt -> url) into *save_path*.

        Existing files are skipped; failed downloads and unwritable paths
        are silently skipped, matching the original best-effort behavior.
        """
        for name, src in images.items():
            if not name:
                continue  # <img> without alt text: no usable file name
            file_path = os.path.join(save_path, name + 'q' + '.jpg')
            if os.path.exists(file_path):
                continue  # already crawled
            content = self.get_image_content(src)
            if content is None:
                continue  # BUG FIX: original wrote None -> TypeError
            try:
                with open(file_path, 'wb') as f:
                    f.write(content)
                print('文件保存成功')
            except OSError:
                # Covers FileNotFoundError too; one original branch already
                # caught OSError while the others caught FileNotFoundError.
                continue

    def cle(self):
        """Clear the whole progress log (first line through last)."""
        self.info.delete(1.0, "end")

236

237

def main():
    """Build the crawler GUI, lay out its widgets, and run the event loop."""
    app = DB()
    app.gui_arrang()
    tk.mainloop()


if __name__ == '__main__':
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值