原文作者为:
Donoy
环境:
python2.7
ScrolledText
Tkinter
1.代码分析:
thread = threading.Thread(target=myThread)
thread.start()
创建一个以 myThread 为目标函数的线程,并通过 start() 方法启动它
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : (timestamp unreadable in the recovered source)
# @Author  : Donoy (83@qq.com)
# @Link    : http://www.cnblogs.com/Donoy/
# @Version : $Id$
# Multi-threaded video crawler with a Tkinter GUI (Python 2.7).

import os
import re
import sys  # used below to change the default string encoding
import threading
import urllib  # provides urlretrieve, used to download the videos
from Tkinter import *
from ScrolledText import ScrolledText  # scrollable text widget

import requests

# Python 2 only: make utf-8 the implicit str <-> unicode codec so that the
# Chinese titles scraped from the page can be concatenated with byte strings.
reload(sys)
sys.setdefaultencoding('utf-8')

# Shared work list filled by getVideoUrl(); each entry is [name_match, url]
# where name_match is the (anchor_html, title) tuple produced by _NAME_RE.
Video_Data = []

# Directory the downloaded .mp4 files are written to (trailing separator
# included because file names are appended by plain concatenation).
SAVE_DIR = 'F:\\save\\vedio\\'

# Seconds to wait for the listing page before giving up; the request runs on
# the GUI thread, so an unbounded wait would freeze the window.
REQUEST_TIMEOUT = 10

# Patterns are compiled once at import time instead of on every loop pass.
# re.S lets '.' match newlines so one pattern can span a multi-line block.
_ITEM_RE = re.compile(r'(<div class="j-r-list-c">.*?</div>.*?</div>)', re.S)
_MP4_RE = re.compile(r'data-mp4="(.*?)"')
_NAME_RE = re.compile(r'(<a href="/detail-.{8}.html">(.*?)</a>)', re.S)


def creatWnd():
    """Build the main window: a scrolled log, a status label and a start button.

    Publishes the widgets the other functions need through the module-level
    globals ``root``, ``varl`` (status text) and ``text`` (download log).
    """
    global root
    global varl
    global text

    root = Tk()
    root.title('DSpider')

    # Scrollable log area listing every video as it is downloaded.
    text = ScrolledText(root, font=('微软雅黑'))
    text.grid()

    # Status label bound to a StringVar so it can be updated with varl.set().
    varl = StringVar()
    label = Label(root, font=('微软雅黑'), fg='red', textvariable=varl)
    label.grid()
    varl.set('Fight......')

    # The button starts the crawl and the background download.
    button = Button(root, text='开始爬取', font='黑体', command=begin_Thread)
    button.grid()

    # NOTE(review): in the recovered source the mainloop call appears
    # commented out and nothing visible calls creatWnd(); presumably the
    # caller is expected to run the Tk event loop -- confirm.
    # root.mainloop()


def getHtmlData(url):
    """Fetch ``url`` with a browser-like User-Agent and return the body text.

    Raises whatever ``requests.get`` raises (connection errors, timeouts).
    """
    # NOTE(review): the User-Agent in the recovered source had lost digits and
    # was split across two lines; it is rebuilt here as a well-formed Chrome
    # User-Agent. The exact Chrome version is a best guess -- confirm.
    RequestHeader = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/55.0.2883.87 Safari/537.36'
    }
    response = requests.get(url, headers=RequestHeader,
                            timeout=REQUEST_TIMEOUT)
    return response.text


def getVideoUrl(html):
    """Extract (name, mp4 url) pairs from ``html`` and append them to Video_Data.

    Each list item block is searched for ``data-mp4`` attributes; blocks
    without one are skipped. Names and urls found in the same block are paired
    positionally.
    """
    for item in _ITEM_RE.findall(html):
        video_urls = _MP4_RE.findall(item)
        if not video_urls:
            continue
        video_names = _NAME_RE.findall(item)
        # zip pairs the n-th name with the n-th url and drops any surplus.
        for name, url in zip(video_names, video_urls):
            Video_Data.append([name, url])


def begin_Thread():
    """Button callback: crawl the listing page, then download in a thread.

    The page fetch happens synchronously; only the downloads run in the
    background thread.
    """
    url = 'http://www.budejie.com/video/'
    try:
        html = getHtmlData(url)
    except requests.RequestException as e:
        # An exception escaping a Tk callback is only printed to stderr, so
        # surface the failure in the window instead.
        varl.set('页面获取失败: %s' % e)
        return
    getVideoUrl(html)

    # Report the real number of queued videos. The original divided by two,
    # which matched the old download loop that skipped half of the entries.
    varl.set('一共%s个小视频,现在开始下载......' % len(Video_Data))
    thread = threading.Thread(target=myThread)
    thread.start()


def myThread():
    """Worker: download every queued video to SAVE_DIR as ``<n>.mp4``.

    Works on a snapshot of Video_Data and empties the shared list, instead of
    popping from the list while iterating over it (which skipped entries).
    """
    # NOTE(review): the widgets are updated from this worker thread; Tkinter
    # does not promise that this is safe -- consider handing updates to the
    # main loop if glitches appear.
    pending = list(Video_Data)
    del Video_Data[:]

    # urlretrieve fails if the target directory is missing.
    if not os.path.isdir(SAVE_DIR):
        os.makedirs(SAVE_DIR)

    for index, (name, url) in enumerate(pending, 1):
        # name is the (anchor_html, title) tuple; [1] is the title text.
        text.insert(END, str(index) + '.' + name[1] + url + '\n')
        urllib.urlretrieve(url, SAVE_DIR + str(index) + '.mp4')
    varl.set('所有的视频都下载完成')