爬虫GUI,结合OpenCV与Tk库来显示视频和图片

一个Python实验作业

源码

# -*- coding: utf-8 -*-
"""
Created on Mon Nov 16 15:06:56 2020

@author: 散修涵
"""

'''
爬取的信息
'''

# Index into datalist of the movie currently shown; changed by System.Next / System.Prior.
i=0
# Scraped movie rows; Catch.getData appends to it and System reads from it.
datalist=[]
from tkinter import *
import cv2
import os
from PIL import Image, ImageTk
from tkinter import ttk
import requests

from bs4 import BeautifulSoup
import re 
import urllib.request,urllib.error
import xlwt
import sqlite3
import PIL.Image,PIL.ImageTk
import tkinter as tk
#import HP_mplay as hmv#用这个弥补opencv没有声音的缺憾



class System:
    """Tk front end that pages through the scraped movie rows.

    The class reads two module-level names: ``datalist`` (one row per movie,
    indexed as ``[actors, trailer_url, poster_url, detail_url, title,
    director]``) and the cursor ``i`` that selects the row being shown.
    Constructing an instance builds the window and blocks in the Tk main loop.
    """

    # Directory in which posters and trailers are cached.
    # NOTE(review): on Windows os.path.join("F:", "video") yields the
    # drive-relative path "F:video", not "F:\\video" - confirm the intent.
    SAVE_DIR = os.path.join("F:", "video")

    # Characters that are not allowed in Windows file names, mapped to "_".
    _UNSAFE_CHARS = dict.fromkeys(map(ord, '\\/:*?"<>|'), "_")

    # Seconds to wait for the media server before giving up on a download.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self):
        """Create the main window, build the first page and run the main loop."""
        self.camera = None      # cv2.VideoCapture of the media being displayed
        self._after_id = None   # id of the pending video_loop callback, if any
        self.root = Tk()
        self.root.title('MOVIE  SYSTEM                 @author: 散修涵')
        self.root.geometry('800x600')
        self.createFirstPage()
        self.root.mainloop()

    @staticmethod
    def _makeButton(parent, text, command, width, bg):
        """Return an unpacked Button in the look shared by every page."""
        return Button(parent, width=width, height=2, text=text, bg=bg,
                      font=("宋", 12), relief='raise', command=command)

    def createFirstPage(self):
        """Build the main page: info text, prev/next buttons and the menu row."""
        self.pageShow = Frame(self.root)
        self.pageShow.pack()
        Label(self.pageShow, text='查询即将上映电影系统', font=('粗体', 20)).pack()
        self.data1 = Label(self.pageShow, width=780)
        self.data1.pack(padx=5, pady=5)

        Label(self.pageShow, text='当前电影信息', font=('粗体', 10)).pack(padx=5, pady=5)
        self.text = Text(self.pageShow, height=20, width=80)
        self.text.pack()
        self.infomationShow()

        self.button15 = self._makeButton(self.pageShow, "上一部", self.Prior, 8, "Cornsilk")
        self.button15.pack(side=LEFT, padx=215, pady=20)
        self.button16 = self._makeButton(self.pageShow, "下一部", self.Next, 8, "Cornsilk")
        self.button16.pack(side=LEFT, padx=0, pady=20)

        self.page1 = Frame(self.root)
        self.page1.pack()

        self.button11 = self._makeButton(self.page1, "海报", self.createImgPage, 18, "Aquamarine")
        self.button11.pack(side=LEFT, padx=25, pady=10)
        self.button12 = self._makeButton(self.page1, "预告片", self.createSecondPage, 18, "Aquamarine")
        self.button12.pack(side=LEFT, padx=25, pady=10)
        self.button13 = self._makeButton(self.page1, "查询电影列表", self.checkDataView, 18, "Aquamarine")
        self.button13.pack(side=LEFT, padx=25, pady=10)
        self.button14 = self._makeButton(self.page1, "退出系统", self.quitMain, 18, "Aquamarine")
        self.button14.pack(side=LEFT, padx=25, pady=10)

    def Next(self):
        """Advance to the next movie; do nothing when already on the last one."""
        global i
        # The last valid index is len(datalist) - 1, so stop one step earlier
        # than the list length to keep datalist[i] in range.
        if i >= len(datalist) - 1:
            return
        i = i + 1
        self.infomationShow()

    def Prior(self):
        """Go back to the previous movie; do nothing when already on the first."""
        global i
        if i <= 0:
            return
        i = i - 1
        self.infomationShow()

    def infomationShow(self):
        """Replace the text box content with the details of movie ``i``."""
        self.text.delete('1.0', 'end')
        if not datalist:
            # Nothing was scraped (network failure, page layout change, ...).
            self.text.insert('end', "暂无电影信息\n")
            return
        movie = datalist[i]
        self.text.insert('end', "电影名:" + str(movie[4]) + "\n" + "\n")
        self.text.insert('end', "导演:" + str(movie[5]) + "\n" + "\n")
        self.text.insert('end', "主演:")
        for actor in movie[0]:
            self.text.insert('end', str(actor) + "  ")
        self.text.insert('end', "\n" + "\n")
        self.text.insert('end', "电影详情链接:" + str(movie[3]) + "\n" + "\n ")

    def _download(self, URL, extension):
        """Cache ``URL`` as ``<SAVE_DIR>/<title>.<extension>`` (best effort).

        The resulting path is stored in ``self.file_path``. Nothing is
        downloaded when the file already exists. Network and file-system
        errors are reported on stdout and otherwise ignored, because the
        display code streams from the URL and does not need the cached copy.
        """
        title = str(datalist[i][4]).translate(self._UNSAFE_CHARS)
        self.file_path = '{0}/{1}.{2}'.format(self.SAVE_DIR, title, extension)
        if os.path.exists(self.file_path):
            return
        try:
            response = requests.get(URL, timeout=self.DOWNLOAD_TIMEOUT)
            response.raise_for_status()  # do not store an error page as media
            os.makedirs(self.SAVE_DIR, exist_ok=True)
            with open(self.file_path, 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as exc:
            print("download failed for {0}: {1}".format(URL, exc))

    def saveImaFile(self, URL):
        """Cache the poster at ``URL`` as a .jpg named after the current movie."""
        self._download(URL, 'jpg')

    def saveVidFile(self, URL):
        """Cache the trailer at ``URL`` as a .mp4 named after the current movie."""
        self._download(URL, 'mp4')

    def _stopPlayback(self):
        """Cancel the pending frame callback and release the capture, if any."""
        if self._after_id is not None:
            self.root.after_cancel(self._after_id)
            self._after_id = None
        if self.camera is not None:
            self.camera.release()
            self.camera = None

    def _showMedia(self, heading, url):
        """Swap the main page for a viewer page titled ``heading`` playing ``url``."""
        self._stopPlayback()
        self.camera = cv2.VideoCapture(url)
        self.page1.pack_forget()
        self.pageShow.pack_forget()
        self.page2 = Frame(self.root)
        self.page2.pack()
        Label(self.page2, text=heading, font=('粗体', 20)).pack()
        self.data2 = Label(self.page2)
        self.data2.pack(padx=5, pady=5)

        self.button21 = self._makeButton(self.page2, "返回", self.backFirst, 18, 'gray')
        self.button21.pack(padx=25, pady=10)

        self.video_loop(self.data2)

    def createImgPage(self):
        """Cache and display the poster of the current movie."""
        if not datalist:
            return
        self.url1 = "".join(datalist[i][2])
        self.saveImaFile(self.url1)
        self._showMedia('电影海报', self.url1)

    def createSecondPage(self):
        """Cache and play the trailer of the current movie."""
        if not datalist:
            return
        self.url = "".join(datalist[i][1])
        self.saveVidFile(self.url)
        self._showMedia('预告片', self.url)

    def video_loop(self, panela):
        """Show the next frame on ``panela`` and reschedule until frames run out."""
        self._after_id = None
        if self.camera is None:
            return
        success, img = self.camera.read()  # grab one frame from the capture
        if not success:
            return
        cv2image = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)  # BGR -> RGBA for PIL
        current_image = Image.fromarray(cv2image)
        imgtk = ImageTk.PhotoImage(image=current_image)
        panela.imgtk = imgtk  # keep a reference so Tk does not lose the image
        panela.config(image=imgtk)
        self._after_id = self.root.after(1, lambda: self.video_loop(panela))

    def checkDataView(self):
        """Replace the main page with a table listing every scraped title."""
        self.page1.pack_forget()
        self.pageShow.pack_forget()
        self.root.geometry('600x360')
        self.page3 = Frame(self.root)
        Label(self.page3, text='当前获取的电影列表', fg='red', font=('宋体', 25)).pack(side=TOP, fill='x')
        self.checkDate = ttk.Treeview(self.page3, columns=('name',))

        self.checkDate.heading('#0', text='序号')
        self.checkDate.heading('name', text='电影名')
        self.checkDate.column('name', width=200, anchor="center")

        self.checkDate.tag_configure("stripe", background="LightBlue")
        for number, movie in enumerate(datalist, start=1):
            # values must be a sequence: a bare string is split on whitespace
            # by Tcl, which would truncate titles that contain spaces.
            row = (movie[4],)
            if number % 2:
                # odd-numbered rows (1st, 3rd, ...) get the coloured background
                self.checkDate.insert("", 'end', text=number, values=row, tags=("stripe",))
            else:
                self.checkDate.insert("", 'end', text=number, values=row)

        # vertical scrollbar
        yscrollbar = Scrollbar(self.page3, orient=VERTICAL, command=self.checkDate.yview)
        self.checkDate.configure(yscrollcommand=yscrollbar.set)
        yscrollbar.pack(side=RIGHT, fill=Y)

        self.checkDate.pack(expand=1, fill=BOTH)
        self._makeButton(self.page3, "返回", self.backMain, 20, 'gray').pack(padx=20, pady=20)
        self.page3.pack()

    def backFirst(self):
        """Leave the viewer page and return to the main page."""
        self._stopPlayback()
        # The viewer page is rebuilt on every visit, so destroy it rather than
        # merely hiding it.
        self.page2.destroy()
        self.pageShow.pack()
        self.page1.pack()

    def backMain(self):
        """Leave the movie list and return to the main page."""
        self.root.geometry('800x600')  # same size the window starts with
        self.page3.destroy()
        self.pageShow.pack()
        self.page1.pack()

    def quitMain(self):
        """Terminate the program with exit status 0."""
        import sys  # imported here because the module does not import it

        sys.exit(0)

class Catch():
    """Scraper for the Douban "now playing" listing.

    Instantiating the class runs :meth:`Main`, which appends one row per movie
    to the module-level ``datalist``. Every row has exactly six fields:
    ``[actors, trailer_url, poster_url, detail_url, title, director]``.
    Missing fields are stored as empty strings (or an empty actor list) so the
    field positions never shift.
    """

    # Patterns applied to the HTML of individual tags selected by BeautifulSoup.
    findLink = re.compile(r'<a class="ticket-btn" data-psource="poster" href="(.*?)" target="_blank">')
    findImag = re.compile(r'<img.*?src="(.*?)".*?>', re.S)
    findTitle = re.compile(r'<span property="v:itemreviewed">(.*)</span>')
    findDrector = re.compile(r'<a href=".*?" rel="v:directedBy">(.*)</a>')
    findActor = re.compile(r'<a\b href="[^"]*"[^>]*>([\s\S]*?)</a>', re.S)
    findVideo = re.compile(r'<a\b[^>]+\bhref="([^"]*)"[^>]*>[\s\S]*?</a>', re.S)
    findRealVideo = re.compile(r'<source src="(.*?)".*?>')

    def __init__(self):
        """Run the scrape immediately."""
        self.Main()

    def Main(self):
        """Fetch the listing page and scrape the movies it links to."""
        baseurl = "https://movie.douban.com/cinema/nowplaying/nanchang/"
        urlList = self.getUrl(baseurl)
        self.getData(urlList)

    def getUrl(self, baseurl):
        """Return the detail-page links found on the listing page.

        Each element is the list of ``findLink`` matches for one
        ``<li class="poster">`` tag, so it may be empty.
        """
        html = self.askURL(baseurl)
        soup = BeautifulSoup(html, "html.parser")
        return [re.findall(self.findLink, str(item))
                for item in soup.find_all("li", class_="poster")]

    def getData(self, urlList, start=1, stop=10):
        """Scrape ``urlList[start:stop]`` and append the rows to ``datalist``.

        Args:
            urlList: link lists as returned by :meth:`getUrl`.
            start: index of the first entry to scrape.
            stop: index one past the last entry to scrape.

        Returns:
            The module-level ``datalist``.
        """
        # NOTE(review): the default start of 1 skips the first link, as the
        # original loop did - confirm whether that is intentional.
        # Slicing tolerates listings shorter than ``stop`` entries.
        for links in urlList[start:stop]:
            url = "".join(links)
            if not url:
                continue  # poster entry without a ticket link
            html = self.askURL(url)
            soup = BeautifulSoup(html, "html.parser")
            for item in soup.find_all('div', id="wrapper"):
                datalist.append(self._parseMovie(item, url))
        return datalist

    def _parseMovie(self, wrapper, url):
        """Build the six-field row for one detail page."""
        actors = ["".join(re.findall(self.findActor, str(tag)))
                  for tag in wrapper.find_all('a', rel="v:starring")]
        trailer = self._findTrailer(wrapper)
        poster = self._findPoster(wrapper)

        # The remaining patterns run against the raw HTML of the wrapper.
        text = str(wrapper)
        title = "".join(re.findall(self.findTitle, text))
        director = "".join(re.findall(self.findDrector, text))
        return [actors, trailer, poster, url, title, director]

    def _findTrailer(self, wrapper):
        """Return the first trailer stream URL for the page, or ""."""
        for entry in wrapper.find_all('li', class_="label-trailer"):
            links = re.findall(self.findVideo, str(entry))
            if not links:
                continue
            # The link points at a trailer page; the stream URL is inside it.
            page = self.askURL(links[0])
            soup_video = BeautifulSoup(page, "html.parser")
            for cont in soup_video.find_all('div', class_="cont"):
                sources = re.findall(self.findRealVideo, str(cont))
                if sources:
                    return sources[0]
        return ""

    def _findPoster(self, wrapper):
        """Return the first image URL in the subject block, or ""."""
        for block in wrapper.find_all('div', class_="subject clearfix"):
            images = re.findall(self.findImag, str(block))
            if images:
                return images[0]
        return ""

    def askURL(self, url, timeout=10):
        """Fetch ``url`` and return its body decoded as UTF-8.

        Args:
            url: address to request.
            timeout: seconds to wait for the server.

        Returns:
            The page text, or ``" "`` when the request fails, the URL is
            malformed or the body is not valid UTF-8. The failure is printed.
        """
        head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
                }
        html = " "
        try:
            request = urllib.request.Request(url, headers=head)
            with urllib.request.urlopen(request, timeout=timeout) as response:
                html = response.read().decode("utf-8")
        except urllib.error.URLError as e:
            if hasattr(e, "code"):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)
        except (OSError, ValueError) as e:
            # OSError: socket timeouts and resets; ValueError: malformed URL
            # or a body that is not valid UTF-8.
            print(e)
        return html


if __name__ == '__main__':
	# Scrape first: Catch() runs Main(), which fills the module-level datalist
	# that System reads while building its first page.
	catch=Catch()
	# Build the GUI; System.__init__ enters the Tk main loop.
	spyder= System()


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值