结合tkinter与opencv爬取豆瓣电影

结合tkinter与opencv爬取豆瓣电影(1)

前言:练习python爬虫技术

文章声明:广搜多方资源,如有侵权,请立即联系本人修改

——————————————————————————

一、学习前需掌握的技术

1.正则表达式
2.tkinter库
3.opencv库
4.文件操作
5.爬虫的基本知识

二、代码使用说明

在D盘中新建两个文件夹,分别命名为poster和video(分别用来保存海报和预告片),并将代码中的User-Agent改为自己浏览器里的,如图:
在这里插入图片描述

运行之前需要先安装代码用到的第三方库(opencv-python、requests、beautifulsoup4、Pillow),在cmd中用pip即可安装,具体方法可自行百度;如果实在解决不了或者下载速度太慢,请在评论区说明情况,我会教你使用清华镜像安装^ ^。基本功能已经实现,准备工作完成后,把下面的源代码复制粘贴到自己的编辑器中即可运行。

三、源代码

from tkinter import *
import tkinter as tk
from tkinter import ttk
import cv2
import os
import requests
from bs4 import BeautifulSoup
import re 
import urllib.request,urllib.error
import PIL.Image,PIL.ImageTk
from PIL import Image, ImageTk

class Douban:
    """Tkinter window for browsing the crawled movie records.

    The class reads two module-level globals: ``Data_info`` (the list of
    records) and ``i`` (index of the record currently shown).  Records are
    accessed by position: [0] actor names, [1] trailer video URL,
    [2] poster image URL, [3] URL displayed as the trailer link, [4] title,
    [5] director.
    """

    # Characters that are not allowed in Windows file names.
    _BAD_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self):
        """Build the main window and enter the Tk event loop (blocks)."""
        self.camera = None
        self._after_id = None  # id of the pending frame-refresh callback
        self.root = Tk()
        self.root.title('douban.movie')
        self.root.geometry('1000x800')
        self.Douban_info()
        self.root.mainloop()

    def Douban_info(self):
        """Create every widget and show the record at the current index."""
        self.frame1 = Frame(self.root)
        self.Image_info1 = Label(self.frame1)
        self.Image_info1.pack(padx=5, pady=5)
        self.frame2 = Frame(self.root)
        self.Image_info2 = Label(self.frame2, width=500, height=500)
        self.Image_info2.pack(padx=5, pady=5)
        self.text = Text(self.root, width=25, height=15)
        self.text.place(x=5, y=350)
        Label(self.root, text='预告片信息列表', fg='green', bg='yellow', font=('宋体', 15)).place(x=23, y=320)
        Label(self.root, text='海 报 图 片', fg='purple', bg='white', font=('宋体', 18)).place(x=780, y=320)
        self.Movie_list()
        self.Basic_info()
        self.Poster_info()
        # ``Button(...).place(...)`` returns None, so create the widget first
        # and place it afterwards to keep a usable reference.
        self.button1 = Button(self.root, width=8, height=2, text="上一部", bg="orange",
                              font=("楷", 12), command=self.Prior_film)
        self.button1.place(x=38, y=600)
        self.button2 = Button(self.root, width=8, height=2, text="下一部", bg="orange",
                              font=("楷", 12), command=self.Next_film)
        self.button2.place(x=38, y=650)
        self.button3 = Button(self.root, width=18, height=2, text="预告片", bg="blue", fg="white",
                              font=("宋", 12), command=self.Film_info)
        self.button3.place(x=0, y=700)

    def _current_record(self):
        """Return the record at the global index ``i``, or None if out of range."""
        if 0 <= i < len(Data_info):
            return Data_info[i]
        return None

    def _step(self, delta):
        """Move the global index by ``delta`` and refresh the display.

        Does nothing when the move would leave the valid index range.
        """
        global i
        target = i + delta
        if not 0 <= target < len(Data_info):
            return
        i = target
        self.Basic_info()
        # The frames are managed with ``place``, so ``place_forget`` is what
        # actually hides the trailer panel of the previous film.
        self.frame2.place_forget()
        self.Poster_info()

    def Next_film(self):
        """Show the next film; no-op when the last film is already shown."""
        self._step(1)

    def Prior_film(self):
        """Show the previous film; no-op when the first film is already shown."""
        self._step(-1)

    def Basic_info(self):
        """Fill the text box with the details of the current record."""
        self.text.delete('1.0', 'end')
        record = self._current_record()
        if record is None:
            return
        self.text.tag_config("tag_1", background="yellow", foreground="red")
        self.text.tag_config("tag_2", background="white", foreground="blue")
        self.text.insert(END, "电影名: ", "tag_1")
        self.text.insert(END, str(record[4]) + "\n" + "\n", "tag_2")
        self.text.insert(END, "导演: ", "tag_1")
        self.text.insert(END, str(record[5]) + "\n" + "\n", "tag_2")
        self.text.insert(END, "主演: ", "tag_1")
        for actor in record[0]:
            self.text.insert(END, str(actor) + "  ", "tag_2")
        self.text.insert(END, "\n" + "\n")
        # NOTE(review): field [3] is shown under the trailer-link label while
        # Film_info plays field [1] -- confirm which URL is meant to be shown.
        self.text.insert(END, "预告片链接:", "tag_1")
        self.text.insert(END, str(record[3]) + "\n" + "\n", "tag_2")
        self.text.insert(END, "海报链接:", "tag_1")
        self.text.insert(END, str(record[2]) + "\n" + "\n", "tag_2")

    def _download(self, URL, folder, extension):
        """Save ``URL`` as ``D:/<folder>/<title>.<extension>``.

        ``self.File_path`` is always updated.  Nothing is downloaded when the
        URL is empty or the file already exists; a failed request is reported
        on stdout and leaves no file behind.
        """
        directory = os.path.join("D:/", folder)
        title = self._BAD_FILENAME_CHARS.sub("_", str(Data_info[i][4]))
        self.File_path = '{0}/{1}.{2}'.format(directory, title, extension)
        if not URL or os.path.exists(self.File_path):
            return
        try:
            response = requests.get(URL, timeout=30)
            response.raise_for_status()
        except requests.RequestException as error:
            print(error)
            return
        os.makedirs(directory, exist_ok=True)
        with open(self.File_path, 'wb') as f:
            f.write(response.content)

    def Save_poster(self, URL):
        """Download the poster at ``URL`` into ``D:/poster`` as a .jpg file."""
        self._download(URL, "poster", 'jpg')

    def Save_video(self, URL):
        """Download the trailer at ``URL`` into ``D:/video`` as a .mp4 file."""
        self._download(URL, "video", 'mp4')

    def _open_capture(self, url):
        """Stop the running refresh loop and open ``url`` as the new capture."""
        if self._after_id is not None:
            self.root.after_cancel(self._after_id)
            self._after_id = None
        if self.camera is not None:
            self.camera.release()
        self.camera = cv2.VideoCapture(url)

    def Poster_info(self):
        """Download and display the poster of the current record."""
        record = self._current_record()
        if record is None:
            return
        self.url1 = "".join(record[2])
        self.Save_poster(self.url1)
        self._open_capture(self.url1)
        self.frame1.place(x=700, y=350)
        # Replace the old label instead of piling up hidden widgets.
        self.Image_info1.destroy()
        self.Image_info1 = Label(self.frame1)
        self.Image_info1.pack(padx=5, pady=5)
        self.Loop_poster_film(self.Image_info1)

    def Film_info(self):
        """Download and play the trailer of the current record."""
        record = self._current_record()
        if record is None:
            return
        self.url = "".join(record[1])
        if not self.url:
            print("No trailer URL for this film.")
            return
        self.Save_video(self.url)
        self._open_capture(self.url)
        self.frame2.place(x=180, y=300)
        self.Image_info2.destroy()
        self.Image_info2 = Label(self.frame2, width=510, height=500)
        self.Image_info2.pack(padx=5, pady=5)
        self.Loop_poster_film(self.Image_info2)

    def Loop_poster_film(self, Loop):
        """Show the next frame of ``self.camera`` in the label ``Loop``.

        Re-schedules itself through ``root.after`` until a read fails.
        """
        self._after_id = None
        if self.camera is None:
            return
        Success, img = self.camera.read()
        if not Success:
            return
        cv2image = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
        Current_image = Image.fromarray(cv2image)
        imgtk = ImageTk.PhotoImage(image=Current_image)
        # Keep a reference on the widget so the image is not garbage-collected.
        Loop.imgtk = imgtk
        Loop.config(image=imgtk)
        self._after_id = self.root.after(1, lambda: self.Loop_poster_film(Loop))

    def Movie_list(self):
        """Build the title banner and the scrollable table of film titles."""
        Label(self.root, text='豆  瓣  电  影', fg='red', bg='yellow', font=('宋体', 30)).place(x=380, y=30)
        self.checkDate = ttk.Treeview(self.root, columns=('name',))
        self.checkDate.heading('#0', text='电影序号')
        self.checkDate.heading('name', text='电影名称')
        self.checkDate.column('name', width=800, anchor='center')
        self.checkDate.tag_configure("evenColor", background="LightBlue")
        for number, record in enumerate(Data_info, start=1):
            # Pass the title as a one-element tuple: a bare string is split
            # on whitespace by Tcl and only its first word would be shown.
            options = {'text': '                  ' + str(number), 'values': (record[4],)}
            if number % 2:
                options['tags'] = ("evenColor",)
            self.checkDate.insert("", 'end', **options)
        yscrollbar = Scrollbar(self.root, orient=VERTICAL, command=self.checkDate.yview)
        self.checkDate.configure(yscrollcommand=yscrollbar.set)
        yscrollbar.pack(side=RIGHT, fill=Y)
        self.checkDate.place(x=0, y=80)

class Clutch():
    """Crawler for the Douban "now playing" listing page.

    Instantiating the class fetches the listing page, visits the film pages
    it links to and appends one record per film to the module-level
    ``Data_info`` list.  Every record has exactly six fields:
    ``[actors, trailer_video_url, poster_url, page_url, title, director]``.
    A field that cannot be found is stored as an empty string (an empty list
    for the actors), so the positions never shift.
    """

    # Compiled once at class level; still reachable as ``self.findXxx``.
    findLink = re.compile(r'<a class="ticket-btn" data-psource="poster" href="(.*?)" target="_blank">')
    findImag = re.compile(r'<img.*?src="(.*?)".*?>', re.S)
    findTitle = re.compile(r'<span property="v:itemreviewed">(.*)</span>')
    finddirector = re.compile(r'<a href=".*?" rel="v:directedBy">(.*)</a>')
    findActor = re.compile(r'<a\b href="[^"]*"[^>]*>([\s\S]*?)</a>', re.S)
    findVideo = re.compile(r'<a\b[^>]+\bhref="([^"]*)"[^>]*>[\s\S]*?</a>', re.S)
    findRealVideo = re.compile(r'<source src="(.*?)".*?>')

    def __init__(self):
        """Crawl the listing page and fill the global ``Data_info``."""
        Douban_url = "https://movie.douban.com/cinema/nowplaying/beijing/"
        List_url = self.Get_url(Douban_url)
        self.Crawing(List_url)

    def Get_url(self, Douban_url):
        """Return the film-page links found on the listing page.

        Each entry is a one-element list holding the link, which is the shape
        ``Crawing`` expects.  Items without a matching link are skipped.
        """
        List_url = []
        html = self.Ask_url(Douban_url)
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all("li", class_="poster"):
            link = self.findLink.findall(str(item))
            if link:
                # Keep only the first match so joining never glues URLs together.
                List_url.append(link[:1])
        return List_url

    def _find_trailer(self, item):
        """Return the first trailer video URL reachable from ``item``, or ''."""
        for entry in item.find_all('li', class_="label-trailer"):
            links = self.findVideo.findall(str(entry))
            if not links:
                continue
            trailer_page = BeautifulSoup(self.Ask_url(links[0]), "html.parser")
            for container in trailer_page.find_all('div', class_="cont"):
                sources = self.findRealVideo.findall(str(container))
                if sources:
                    return sources[0]
        return ""

    def _find_poster(self, item):
        """Return the first image URL inside a "subject clearfix" div, or ''."""
        for section in item.find_all('div', class_="subject clearfix"):
            images = self.findImag.findall(str(section))
            if images:
                return images[0]
        return ""

    def Crawing(self, List_url, limit=39):
        """Visit the film pages and append their records to ``Data_info``.

        Args:
            List_url: links as returned by ``Get_url``.
            limit: maximum number of links to visit; ``None`` visits all.

        Returns:
            The module-level ``Data_info`` list.
        """
        for link in List_url[:limit]:
            url = "".join(link)
            if not url:
                continue
            html = self.Ask_url(url)
            soup = BeautifulSoup(html, "html.parser")
            for item in soup.find_all('div', id="wrapper"):
                print("爬虫中……")
                actors = [
                    "".join(self.findActor.findall(str(tag)))
                    for tag in item.find_all('a', rel="v:starring")
                ]
                video = self._find_trailer(item)
                poster = self._find_poster(item)
                page_text = str(item)
                title = "".join(self.findTitle.findall(page_text))
                director = "".join(self.finddirector.findall(page_text))
                Data_info.append([actors, video, poster, url, title, director])
        return Data_info

    def Ask_url(self, url):
        """Fetch ``url`` and return its body decoded as UTF-8.

        Returns the single-space string ``" "`` when the URL is malformed,
        the request fails or the body cannot be decoded; the reason is
        printed.
        """
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47"}
        html = " "
        try:
            # Request() itself raises ValueError for a malformed URL.
            request = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(request, timeout=15) as response:
                html = response.read().decode("utf-8")
        except urllib.error.URLError as e:
            if hasattr(e, "code"):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)
        except (OSError, ValueError) as e:
            # Read timeouts, malformed URLs and undecodable bodies.
            print(e)
        return html
if __name__ == '__main__':
    # Module-level state shared by both classes: ``i`` is the index of the
    # film currently shown, ``Data_info`` collects the crawled records.
    i = 0
    Data_info = []
    # Bind the instances to their own names so the classes are not shadowed.
    crawler = Clutch()  # crawls and appends records to Data_info
    window = Douban()   # builds the GUI and blocks in the Tk main loop

运行效果:

你试了就知道awa

最后:

有不懂的欢迎在评论区留言!

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小z吖

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值