自学 Python,无聊的时候写了一个程序,用图形界面展示最新电影,并解析出迅雷下载地址。
全部原创!
数据爬取源码
import urllib.request
import requests
from bs4 import BeautifulSoup
import re
import tkinter as tk
import webbrowser
from lxml import etree
import os
from PIL import Image,ImageTk
from urllib.request import urlretrieve
import xlwt
import xlrd
import sqlite3
import pyperclip
def get_data():
    """Download the dytt8 index page and return it as HTML text.

    The page is passed through BeautifulSoup so later regex searches
    operate on normalised markup.
    """
    req = urllib.request.Request("https://dytt8.net/index2.htm")
    # Context manager closes the connection deterministically (the
    # original leaked it).  The original's `soup.encode(...)` call was
    # dropped: its return value was discarded, so it was a no-op.
    with urllib.request.urlopen(req) as webpage:
        html = webpage.read()
    soup = BeautifulSoup(html, 'html.parser')
    return str(soup)


# Downloaded once at import time; every later parse uses this snapshot.
# NOTE(review): the name `date` is misleading (it holds HTML, not a
# date), but it is kept because other functions reference it globally.
date = get_data()
def get_zuixin_1(page=None):
    """Return raw '<href>">title' fragments for the 最新电影下载 column.

    page: HTML text to search; defaults to the module-level `date`
    snapshot (backward compatible with the original zero-arg call).
    """
    if page is None:
        page = date
    # Greedy (.*): captures everything between the column marker and
    # the last '</a><br/>' on the same line ('.' does not cross lines).
    return re.findall('最新电影下载</a>]<a href="(.*)</a><br/>', page)
def get_zuixin_2(page=None):
    """Return raw '<href>">title' fragments for the 迅雷电影资源 column.

    page: HTML text to search; defaults to the module-level `date`
    snapshot (backward compatible with the original zero-arg call).
    """
    if page is None:
        page = date
    # Greedy (.*): captures everything between the column marker and
    # the last '</a><br/>' on the same line ('.' does not cross lines).
    return re.findall('迅雷电影资源</a>]<a href="(.*)</a><br/>', page)
# Raw link fragments for the two front-page columns, plus the lists
# that will collect the corresponding absolute URLs.
zuixin_1 = get_zuixin_1()
zuixin_2 = get_zuixin_2()
ul_1, ul_2 = [], []
def zuixin_url(zuixin_1, out=None):
    """Extract the href part of each '<path>">title' fragment and
    append the absolute site URL to *out* (defaults to global ul_1).

    NOTE(review): this definition is immediately shadowed by a second
    `zuixin_url` below; it only ever runs via the call right after it.
    A single parameterised function would avoid the duplication.
    """
    if out is None:
        out = ul_1
    for fragment in zuixin_1:
        # Everything before the closing '">' is the relative link path.
        for path in re.findall(r'(.*?)">', fragment):
            out.append('https://dytt8.net' + path)
# Populate ul_1 with absolute URLs for the 最新电影下载 column.
zuixin_url(zuixin_1)
def zuixin_url(zuixin_2, out=None):
    """Extract the href part of each '<path>">title' fragment and
    append the absolute site URL to *out* (defaults to global ul_2).

    NOTE(review): this redefinition shadows the earlier `zuixin_url`
    that appended to ul_1 — the two differ only in the target list and
    should be one parameterised function.
    """
    if out is None:
        out = ul_2
    for fragment in zuixin_2:
        # Everything before the closing '">' is the relative link path.
        for path in re.findall(r'(.*?)">', fragment):
            out.append('https://dytt8.net' + path)
# Populate ul_2 with absolute URLs for the 迅雷电影资源 column.
zuixin_url(zuixin_2)
# Display titles for every movie, in the order the columns are parsed.
name = []


def zuixin_nam(zuixin_1, out=None):
    """Append the title text (everything after the first '>') of each
    fragment to *out*, defaulting to the global `name` list.

    NOTE(review): a second, identical `zuixin_nam` definition follows
    in the original file; one definition is enough.
    """
    if out is None:
        out = name
    for fragment in zuixin_1:
        out.extend(re.findall('>(.*)', fragment))
# Collect titles from the first column into `name`.
zuixin_nam(zuixin_1)
def zuixin_nam(zuixin_2, out=None):
    """Append the title text (everything after the first '>') of each
    fragment to *out*, defaulting to the global `name` list.

    NOTE(review): this redefinition is the same logic as the first
    `zuixin_nam` — it only exists to mirror the ul_1/ul_2 split.
    """
    if out is None:
        out = name
    for fragment in zuixin_2:
        out.extend(re.findall('>(.*)', fragment))
# Collect titles from the second column into `name`, appended after
# the first column's titles.
zuixin_nam(zuixin_2)
def url_data(url):
    """Download *url* with a desktop-browser User-Agent and return the
    page as HTML text (normalised through BeautifulSoup)."""
    head = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36"
    }
    req = urllib.request.Request(url, headers=head)
    # Context manager closes the connection (the original leaked it);
    # the original's discarded `soup.encode(...)` no-op was removed.
    with urllib.request.urlopen(req) as webpage:
        html = webpage.read()
    return str(BeautifulSoup(html, 'html.parser'))
# Poster image base names (derived from each image URL); shared by both
# download passes and later used as the spreadsheet's first column.
namm = []


def get_img_1():
    """Download the poster of every page in ul_1 into ./image/ and
    record the derived file name in `namm`."""
    os.makedirs('./image/', exist_ok=True)
    for page_url in ul_1:
        html = etree.HTML(url_data(page_url))
        result = html.xpath('//div[@class="co_content8"]//img/@src')
        if not result:
            # Robustness: some detail pages carry no poster — skip
            # them instead of crashing on result[0].
            continue
        image_url = result[0]
        # NOTE(review): fragile slicing — assumes the URL tail is
        # exactly 15 chars of name plus a 4-char extension.
        nam = image_url[-15:-4]
        namm.append(nam)
        urlretrieve(image_url, './image/' + nam + '.jpg')


get_img_1()
def get_img_2():
    """Download the poster of every page in ul_2 into ./image/ and
    record the derived file name in `namm`."""
    os.makedirs('./image/', exist_ok=True)
    for page_url in ul_2:
        html = etree.HTML(url_data(page_url))
        result = html.xpath('//div[@class="co_content8"]//img/@src')
        if not result:
            # Robustness: skip detail pages without a poster instead
            # of crashing on result[0].
            continue
        image_url = result[0]
        # NOTE(review): fragile slicing — assumes the URL tail is
        # exactly 15 chars of name plus a 4-char extension.
        nam = image_url[-15:-4]
        namm.append(nam)
        urlretrieve(image_url, './image/' + nam + '.jpg')


get_img_2()
xx=[]
def get_xinxi_1():
for i in range(len(ul_1)):
xinxi=[]
url_1=url_data(ul_1[i])
ym=re.findall('◎译 名\u3000(.*?)<br/>◎',url_1)
xinxi.append(ym)
ym=re.findall('◎片 名\u3000(.*?)<br/>◎',url_1)
xinxi.append(ym)
ym=re.findall('◎产 地\u3000(.*?)<br/>◎',url_1)
xinxi.append(ym)
ym=re.findall('◎字 幕\u3000(.*?)<br/>◎',url_1)
xinxi.append(ym)
ym=re.findall('◎片 长\u3000(.*?)<br/>◎',url_1)
xinxi.append(ym)
xx.append(xinxi)
# Scrape metadata for all first-column movies.
get_xinxi_1()
def get_xinxi_2():
    """Scrape the five "◎label　value" metadata fields of every page in
    ul_2 and append them to the global `xx` list.

    Each appended entry is a list of five re.findall results:
    [translated name], [title], [country], [subtitles], [runtime].
    Replaces the original's five copy-pasted findall blocks with one
    loop over the field labels; the patterns are byte-identical.
    """
    labels = ('译 名', '片 名', '产 地', '字 幕', '片 长')
    for page_url in ul_2:
        page = url_data(page_url)
        # '\u3000' is the ideographic space after each field label.
        xx.append([re.findall('◎' + label + '\u3000(.*?)<br/>◎', page)
                   for label in labels])


get_xinxi_2()
# Per-movie download links: every <a href> inside the content block of
# each detail page (stored as one list per movie).
xl = []


def get_xl_1():
    """Collect the download-link href lists for every page in ul_1."""
    # Iterate the URLs directly instead of range(len(...)).
    for page_url in ul_1:
        html = etree.HTML(url_data(page_url))
        xl.append(html.xpath('//div[@class="co_content8"]//a/@href'))


get_xl_1()
def get_xl_2():
    """Collect the download-link href lists for every page in ul_2."""
    # Iterate the URLs directly instead of range(len(...)).
    for page_url in ul_2:
        html = etree.HTML(url_data(page_url))
        xl.append(html.xpath('//div[@class="co_content8"]//a/@href'))


get_xl_2()
def save():
    """Write the scraped data to 数据库.xls.

    Row 0 holds the headings; each following row holds one movie's
    image name, five metadata fields, download-link list and title.
    """
    book = xlwt.Workbook(encoding="utf-8", style_compression=0)
    sheet = book.add_sheet('电影Top', cell_overwrite_ok=True)
    col = ("img名称", "译 名:", "片 名:", "产 地:", "字 幕:", "片 长:", "链接地址", "宣传名")
    for idx, heading in enumerate(col):
        sheet.write(0, idx, heading)
    for row, img_name in enumerate(namm, start=1):
        sheet.write(row, 0, img_name)
        # NOTE(review): xl[row-1] is a *list* of hrefs — confirm xlwt
        # accepts it; joining with '\n' may be what was intended.
        sheet.write(row, 6, xl[row - 1])
        for fld in range(5):
            sheet.write(row, 1 + fld, xx[row - 1][fld])
    # Titles are written in a separate pass: `name` holds entries for
    # both columns and may be longer than `namm`.
    for row, title in enumerate(name, start=1):
        sheet.write(row, 7, title)
    # Single save at the end; the original's extra save() before any
    # data was written was redundant and has been removed.
    book.save('数据库.xls')


save()
本地文件
本地数据库
界面化处理
界面化源码
import urllib.request
import requests
from bs4 import BeautifulSoup
import re
import tkinter as tk
import webbrowser
from lxml import etree
import os
from PIL import Image,ImageTk
from urllib.request import urlretrieve
import xlwt
import xlrd
import sqlite3
import pyperclip
# Reopen the spreadsheet produced by the scraper script.
wb=xlrd.open_workbook("数据库.xls")
sheet=wb.sheet_by_index(0)
# Column 0: poster image base names; column 7: promo titles.
col_data=sheet.col_values(0)
col_om=sheet.col_values(7)
# Row 1: the first movie's data; row 0: the header labels.
row_data=sheet.row_values(1)
row_om=sheet.row_values(0)
def copy(vale):
    """Put *vale* on the system clipboard (used by the 下载 buttons)."""
    pyperclip.copy(vale)
# Movie 1
def b_1():
    """Open a window showing the first movie's poster and metadata,
    with a 下载 button that copies its download link to the clipboard."""
    # Fetch the row locally for consistency with b_2/b_3 (same values
    # as the module-level row_data, which is sheet.row_values(1)).
    movie = sheet.row_values(1)
    root = tk.Toplevel()
    root.title(movie[1])
    im = Image.open('./image/' + col_data[1] + '.jpg')
    im = im.resize((254, 367))
    img = ImageTk.PhotoImage(im)
    poster = tk.Label(root, image=img)
    # Keep a reference on the widget so the PhotoImage cannot be
    # garbage-collected while the window is open.
    poster.image = img
    poster.grid(row=0, column=0, columnspan=5)
    for i in range(5):
        tk.Label(root, text=movie[i + 1]).grid(row=i + 1, sticky="w", column=3)
    for j in range(5):
        tk.Label(root, text=row_om[j + 1]).grid(row=j + 1, sticky="e", column=2)
    tk.Button(root, text="下载", command=lambda: copy(movie[6])).grid(row=5, column=4, sticky="e")
    root.mainloop()
# Movie 2
def b_2():
    """Open a window showing the second movie's poster and metadata,
    with a 下载 button that copies its download link to the clipboard."""
    movie = sheet.row_values(2)
    win = tk.Toplevel()
    win.title(movie[1])
    photo = ImageTk.PhotoImage(
        Image.open('./image/' + col_data[2] + '.jpg').resize((254, 367)))
    tk.Label(win, image=photo).grid(row=0, column=0, columnspan=5)
    # Field values on the right, header labels on the left.
    for idx in range(5):
        tk.Label(win, text=movie[idx + 1]).grid(row=idx + 1, sticky="w", column=3)
    for idx in range(5):
        tk.Label(win, text=row_om[idx + 1]).grid(row=idx + 1, sticky="e", column=2)
    tk.Button(win, text="下载", command=lambda: copy(movie[6])).grid(row=5, column=4, sticky="e")
    win.mainloop()
## 第3个电影
def b_3():
row_data=sheet.row_values(3)
root = tk.Toplevel()
root.title(row_data[1])
im=Image.open('./image/'+col_data[3]+'.jpg')
im=im.resize((254,367))
img=ImageTk.PhotoImage(im)
tk.Label(root,image=img).grid(row=0,column=0,columnspan=5)
for i in range(5):
tk.Label(root, text=row_data[i+1]).grid(row=i+1,sticky="w",column=3)
for j in range(5):
tk.Label(root, text=row_om[j+1]).grid(row=j+1,sticky="e",column=2)
tk.Button(root, text="下载",