淘票票电影热榜网址:
https://dianying.taobao.com/showList.htm?spm=a1z21.6646273.city.2.4ed46d6ekOc3wH&n_s=new&city=310100
网站截图:
spider4taopiaopiao.py
爬取网站电影排行榜
import requests
import re
import os
import time
import json
def mySpider():
    """Scrape the Taopiaopiao "now showing" movie list and download the posters.

    Side effects (paths are relative to the current working directory):
      * ``result.txt``  - the raw HTML of the listing page.
      * ``items.json``  - the parsed movie records.
      * ``images/<n>.jpg`` - one poster per movie, numbered from 0 in page order.

    Returns:
        A list with one 9-element list of strings per movie, in page order:
        [poster URL, title, score, director, cast, genre, region, language,
        runtime]. Fields that are empty on the page are replaced by "暂无信息".

    Raises:
        requests.exceptions.RequestException: if the listing page or a poster
            cannot be fetched (timeout, connection error, non-2xx status).
    """
    # Local import so the file's top-level import block stays untouched.
    from html import unescape

    # Send a desktop-browser User-Agent so the site serves the normal page.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
    }
    # Without a timeout a stalled connection would hang the program forever.
    timeout = 10  # seconds

    print("网页请求中...")
    time.sleep(0.5)
    url = "https://dianying.taobao.com/showList.htm?spm=a1z21.6646273.city.2.4ed46d6ekOc3wH&n_s=new&city=310100"
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    page = response.text
    print("网页信息已获取...")
    time.sleep(0.5)

    # Keep a copy of the full page for debugging.
    with open("result.txt", "w", encoding='utf-8') as fd:
        fd.write(page)

    # Only parse the part of the page before the "coming soon" section.
    end = page.find('<!-- 即将热映 -->')
    if end != -1:
        page = page[:end]

    # Raw strings avoid invalid-escape warnings for \d; the dot in the score
    # group is escaped so it only matches a literal decimal point.
    pattern = re.compile(
        r'<img width="160" height="224" data-src="(.*?)" src='
        r'.*?<span class="bt-l">(.+?)</span>.*?<span class="bt-r">(\d\.\d)?</span>'
        r'.*?<span>导演:(.*?)</span>'
        r'.*?<span>主演:(.*?)</span>'
        r'.*?<span>类型:(.*?)</span>'
        r'.*?<span>地区:(.*?)</span>'
        r'.*?<span>语言:(.*?)</span>'
        r'.*?<span>片长:(.*?)</span>',
        re.S,
    )

    # Empty captures become a placeholder; everything else has its HTML
    # entities decoded (the previous replace("·", "·") call was a no-op).
    items = [
        [unescape(field) if field else "暂无信息" for field in match]
        for match in pattern.findall(page)
    ]

    with open("items.json", "w", encoding='utf-8') as fd:
        json.dump(items, fd)

    # Create the download directory if it does not exist yet.
    dir_name = "./images"
    os.makedirs(dir_name, exist_ok=True)

    for cnt, item in enumerate(items):
        file_name = str(cnt) + ".jpg"
        response = requests.get(item[0], headers=headers, timeout=timeout)
        # Fail loudly rather than saving an error page as a .jpg file.
        response.raise_for_status()
        with open(dir_name + "/" + file_name, 'wb') as f:
            f.write(response.content)
        info = "图片文件: {0:25}{1}".format(file_name, " 成功下载...")
        print(info)

    return items
if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script,
    # not when it is imported (e.g. by the GUI module).
    mySpider()
运行结果展示:
GUI4Spider.py
制作简易的 tkinter 图形用户界面(GUI)
from spider4taopiaopiao import mySpider
from tkinter import *
import time
from PIL import Image,ImageTk
import json
# The scraper also writes its result to items.json. To start the GUI without
# re-scraping, that file can be read with json.load (opened with
# encoding='utf-8') instead of calling mySpider().
items = mySpider()

# Field labels keyed by position within a movie record:
# 0 poster URL, 1 title, 2 score, 3 director, 4 cast, 5 genre,
# 6 region, 7 language, 8 runtime.
infoMap = {
    0:"图片链接:", 1:"电影名:", 2:"评分:", 3:"导演:",
    4:"主演:", 5:"类型:", 6:"地区:", 7:"语言:", 8:"片长:"
}
current_rank = 1          # 1-based rank of the movie currently shown
total_rank = len(items)   # number of movies scraped

root = Tk()
root.title("淘票票电影热映排行榜,更新时间:"+\
           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
root.geometry('800x800')
try:
    root.iconbitmap("movie.ico")
except TclError:
    # The window icon is purely cosmetic: a missing movie.ico (or a platform
    # that rejects .ico bitmaps) should not prevent the GUI from starting.
    print("未能加载窗口图标 movie.ico,使用默认图标")
def showPre():
    """Move to the previous movie, stopping at rank 1, and refresh the view."""
    global current_rank
    # Step back by one, but never go below the first rank.
    current_rank = max(current_rank - 1, 1)
    print("显示前一部电影...", current_rank)
    labimgconfig()
    labInfoConfig()
def showNxt():
    """Move to the next movie, stopping at the last rank, and refresh the view."""
    global current_rank
    # Step forward by one, but never go past total_rank.
    current_rank = min(current_rank + 1, total_rank)
    print("显示后一部电影...", current_rank)
    labimgconfig()
    labInfoConfig()
def labimgconfig():
    """Show the poster of the current movie in the image label."""
    # Stored in a module-level name; presumably this keeps a reference alive
    # so the image is not discarded while it is displayed.
    global newImage
    # Poster files are numbered from 0, ranks from 1.
    poster_index = current_rank - 1
    newImage = getImage("images/" + str(poster_index) + ".jpg")
    labimg.config(image=newImage)
def getImage(filename):
    """Load an image file and return it as an ``ImageTk.PhotoImage``.

    Args:
        filename: Path of the image file to open.

    Returns:
        The ``ImageTk.PhotoImage`` built from the file's contents.
    """
    # Image.open keeps the underlying file open; the context manager closes
    # it once PhotoImage has been built from the image.
    with Image.open(filename) as imageJPG:
        return ImageTk.PhotoImage(imageJPG)
def labInfoConfig():
    """Fill the info labels and the rank label for the current movie."""
    movie = items[current_rank - 1]
    # Field 0 is the poster URL, so the text labels start at field 1.
    for field_index, label in enumerate(labInfo, start=1):
        label.config(text=infoMap[field_index] + movie[field_index])
    labRank.config(text="排名:#" + str(current_rank))
# Poster area at the top of the window, initially showing the first poster.
image = getImage("images/0.jpg")
labimg = Label(root, image=image)
labimg.pack()

# One coloured text label per displayed field (title through runtime).
colors = ["Red","Orange","Yellow","Green","Blue","Violet","Purple","Chocolate"]
labInfo = [
    Label(root, bg=color, width=200, height=3, wraplength=1000)
    for color in colors
]
for info_label in labInfo:
    info_label.pack()

# Rank indicator below the field labels.
labRank = Label(root, bg="Red", width=9, height=3, text="排名:#" + str(current_rank))
labRank.pack()

# (An earlier version created one named label per field by hand; the list
# above replaces it.)

# Navigation buttons anchored to the bottom-left and bottom-right corners.
btnPre = Button(root, width=15, height=5, text="显示前一个", command=showPre)
btnNxt = Button(root, width=15, height=5, text="显示后一个", command=showNxt)
btnPre.pack(side=LEFT, anchor=S)
btnNxt.pack(side=RIGHT, anchor=S)

# current_rank is 1 here, so this call stays on the first movie and simply
# populates the poster and the text labels.
showPre()
root.mainloop()
运行结果如下: