code:
# !/usr/bin/env python
# -*-coding:utf-8-*-
# date :2021/3/10 22:23
# author:Sabo
import os
import subprocess
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Directory handed to `you-get -o`; downloaded videos are written here.
vedioPath = 'F:/麻辣烫耙耳朵'

# HTTP request headers: a desktop-browser User-Agent, and a request that the
# server close the connection after each response.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.104 Safari/537.36',
    'Connection': 'close',
}
def getUrls(origin):
    """Collect the page URLs linked from a listing page.

    Fetches *origin*, looks at every ``<div class="item-cell">`` element and
    resolves the ``href`` of the first ``<a>`` inside it against *origin*.

    Args:
        origin: URL of the listing page.

    Returns:
        A list of absolute URLs in document order.  The list is empty when
        the request fails, the server does not answer with status 200, or
        the page contains no usable links.
    """
    dstLinks = []
    try:
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(url=origin, headers=header, timeout=10)
    except requests.RequestException as err:
        print('Error!', err)
        return dstLinks
    if response.status_code != 200:
        print('Error!')
        return dstLinks

    response.encoding = 'utf-8'
    mainpage = BeautifulSoup(response.text, 'html.parser')
    for cell in mainpage.find_all('div', attrs={"class": "item-cell"}):
        anchor = cell.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # Cell without a link: skip it instead of crashing.
            continue
        # urljoin resolves relative, root-relative and absolute hrefs alike,
        # and does not require *origin* to end in 'index.shtml'.
        dstLinks.append(urljoin(origin, href))
    return dstLinks
def getTitle(title):
    """Extract the text between an element's opening tag and the next tag.

    *title* is converted with ``str()``, so it may be a markup string or any
    object whose string form is markup (e.g. a parsed HTML element).

    Args:
        title: The element (or its markup) to read the text from, or None.

    Returns:
        The whitespace-stripped text between the first ``>`` and the next
        ``<``.  Returns ``''`` for None.  When the input contains no ``>``
        the whole stripped text is returned; when no ``<`` follows the first
        ``>`` everything after that ``>`` is returned.
    """
    if title is None:
        # str(None) would otherwise be sliced into the bogus title 'Non'.
        return ''
    text = str(title).strip()
    start = text.find('>')
    if start == -1:
        # No tag at all: the input already is the title text.
        return text
    end = text.find('<', start)
    if end == -1:
        # Unclosed element: take everything after the opening tag.
        end = len(text)
    return text[start + 1:end].strip()
def getUrl(origin):
    """Build the target URL from the link inside ``<div class="img">``.

    Fetches *origin*, takes the ``href`` of the first ``<a>`` inside the
    first ``<div class="img">``, drops its first two characters and
    substitutes the remainder for ``'index.shtml'`` in *origin*.

    Args:
        origin: URL of the page to inspect.

    Returns:
        The derived URL, or ``''`` when the request fails, the status is not
        200, or the expected link is not present on the page.
    """
    newUrl = ''
    try:
        response = requests.get(url=origin, headers=header, timeout=10)
    except requests.RequestException as err:
        print('Error!', err)
        return newUrl
    if response.status_code != 200:
        print('Error!')
        return newUrl

    response.encoding = 'utf-8'
    mainPage = BeautifulSoup(response.text, 'html.parser')
    container = mainPage.find("div", attrs={"class": "img"})
    first = container.find('a') if container is not None else None
    web = first.get("href") if first is not None else None
    if not web:
        # Page layout differs from what is expected; report instead of
        # raising AttributeError/TypeError on a missing element.
        print('Error!')
        return newUrl

    # NOTE(review): presumably the href starts with './' and the slice
    # removes that prefix; confirm against the live page.
    dstStr = web[2:]
    newUrl = origin.replace('index.shtml', dstStr)  # destination URL obtained
    return newUrl
def paserVedio(vedioAddress):
    """Run ``you-get -i`` on *vedioAddress* and let it print to the console.

    The command is passed as an argument list without a shell, so characters
    such as ``&``, ``;`` or spaces in a scraped URL cannot split or alter
    the command.

    Args:
        vedioAddress: URL of the page to hand to ``you-get``.
    """
    command = ['you-get', '-i', vedioAddress]
    try:
        # check=False: like os.system, a non-zero exit status is not fatal.
        subprocess.run(command, check=False)
    except OSError as err:
        # e.g. you-get is not installed or not on PATH.
        print('Error!', err)
def download(vedioName, vedioAddress):
    """Download *vedioAddress* with ``you-get`` into ``vedioPath``.

    The command line is echoed and then run as an argument list without a
    shell, so a title or URL containing spaces or shell metacharacters is
    passed to ``you-get`` intact.

    Args:
        vedioName: Output file name, passed to ``you-get -O``.
        vedioAddress: URL of the page to download from.
    """
    command = ['you-get', '-o', vedioPath, '-O', vedioName, vedioAddress]
    # Echo the command in the same form as before, for the user's benefit.
    print('you-get -o {0} -O {1} "{2}"'.format(vedioPath, vedioName, vedioAddress))
    try:
        # check=False: like os.system, a non-zero exit status is not fatal.
        subprocess.run(command, check=False)
    except OSError as err:
        # e.g. you-get is not installed or not on PATH.
        print('Error!', err)
def getVedioName(website):
    """Read the video title from a video page.

    Fetches *website* and extracts the text of the
    ``<div class="videoTitle container turn-off">`` element.

    Args:
        website: URL of the video page.

    Returns:
        The title text, or ``''`` when the request fails or times out, the
        status is not 200, or the title element is not on the page.
    """
    vedioName = ''
    try:
        response = requests.get(url=website, headers=header, timeout=1)
    except requests.RequestException as err:
        # With a 1-second timeout slow pages are expected; report and let the
        # caller skip this video instead of aborting the whole run.
        print('Error!', err)
        return vedioName
    if response.status_code != 200:
        print('Error!')
        return vedioName

    response.encoding = 'utf-8'
    childrenPage = BeautifulSoup(response.text, 'html.parser')
    title = childrenPage.find("div", attrs={'class': "videoTitle container turn-off"})
    if title is None:
        # No title element: return '' rather than parsing the string 'None'.
        return vedioName
    return getTitle(title)
# Script entry point: list the pages linked from the index, then inspect and
# download every page for which a title could be read.
if __name__ == '__main__':
    origin = 'http://show.sctv.com/mlt/index.shtml'
    dstUrl = getUrls(origin)
    for link in dstUrl:
        vedioAddress = link
        vedioName = getVedioName(link)
        # Compare by value/truthiness; `is not ''` tests object identity and
        # is not a reliable emptiness check.
        if not vedioName:
            print('No vedio name ,can\'t download the vedio!')
            continue
        paserVedio(vedioAddress)
        download(vedioName, vedioAddress)