Python3.6实现根据电影名称(支持电视剧名称),获取下载链接的方法

摘要:这篇Python开发技术栏目下的“Python3.6实现根据电影名称(支持电视剧名称),获取下载链接的方法”,介绍的技术点是“python3.6、Python3、电视剧名称、下载链接、电影名称、3.6”,希望对大家开发技术学习和问题解决有帮助。这篇文章主要介绍了Python3.6实现根据电影名称(支持电视剧名称),获取下载链接的方法,涉及Python爬虫与正则相关操作技巧,需要的朋友可以参考下

本文实例讲述了Python3.6实现根据电影名称(支持电视剧名称),获取下载链接的方法。分享给大家供大家参考,具体如下:

做个笔记:先记录一个用 urlencode 拼接请求 URL 的小例子(与下文的电影下载示例无直接关系,仅作备忘)。

(python 3.6,django 2.0)

def get_url(outer_order_id):
    """Build the agree-refund URL for the given outer order id.

    Looks up the refund id for the order, picks a random canned reason,
    and URL-encodes everything onto the agreeRefund endpoint.
    """
    refund_id = get_refundId(outer_order_id)
    base = 'http://test.shequ.com/order/agreeRefund?'
    reasons = ['商品已售完', '重复订单', '没有骑手接单', '联系不上顾客', '顾客需要重新下单']
    query = {
        'reason': random.choice(reasons),
        'refundId': refund_id,
        'sendType': 0,
    }
    return base + parse.urlencode(query)

print(get_url('3086123456'))  # demo: prints the assembled agreeRefund URL

http://test.shequ.com/order/agreeRefund?reason=%E9%87%8D%E5%A4%8D%E8%AE%A2%E5%8D%95&refundId=1170611&sendType=0

# -*- coding: utf-8 -*-

import re
import urllib
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

#访问url,返回html页面

def get_html(url):
    """Fetch *url* and return the raw response body as bytes.

    A Mozilla User-Agent header is attached so simple bot filters
    accept the request.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0')
    # BUG FIX: pass the prepared Request (carrying the User-Agent header)
    # to urlopen; the original passed the bare url string, so the header
    # set above was silently ignored.
    response = urllib.request.urlopen(req)
    html = response.read()
    return html

def get_movie_url(movie_name):
    """Return the search-result URL for *movie_name*.

    The site expects the keyword GBK-encoded and percent-escaped.
    """
    base = 'http://s.dydytt.net/plus/search.php?kwtype=0&keyword='
    keyword = urllib.parse.quote(movie_name.encode('GBK'))
    return base + keyword

#从搜索结果页面,提取电影的详情页面链接,存入列表返回

def get_movie_list(url):
    """Fetch a search-result page and return the detail-page URLs found on it.

    Anchors whose href contains an 8-digit id (the site's detail-page
    pattern) are kept, prefixed with the site host; links that already
    contain the host are skipped.
    """
    m_list = []
    html = get_html(url)
    soup = BeautifulSoup(html, 'html.parser')
    # Removed the unused `fixed_html = soup.prettify()` dead assignment.
    host = "http://www.ygdy8.com"
    for a_tag in soup.find_all('a'):
        href = str(a_tag.get('href'))
        if re.search(r'\d{8}', href) and host not in href:
            m_list.append(host + href)
    return m_list

#从电影详情页面中获取电影标题

def get_movie_title(html):
    """Return the text of the first <h1> on a movie detail page.

    Raises AttributeError if the page has no <h1> (same as the original).
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Removed the unused `fixed_html = soup.prettify()` dead assignment.
    title = soup.find('h1')
    return title.string

#从电影详情页面中获取此页面所有的的下载链接

def get_movie_download_url(html):
    """Return every download href on a detail page.

    Download links live in <td style="WORD-WRAP: break-word"><a href=...>
    cells on this site.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # Removed the unused `fixed_html = soup.prettify()` dead assignment;
    # append-loop replaced with a comprehension.
    cells = soup.find_all('td', attrs={'style': 'WORD-WRAP: break-word'})
    return [cell.a.get('href') for cell in cells]

#传入电影列表,获取每个电影的下载地址

def get_movie(movie_list):
    """Map download-url -> movie title for every detail page in *movie_list*.

    Pages whose title contains '游戏' (games) or '动画' (cartoons) are
    filtered out.
    """
    movie_dict = {}
    # Iterate the list directly instead of range(len(...)).
    for page_url in movie_list:
        # Site serves GBK; ignore undecodable bytes as the original did.
        html = get_html(page_url).decode('GBK', 'ignore')
        m_title = get_movie_title(html)
        # One combined guard replaces the original's two nested ifs.
        if '游戏' in m_title or '动画' in m_title:
            continue
        for m_url in get_movie_download_url(html):
            movie_dict[m_url] = m_title
    return movie_dict

用django展现在页面效果如下:

(页面效果截图:a0d99e044f668b321b11f5d4ba833ab7.png)

另一个网站的

# -*- coding: utf-8 -*-

from xpinyin import Pinyin

from bs4 import BeautifulSoup

from urllib import request,error

import time,re

import ssl

ssl._create_default_https_context = ssl._create_unverified_context  # disable HTTPS certificate verification — NOTE(review): insecure; acceptable only for scraping this one site

def get_html(url):
    """Fetch *url*; return the body bytes, or the HTTP status code (int) on error.

    Returning e.code (e.g. 404) instead of raising lets callers probe
    candidate URLs cheaply.
    """
    req = request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0')
    try:
        # BUG FIX: send the prepared Request so the User-Agent header is
        # actually used; the original passed the bare url string.
        response = request.urlopen(req)
        html = response.read()
        return html
    except error.HTTPError as e:
        return e.code

def get_m_html(movie_name):
    """Probe each movie-category path for *movie_name* and return the first
    page that loads.

    The name is transliterated to pinyin (no separator) to build the slug.
    A miss (get_html returned an int status code) triggers a 10-second
    back-off before trying the next category; returns None if every
    category misses.
    """
    pin = Pinyin()
    slug = pin.get_pinyin(movie_name, "")  # "" -> no separator (default is '-')
    movie_type = {
        "Sciencefiction": "科幻片",
        "Horror": "恐怖片",
        "Drama": "剧情片",
        "Action": "动作片",
        "Comedy": "喜剧片",
        "Love": "爱情片",
        "War": "战争片",
    }
    host = "https://www.kankanwu.com"
    for category in movie_type:
        page = get_html("{}/{}/{}/".format(host, category, slug))
        if not isinstance(page, int):
            return page
        time.sleep(10)  # back off before probing the next category

def get_dload_url(html):
    """Return {title: href} for every thunder:// download link on the page."""
    movie_dict = {}
    soup = BeautifulSoup(html, 'lxml')
    # Removed the unused `fixed_html = soup.prettify()` dead assignment.
    # Anchors whose href contains "thunder" are the download links.
    for a_tag in soup.find_all(href=re.compile("thunder")):
        movie_dict[a_tag.get('title')] = a_tag.get('href')
    return movie_dict

希望本文所述对大家Python程序设计有所帮助。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值