git地址:
http://git.oschina.net/juskin/plugin.video.youkukid.git
完整代码如下:
# coding=utf-8
import os, sys
# Add the directory holding the third-party Python packages
# (<sys.path[0]>/resources/lib) to the import search path. This has to run
# before the xbmcswift2 / bs4 imports below.
lib_path = os.path.join(sys.path[0], 'resources', 'lib')
# print lib_path
sys.path.append(lib_path)
# print sys.path
import urllib2
import gzip
import StringIO
import re
import time, random
import json
from xbmcswift2 import Plugin
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
# Single xbmcswift2 plugin instance; the route decorators and plugin.run()
# further down all hang off it.
plugin = Plugin()
# Pose as a desktop browser: this value is sent as the User-Agent header
# by Get_Html_Doc.
UserAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0'
# UserAgent = 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)'
# URL of the "Qinbao children's songs" show page
# url = 'http://www.youku.com/show_page/id_z750491c6a69911e2b2ac.html'
# Fetch a URL and return its HTML document
def Get_Html_Doc(url):
    """Download url and return the response body as a UTF-8 byte string.

    The request carries the module-level UserAgent header. A gzip
    Content-Encoding is decompressed. The charset is taken from an HTML
    <meta> declaration when one is found, otherwise from the Content-Type
    response header; when it names something other than UTF-8 the body is
    transcoded to UTF-8 (undecodable bytes are dropped).

    Errors raised by urllib2 (e.g. urllib2.URLError) propagate to the caller.
    """
    req = urllib2.Request(url)
    req.add_header('User-Agent', UserAgent)
    response = urllib2.urlopen(req)
    try:
        html_doc = response.read()
        content_encoding = response.headers.get('content-encoding', None)
        charset = response.headers.getparam('charset')
    finally:
        # Release the connection even when read() fails.
        response.close()
    if content_encoding == 'gzip':
        html_doc = gzip.GzipFile(fileobj=StringIO.StringIO(html_doc)).read()
    # A charset declared inside the document wins over the HTTP header.
    charset = _Get_Meta_Charset(html_doc) or charset
    if charset:
        charset = charset.lower()
        if charset not in ('utf-8', 'utf8'):
            try:
                html_doc = html_doc.decode(charset, 'ignore').encode('utf-8', 'ignore')
            except LookupError:
                # The declared codec is unknown to Python: hand back the raw
                # bytes instead of failing the whole request.
                pass
    return html_doc


def _Get_Meta_Charset(html_doc):
    """Return the charset named by the first <meta ... charset=...> tag, or None."""
    # NOTE(review): the two original patterns were cut off in this copy of the
    # file (unterminated string literals). This single pattern is a
    # reconstruction covering both
    #   <meta http-equiv="Content-Type" content="text/html; charset=gbk">
    #   <meta charset="gbk">
    # -- confirm against the upstream repository.
    match = re.search(r'<meta[^>]+charset=["\']?([\w.:-]+)', html_doc, re.IGNORECASE)
    if match:
        return match.group(1)
    return None
# Collect the URL of every page of a show's video list
def Get_Page_Url(url):
    """Return the list of per-page "show_point" URLs for a show page.

    url is a show page such as
    http://www.youku.com/show_page/id_<show id>.html. Every data="..."
    attribute found inside <div class="pgm-tab"> yields one URL (requested
    with dt=json), in document order.
    """
    html_doc = Get_Html_Doc(url)
    soup_div = BeautifulSoup(html_doc, parse_only=SoupStrainer('div', {'class':'pgm-tab'}))
    # [^"]* instead of a greedy .* so that a second attribute on the same line
    # is not swallowed into the captured value.
    reload_num_list = re.findall(r'data="([^"]*)"', str(soup_div))
    # Pull the show id out of ".../id_<show id>.html" whatever its length and
    # whether or not a query string follows; fall back to the historical
    # fixed-width slice for URLs that do not have that shape.
    id_match = re.search(r'/id_([^/.?#]+)\.html', url)
    if id_match:
        show_page_id = id_match.group(1)
    else:
        show_page_id = url[-26:-5]
    page_url_list = []
    for reload_num in reload_num_list:
        page_url_list.append('http://www.youku.com/show_point/id_%s.html?dt=json&divid=%s&tab=0&__rt=1&__ro=%s' % (show_page_id, reload_num, reload_num))
    return page_url_list
# A Youku video is identified by a stable id (e.g. XNDQ0ODg2OTMy) that appears
# in its watch URL: http://v.youku.com/v_show/id_XNDQ0ODg2OTMy.html
# Collect video titles and ids, stored as the keys and values of a dict
def Get_Video_Id(url):
    """Return a dict mapping video title -> Youku video id for a show page.

    Every page returned by Get_Page_Url(url) is downloaded and the <a> tags
    inside <div class="link"> are inspected: the "title" attribute becomes
    the key and the id extracted from "href" the value. Anchors without an
    href, or whose href is not a v.youku.com watch URL, are skipped. A later
    anchor with the same title overwrites an earlier one.
    """
    # Compiled once for all pages. Dots are escaped and the id group stops at
    # the first '.', '/', '?' or '#', so a trailing query string cannot leak
    # into the id.
    re_id = re.compile(r'http://v\.youku\.com/v_show/id_([^/.?#]+)\.html')
    video_id_dict = {}
    for page_url in Get_Page_Url(url):
        html_doc = Get_Html_Doc(page_url)
        soup_link = BeautifulSoup(html_doc, parse_only=SoupStrainer('div', {'class':'link'}))
        for tag in soup_link.find_all('a'):
            # tag.get() returns None for a missing attribute; '' keeps the
            # regex call valid and simply fails to match.
            match = re_id.search(tag.get('href') or '')
            if match is None:
                continue
            video_id_dict[tag.get('title')] = match.group(1)
    return video_id_dict
# Collect video titles and thumbnail image sources, stored as the keys and
# values of a dict
def Get_Video_thumb(url):
    """Return a dict mapping each <img> "alt" text to its "src" value.

    Only <img> tags inside <div class="thumb"> are considered, across every
    page returned by Get_Page_Url(url). A later image with the same alt text
    overwrites an earlier one.
    """
    thumbs_by_title = {}
    for listing_url in Get_Page_Url(url):
        page_soup = BeautifulSoup(Get_Html_Doc(listing_url), parse_only=SoupStrainer('div', {'class':'thumb'}))
        # One (alt, src) pair per image, merged in document order.
        thumbs_by_title.update(
            (img.get('alt'), img.get('src')) for img in page_soup.find_all('img'))
    return thumbs_by_title
# Create the SID
def Create_Sid():
    """Return a session id string made of three decimal numbers run together.

    The parts are, in order: the current time in milliseconds, a random
    integer in 1000..1999 and a random integer in 1000..9999.
    """
    millis = int(time.time() * 1000)
    # random.randint() includes both end points.
    parts = (millis, random.randint(1000, 1999), random.randint(1000, 9999))
    return "%d%d%d" % parts
# Generate the shuffled character table "mixed"
def Get_Mix_String(seed):
    """Return the 68-character alphabet as a list, shuffled according to seed.

    For every output position the seed is advanced with
    seed = (seed * 211 + 30031) & 0xFFFF and then scaled to an index into the
    characters that are still unused; the character at that index is removed
    and appended to the result. The same seed always gives the same order.
    """
    pool = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\\:._-1234567890")
    shuffled = []
    # "remaining" is the number of characters still in the pool at each step.
    for remaining in range(len(pool), 0, -1):
        seed = (seed * 211 + 30031) & 0xFFFF
        # seed < 2**16, so the shifted product is always < remaining.
        pick = (seed * remaining) >> 16
        shuffled.append(pool.pop(pick))
    return shuffled
# Resolve the real playback address of every video of a show
def Get_RealPlay_Url(url):
    """Return a dict mapping video title -> playable flv address.

    The titles and video ids come from Get_Video_Id(url). A video whose
    playlist data is malformed or has no flv stream is logged and left out,
    so one bad entry does not abort the whole listing. Network errors from
    urllib2 still propagate.
    """
    real_url_dict = {}
    for key, video_id in Get_Video_Id(url).items():
        try:
            real_url_dict[key] = _Get_Flv_Url(video_id)
        except (KeyError, IndexError, ValueError) as err:
            plugin.log.warning('Skipping video %s: %r', video_id, err)
    return real_url_dict


def _Get_Flv_Url(video_id):
    """Query the getPlayList service for video_id and build its flv address."""
    response = urllib2.urlopen('http://v.youku.com/player/getPlayList/VideoIDS/' + video_id)
    try:
        info = json.loads(response.read().decode('utf-8'))
    finally:
        response.close()
    data = info['data'][0]
    # Stream qualities offered by the service: flv (standard), mp4 (high),
    # hd2 (super); only flv is used here.
    segs = data['segs']
    mixed = Get_Mix_String(data['seed'])
    sid = Create_Sid()
    # streamfileids is a '*'-terminated list of indexes into the shuffled
    # character table; looking each one up yields the file id.
    ids = data['streamfileids']['flv'].split('*')[:-1]
    vid = ''.join(mixed[int(i)] for i in ids)
    seg_urls = []
    for s in segs['flv']:
        no = '%02x' % int(s['no'])
        # Characters 8-9 of the file id are replaced by the segment number
        # (two upper-case hex digits).
        seg_urls.append('http://f.youku.com/player/getFlvPath/sid/%s_%s/st/flv/fileid/%s%s%s?K=%s&ts=%s' % (sid, no, vid[:8], no.upper(), vid[10:], s['k'], s['seconds']))
    if not seg_urls:
        raise ValueError('no flv segments for video %s' % video_id)
    if len(seg_urls) == 1:
        return seg_urls[0]
    # Previously only the last segment survived the loop, so a multi-part
    # video played just its final part. Kodi plays the parts of a stack://
    # path back to back.
    # NOTE(review): verify stack:// playback on the targeted Kodi version.
    return 'stack://' + ' , '.join(seg_urls)
# Build the xbmc item list
def Get_Items(url):
    """Return the list of playable item dicts for the show page at url.

    Each item has 'label' (video title), 'path' (address from
    Get_RealPlay_Url) and 'is_playable': True. 'thumbnail' is added when
    Get_Video_thumb found an image whose alt text equals the title; a video
    without a matching image is still listed, just without a thumbnail.
    """
    real_url_dict = Get_RealPlay_Url(url)
    video_pic_dict = Get_Video_thumb(url)
    item = []
    for key, values in real_url_dict.items():
        entry = {'label':key, 'path':values, 'is_playable':True}
        # .get() instead of [] so a missing thumbnail cannot raise KeyError
        # and take the whole listing down.
        thumbnail_url = video_pic_dict.get(key)
        if thumbnail_url:
            entry['thumbnail'] = thumbnail_url
        item.append(entry)
    return item
@plugin.route('/')
def index():
    """Root menu: one non-playable folder entry per show."""
    # (label as UTF-8 bytes, route endpoint name, thumbnail URL)
    channels = (
        ('亲宝儿歌', 'show_qinbao_video', 'http://r3.ykimg.com/050E000051B6F860675839709C0377AB'),
        ('贝瓦儿歌', 'show_beva_video', 'http://r3.ykimg.com/050E0000525F416567583974A90C31A9'),
    )
    menu = []
    for title, endpoint, thumb in channels:
        menu.append({
            'label': title.decode('utf-8'),
            'path': plugin.url_for(endpoint),
            'is_playable': False,
            'thumbnail': thumb,
        })
    return plugin.finish(menu)
@plugin.route('/show_qinbao_video/')
def show_qinbao_video():
    """List the videos of show page id_z750491c6a69911e2b2ac, sortable by label."""
    listing = Get_Items('http://www.youku.com/show_page/id_z750491c6a69911e2b2ac.html')
    return plugin.finish(listing, sort_methods=['label'])
@plugin.route('/show_beva_video/')
def show_beva_video():
    """List the videos of show page id_z440609eae5f311e1a19e, sortable by label."""
    listing = Get_Items('http://www.youku.com/show_page/id_z440609eae5f311e1a19e.html')
    return plugin.finish(listing, sort_methods=['label'])
if __name__ == '__main__':
    # Script entry point: hand control to the xbmcswift2 plugin instance.
    plugin.run()
    # print Get_Items(url)