#!/usr/bin/python
# -*- coding:utf-8 -*-
import re
import urllib
# Index page that lists the C++ video pages; passed to getVideoList() below.
vurl = 'http://www.enet.com.cn/eschool/video/c++/'
# Site root, prepended to the site-relative hrefs scraped from the index page.
domain = 'http://www.enet.com.cn'
def _fetchPage(url):
    """Return the raw body of *url*, closing the connection afterwards."""
    from contextlib import closing

    # Python 2's urlopen() result is not a context manager, hence closing().
    with closing(urllib.urlopen(url)) as response:
        return response.read()


def _buildVideoName(swfUrl, title):
    """Build the local file name for a video.

    The name is ``<folder>_<title>.swf`` where *folder* is the first path
    segment after ``c++/`` in *swfUrl* (omitted when the URL has no such
    segment) and *title* is the link text from the index page.
    """
    folder = re.search(r'c\+\+\/(.*?)\/.*?\.swf', swfUrl)
    prefix = folder.group(1) + '_' if folder else ''

    # The title comes from remote HTML: besides the original ':', ' ' and '.'
    # clean-up, neutralise path separators so it can never point outside the
    # current directory.
    safeTitle = title
    for unwanted, replacement in ((':', ''), (' ', '_'), ('.', '_'),
                                  ('/', '_'), ('\\', '_')):
        safeTitle = safeTitle.replace(unwanted, replacement)
    return prefix + safeTitle + '.swf'


def getVideoList(vurl):
    """Download every C++ video linked from the index page *vurl*.

    The index page is scanned for links to ``/eschool/video/autohtml/310/``
    pages. Each linked page is fetched (its href is joined with the
    module-level ``domain``), the first ``.swf`` URL found on it is saved
    into the current working directory, and a textual progress bar is
    printed after every page.

    Pages without a matching ``.swf`` link are reported and skipped instead
    of aborting the whole run. Network errors raised by ``urllib`` are not
    caught and propagate to the caller.

    Written against the Python 2 ``urllib`` API (``urlopen`` /
    ``urlretrieve``).

    Args:
        vurl: URL of the index page listing the video pages.

    Returns:
        None.
    """
    # Compile once; both patterns are reused for every page.
    listRe = re.compile(
        r'href="(/eschool/video/autohtml/310/.*?\.shtml)".*>(.*?)<\/a>')
    swfRe = re.compile(
        r'(http:\/\/images\.enet\.com\.cn\/eschool\/c\+\+\/.*?\.swf)')

    # Each entry is a (relative href, link title) pair.
    videoList = listRe.findall(_fetchPage(vurl))
    videoListCount = len(videoList)

    for x, (path, title) in enumerate(videoList, 1):
        pageUrl = domain + path
        swfUrls = swfRe.findall(_fetchPage(pageUrl))
        if swfUrls:
            videoName = _buildVideoName(swfUrls[0], title)
            urllib.urlretrieve(swfUrls[0], videoName)
            print('%s  to -->  %d  ==>  %d' % (videoName, x, videoListCount))
        else:
            print('skip (no .swf link found): %s' % pageUrl)

        # Floor division keeps ratio an int so it can be used as a repeat
        # count regardless of the interpreter's "/" semantics.
        ratio = x * 100 // videoListCount
        print('[ %s %s ]  %d %%' % ('#' * ratio, ' ' * (100 - ratio), ratio))

    print('endding!!')
# Script entry: start downloading from the index page. There is no
# __main__ guard, so this also runs if the file is imported.
getVideoList(vurl)
# Example: scraping C++ videos with Python
# (Source page footer: "latest recommended article published 2022-04-18 13:52:30")