Python 电影下载链接爬虫

# -*- coding: utf8 -*-
"""Movie download-link crawler.

Searches three Chinese movie sites -- 阳光电影 (s.dydytt.net / ygdy8),
BT天堂 (bttiantang.com) and 电影淘淘 (dytt.com) -- for a user-supplied
title and prints any download links (ed2k / torrent / ftp) found on the
result pages.
"""

from bs4 import BeautifulSoup
import requests, lxml  # lxml is imported so BeautifulSoup's 'lxml' parser is available
from urllib.parse import quote
import re


def get_name():
    """Prompt for a movie title in an endless loop and query all three sites."""
    while True:
        moviename = input('请输入要查找的电影名\n->')
        # ygdy and dytt expect a GB2312 percent-encoded query string.
        moviename_quote = quote(moviename.encode('gb2312'))
        get_url_from_ygdy(moviename_quote)
        get_url_from_bttiantang(moviename)
        get_url_from_dytt(moviename_quote)


def get_url_from_ygdy(moviename):
    """Search 阳光电影 and print download links for every result page.

    :param moviename: GB2312 percent-encoded movie title.
    """
    baseurl = 'http://s.dydytt.net/plus/search.php?kwtype=0&keyword='
    url = baseurl + str(moviename)
    content = BeautifulSoup(
        requests.get(url).content.decode('gb2312', 'ignore'), 'lxml')
    # The pager cell (width="30") is present only when results span pages.
    first_page = content.find_all('td', width="30")
    movie_infos = content.find_all('td', width="55%")
    if not movie_infos:
        print('查无此电影,请检查后重试')
        return
    print('阳光电影搜索结果:')
    if not first_page:
        # Single result page: handle each hit directly.
        for movie_info in movie_infos:
            get_info(movie_info, moviename)
    else:
        # Extract the last page number from the pager link, e.g. ...PageNo=7
        # (the trailing '"' is appended so the pattern has a terminator).
        last_page_url = first_page[1].find('a').get('href') + '"'
        pattern = re.compile('PageNo=(.*?)"')
        pnt = re.findall(pattern, last_page_url)
        for i in range(int(pnt[0])):
            print('第', i + 1, '页:')
            page_url = url + '&PageNo=' + str(i + 1)
            pagecontent = BeautifulSoup(
                requests.get(page_url).content.decode('gb2312', 'ignore'),
                'lxml')
            movie_infos = pagecontent.find_all('td', width='55%')
            for movie_info in movie_infos:
                get_info(movie_info, moviename)


def get_info(movie_info, name):
    """Print the title and download link(s) for one 阳光电影 search result.

    :param movie_info: the <td width="55%"> result cell.
    :param name: the search term, used to filter out game downloads.
    """
    movie_url = movie_info.find('a').get('href')
    moviename = movie_info.text
    # Skip game downloads unless the user explicitly searched for one.
    # NOTE(review): `name` arrives percent-encoded from get_url_from_ygdy,
    # so '游戏' can never occur in it there -- confirm this is intended.
    if '游戏' not in name and '游戏' in moviename:
        return
    print('电影名:', moviename)
    url = 'http://www.ygdy8.com' + movie_url
    info = BeautifulSoup(
        requests.get(url).content.decode('gbk', 'ignore'), 'lxml')
    # Download links live in the styled <td> cells of the detail page.
    download = info.find_all('td', style="WORD-WRAP: break-word")
    print('下载链接:')
    if len(download) == 1:
        print(download[0].find('a').string)
    else:
        for each in range(len(download)):
            print('链接', each + 1, ':', download[each].find('a').string)
    print('\n')


def get_url_from_bttiantang(moviename):
    """Search BT天堂 and print download links for every result page.

    :param moviename: raw (unencoded) movie title.
    """
    baseurl = 'http://www.bttiantang.com/s.php?q=' + str(moviename)
    page_content = requests.get(baseurl).content.decode('utf8', 'ignore')
    # FIXME: the lazy group at the END of this pattern always captures '',
    # so when '条' occurs in the page, int(pagenum_info[0]) below raises
    # ValueError. The pattern was probably meant to capture the page count.
    pattern = re.compile('条(.*?)')
    pagenum_info = re.findall(pattern, page_content)
    page_content = BeautifulSoup(page_content, 'lxml')
    content = page_content.find_all('p', class_="tt cl")
    if not content:
        print('查无此电影,请检查后重试')
        return
    print('BT天堂搜索结果:')
    if not pagenum_info:
        for each in content:
            get_movieinfo(each, moviename)
    else:
        for i in range(int(pagenum_info[0])):
            print('第', i + 1, '页:')
            page_url = baseurl + '&PageNo=' + str(i + 1)
            page_content = BeautifulSoup(
                requests.get(page_url).content.decode('utf8', 'ignore'),
                'lxml')
            content = page_content.find_all('p', class_="tt cl")
            for each in content:
                get_movieinfo(each, moviename)


def get_movieinfo(movie_content, name):
    """Print the title and torrent-page link(s) for one BT天堂 result.

    :param movie_content: the <p class="tt cl"> result element.
    :param name: the search term, used to filter out game downloads.
    """
    url = 'http://www.bttiantang.com/' + movie_content.find('a').get('href')
    moviename = movie_content.text
    if '游戏' not in name and '游戏' in moviename:
        return
    print('电影名:', moviename)
    info = BeautifulSoup(
        requests.get(url).content.decode('utf8', 'ignore'), 'lxml')
    links = info.find_all('div', class_='tinfo')
    print('下载链接:')
    i = 0
    for each in links:
        i += 1
        print('链接' + str(i) + ':')
        print('http://www.bttiantang.com' + each.find('a').get('href'))


def get_url_from_dytt(moviename):
    """Search 电影淘淘 and print download links for every result page.

    :param moviename: GB2312 percent-encoded movie title.
    """
    baseurl = 'http://www.dytt.com/search.asp?searchword=' + str(moviename)
    content = requests.get(baseurl).content.decode('gbk', 'ignore')
    # Pull the last page number out of the '下一页' (next page) link.
    pattern = re.compile('下一页.*?href.*?page=(.*?)&')
    result = re.findall(pattern, content)
    content = BeautifulSoup(content, 'lxml')
    items = content.find_all('p', class_='s1')
    # The first <p class="s1"> is a header row, hence the == 1 emptiness
    # test and the +1 offsets when iterating results below.
    if len(items) == 1:
        print('查无此电影,请检查后重试')
        return
    print('电影淘淘搜索结果:')
    if not result:
        for i in range(len(items) - 1):
            get_movieinfo_from_dytt(items[i + 1], moviename)
    else:
        for i in range(int(result[0])):
            print('第', i + 1, '页:')
            url = baseurl + '&page=' + str(i + 1)
            page_content = BeautifulSoup(
                requests.get(url).content.decode('gbk', 'ignore'), 'lxml')
            items = page_content.find_all('p', class_='s1')
            for j in range(len(items) - 1):
                get_movieinfo_from_dytt(items[j + 1], moviename)


def get_movieinfo_from_dytt(item, name):
    """Print the title and ed2k/torrent/ftp link(s) for one 电影淘淘 result.

    :param item: a <p class="s1"> result element.
    :param name: the search term, used to filter out game downloads.
    """
    moviename = item.find('a').text
    movieurl = 'http://www.dytt.com' + item.find('a').get('href')
    if '游戏' not in name and '游戏' in moviename:
        return
    print('电影名:', moviename)
    pagecontent = requests.get(movieurl).content.decode('gbk', 'ignore')
    # Try ed2k links first, then .torrent URLs, then ftp .mkv links;
    # each regex strips the fixed prefix/suffix which is re-added on print.
    links = re.findall(re.compile(r'ed2k:(.*?)\|/'), pagecontent)
    i = 0
    print('下载链接:')
    if links:
        for link in links:
            i += 1
            print('链接' + str(i) + ':', 'ed2k:' + link + '|/')
    else:
        links = re.findall(re.compile('http:(.*?)torrent'), pagecontent)
        if links:
            for link in links:
                i += 1
                print('链接' + str(i) + ':', 'http:' + link + 'torrent')
        else:
            links = re.findall(re.compile('ftp:(.*?)mkv'), pagecontent)
            for link in links:
                i += 1
                print('链接' + str(i) + ':', 'ftp:' + link + 'mkv')


if __name__ == '__main__':
    get_name()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值