#-*- coding: utf8 -*-
"""Interactive console search for movie download links.

Prompts for a movie title, queries three sites in turn (阳光电影, BT天堂 and
电影淘淘) and prints whatever download links their result pages expose.
"""
import re
from urllib.parse import quote, unquote

import lxml  # noqa: F401 -- imported by the original file; 'lxml' is the parser BeautifulSoup is asked for
import requests
from bs4 import BeautifulSoup

# Seconds to wait for a single HTTP response. requests never times out on its
# own, so without this one stalled site would freeze the prompt forever.
REQUEST_TIMEOUT = 15

# (pattern, prefix, suffix) triples tried in order on a 电影淘淘 detail page.
# Each pattern consumes exactly prefix + capture + suffix, so gluing the three
# back together reproduces the link as it appears in the page.
_DYTT_LINK_FORMATS = (
    (r'ed2k:(.*?)\|/', 'ed2k:', '|/'),
    ('http:(.*?)torrent', 'http:', 'torrent'),
    ('ftp:(.*?)mkv', 'ftp:', 'mkv'),
)


def _fetch_text(url, encoding):
    """Download *url* and decode the body with *encoding*, dropping bad bytes."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    return response.content.decode(encoding, 'ignore')


def _fetch_soup(url, encoding):
    """Download *url* and return it parsed with the lxml parser."""
    return BeautifulSoup(_fetch_text(url, encoding), 'lxml')


def _parse_page_count(matches):
    """Return the integer held in the first regex capture, or 0 if unusable."""
    if not matches:
        return 0
    try:
        return int(matches[0])
    except (TypeError, ValueError):
        return 0


def _is_unwanted_game(title, name):
    """Return True when *title* mentions 游戏 but the search term does not.

    Two of the three search functions receive the term percent-encoded as
    GB2312, so it is decoded first; a term that is not percent-encoded passes
    through ``unquote`` unchanged.
    """
    query = unquote(str(name), encoding='gb2312')
    return '游戏' not in query and '游戏' in title


def _ygdy_page_count(pager_cells):
    """Read the page count from the pager cells of a 阳光电影 result page.

    NOTE(review): the second pager cell is assumed to link to the last page,
    as the original variable name ``last_page_url`` suggests -- confirm.
    """
    if len(pager_cells) < 2:
        return 0
    anchor = pager_cells[1].find('a')
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return 0
    # The appended quote gives the lazy capture something to stop at when
    # PageNo is the last query parameter.
    return _parse_page_count(re.findall('PageNo=(.*?)"', href + '"'))


def get_name():
    """Prompt for movie titles until stdin ends, searching every site for each."""
    while True:
        try:
            moviename = input('请输入要查找的电影名\n->')
        except (EOFError, KeyboardInterrupt):
            return
        try:
            moviename_quote = quote(moviename.encode('gb2312'))
        except UnicodeEncodeError:
            print('电影名无法用GB2312编码,请重新输入')
            continue
        searches = (
            (get_url_from_ygdy, moviename_quote),
            (get_url_from_bttiantang, moviename),
            (get_url_from_dytt, moviename_quote),
        )
        for search, keyword in searches:
            # One unreachable site must not abort the remaining searches.
            try:
                search(keyword)
            except requests.RequestException as err:
                print('搜索失败:', err)


def get_url_from_ygdy(moviename):
    """Search 阳光电影 for *moviename* and print the links of every hit.

    Args:
        moviename: search term, already percent-encoded as GB2312.
    """
    baseurl = 'http://s.dydytt.net/plus/search.php?kwtype=0&keyword='
    url = baseurl + str(moviename)
    soup = _fetch_soup(url, 'gb2312')
    pager_cells = soup.find_all('td', width="30")
    movie_infos = soup.find_all('td', width="55%")
    if not movie_infos:
        print('查无此电影,请检查后重试')
        return

    print('阳光电影搜索结果:')
    page_count = _ygdy_page_count(pager_cells)
    if page_count <= 0:
        # No usable pager: everything is on the page already downloaded.
        for movie_info in movie_infos:
            get_info(movie_info, moviename)
        return

    for page_no in range(1, page_count + 1):
        print('第', page_no, '页:')
        page_url = url + '&PageNo=' + str(page_no)
        page_soup = _fetch_soup(page_url, 'gb2312')
        for movie_info in page_soup.find_all('td', width='55%'):
            get_info(movie_info, moviename)


def get_info(movie_info, name):
    """Print the title and download links of one 阳光电影 search hit.

    Args:
        movie_info: the result cell; its first anchor points to the detail page.
        name: the search term, used to decide whether game hits are wanted.
    """
    anchor = movie_info.find('a')
    movie_url = anchor.get('href') if anchor is not None else None
    if not movie_url:
        return
    title = movie_info.text
    if _is_unwanted_game(title, name):
        return

    print('电影名:', title)
    detail = _fetch_soup('http://www.ygdy8.com' + movie_url, 'gbk')
    cells = detail.find_all('td', style="WORD-WRAP: break-word")
    anchors = (cell.find('a') for cell in cells)
    links = [link.string for link in anchors if link is not None]
    print('下载链接:')
    if len(links) == 1:
        print(links[0])
    else:
        for number, link in enumerate(links, start=1):
            print('链接', number, ':', link)
    print('\n')


def get_url_from_bttiantang(moviename):
    """Search BT天堂 for *moviename* and print the links of every hit.

    Args:
        moviename: search term as typed by the user (not percent-encoded).
    """
    baseurl = 'http://www.bttiantang.com/s.php?q=' + str(moviename)
    html = _fetch_text(baseurl, 'utf8')
    # NOTE(review): the lazy group at the very end of this pattern can only
    # capture an empty string, so no page count is ever parsed and int('')
    # used to raise ValueError. The pattern looks truncated -- confirm the
    # intended text after the group. Until then only the first page is listed.
    page_count = _parse_page_count(re.findall('条(.*?)', html))
    soup = BeautifulSoup(html, 'lxml')
    entries = soup.find_all('p', class_="tt cl")
    if not entries:
        print('查无此电影,请检查后重试')
        return

    print('BT天堂搜索结果:')
    if page_count <= 0:
        for entry in entries:
            get_movieinfo(entry, moviename)
        return

    for page_no in range(1, page_count + 1):
        print('第', page_no, '页:')
        page_url = baseurl + '&PageNo=' + str(page_no)
        page_soup = _fetch_soup(page_url, 'utf8')
        for entry in page_soup.find_all('p', class_="tt cl"):
            get_movieinfo(entry, moviename)


def get_movieinfo(movie_content, name):
    """Print the title and download links of one BT天堂 search hit.

    Args:
        movie_content: the result paragraph; its first anchor points to the
            detail page.
        name: the search term, used to decide whether game hits are wanted.
    """
    anchor = movie_content.find('a')
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return
    url = 'http://www.bttiantang.com/' + href
    title = movie_content.text
    if _is_unwanted_game(title, name):
        return

    print('电影名:', title)
    detail = _fetch_soup(url, 'utf8')
    print('下载链接:')
    number = 0
    for block in detail.find_all('div', class_='tinfo'):
        link = block.find('a')
        link_href = link.get('href') if link is not None else None
        if not link_href:
            continue
        number += 1
        print('链接' + str(number) + ':')
        print('http://www.bttiantang.com' + link_href)


def get_url_from_dytt(moviename):
    """Search 电影淘淘 for *moviename* and print the links of every hit.

    Args:
        moviename: search term, already percent-encoded as GB2312.
    """
    baseurl = 'http://www.dytt.com/search.asp?searchword=' + str(moviename)
    html = _fetch_text(baseurl, 'gbk')
    page_count = _parse_page_count(
        re.findall('下一页.*?href.*?page=(.*?)&', html))
    soup = BeautifulSoup(html, 'lxml')
    items = soup.find_all('p', class_='s1')
    # The first 's1' paragraph is never treated as a hit (presumably a header
    # row -- verify against the site), so one or zero items means no results.
    if len(items) <= 1:
        print('查无此电影,请检查后重试')
        return

    print('电影淘淘搜索结果:')
    if page_count <= 0:
        for item in items[1:]:
            get_movieinfo_from_dytt(item, moviename)
        return

    for page_no in range(1, page_count + 1):
        print('第', page_no, '页:')
        url = baseurl + '&page=' + str(page_no)
        page_soup = _fetch_soup(url, 'gbk')
        for item in page_soup.find_all('p', class_='s1')[1:]:
            get_movieinfo_from_dytt(item, moviename)


def get_movieinfo_from_dytt(item, name):
    """Print the title and download links of one 电影淘淘 search hit.

    ed2k links are preferred; if the page has none, HTTP torrent links are
    listed, and failing that FTP ``mkv`` links.

    Args:
        item: the result paragraph; its first anchor carries the title and
            points to the detail page.
        name: the search term, used to decide whether game hits are wanted.
    """
    anchor = item.find('a')
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return
    title = anchor.text
    movieurl = 'http://www.dytt.com' + href
    if _is_unwanted_game(title, name):
        return

    print('电影名:', title)
    pagecontent = _fetch_text(movieurl, 'gbk')
    print('下载链接:')
    for pattern, prefix, suffix in _DYTT_LINK_FORMATS:
        links = re.findall(pattern, pagecontent)
        if not links:
            continue
        # The capture starts right after the prefix the pattern matched, so
        # only that prefix is put back. The former 'ed2k://|file|' prefix
        # repeated text the capture already begins with.
        for number, link in enumerate(links, start=1):
            print('链接' + str(number) + ':', prefix + link + suffix)
        break


if __name__ == '__main__':
    get_name()