# -*- coding: utf-8 -*-
#2345电影排行榜
import requests
from bs4 import BeautifulSoup
# Generic helper for fetching a web page
def get_html(url):
    """Fetch *url* and return the response body decoded as GBK.

    Args:
        url: Address of the page to download.

    Returns:
        The page text, or ``None`` when the request fails (connection
        error, timeout, or a non-2xx status). On failure the message
        ``wrong`` is printed, so callers must check for ``None``.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "page unavailable";
        # programming errors and KeyboardInterrupt now propagate.
        print("wrong")
        return None
    # Show the encoding requests guessed before it is overridden below.
    print(r.encoding)
    # Force GBK so that r.text is decoded with that codec.
    r.encoding = 'gbk'
    return r.text
# Generic helper for downloading an image
def get_pic_from_url(url, filename='filename'):
    """Download the resource at *url* and write its raw bytes to *filename*.

    Args:
        url: Address of the image to download.
        filename: Path of the file to write. Defaults to ``'filename'``,
            the path this helper has always written to.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status (so an error page is never
            saved as if it were an image).
        OSError: If the target file cannot be written.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # The context manager guarantees the file handle is closed.
    with open(filename, 'wb') as f:
        f.write(response.content)
def _node_text(node, default=''):
    """Return the text of a BeautifulSoup node, or *default* if it is None."""
    return node.text if node is not None else default


def _save_poster(img_url, name):
    """Download *img_url* and store it as ``<name>.png`` in the image folder.

    A failed download is reported on stdout and skipped so that one bad
    image does not abort the whole run.
    """
    import os
    import re

    # NOTE(review): 'C:testdata' (no slash after the drive letter) is a
    # drive-relative path on Windows; kept unchanged -- confirm it is intended.
    target_dir = 'C:testdata/image/'
    # Movie titles may contain characters that are not allowed in file names.
    safe_name = re.sub(r'[\\/:*?"<>|]', '_', name)
    try:
        response = requests.get(img_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to download {}: {}'.format(img_url, exc))
        return
    os.makedirs(target_dir, exist_ok=True)
    # NOTE(review): the file is always given a .png extension, whatever the
    # format of the downloaded image actually is.
    with open(target_dir + safe_name + '.png', 'wb') as f:
        f.write(response.content)


def main(url):
    """Scrape the movie list at *url*, print each entry and save its poster.

    For every ``<li>`` inside the ``<ul class="picList clearfix">`` element
    the title, release date, cast and synopsis are printed, and the poster
    image is downloaded.

    Args:
        url: Address of the ranking page; also used as the base for
            resolving protocol-relative or relative image URLs.
    """
    from urllib.parse import urljoin

    html = get_html(url)
    if html is None:
        # get_html has already reported the failure.
        return
    soup = BeautifulSoup(html, 'lxml')
    # The <ul> element that wraps all movie entries.
    movies_list = soup.find('ul', attrs={'class': 'picList clearfix'})
    if movies_list is None:
        print('movie list not found')
        return

    for li in movies_list.find_all('li'):
        # Poster, e.g. <img src="//imgwx4.2345.com/.../sup198834_223x310.jpg" ...>
        img = li.find('img')
        # Title, e.g. <span class="sTit"><a href="...">NAME</a></span>
        title = li.find('span', attrs={'class': 'sTit'})
        if img is None or not img.get('src') or title is None or title.a is None:
            # Not a movie entry (or incomplete markup): nothing to show.
            continue
        img_src = img['src']
        name = title.a.text

        # Release date, e.g. <span class="sIntro">上映时间:2015-05-12</span>;
        # entries without that span get the placeholder text.
        release = _node_text(li.find('span', attrs={'class': 'sIntro'}), '还没上映')

        # Cast, e.g. <p class="pActor">主演: <a>..</a> <a>..</a></p>.
        # get_text joins every text fragment with a single space, which also
        # copes with children whose .string is None.
        actors = li.find('p', attrs='pActor')
        act = actors.get_text(' ', strip=True) if actors is not None else ''

        # Synopsis, e.g. <p class="pTxt pIntroShow">简介:...</p>
        intro = _node_text(li.find('p', attrs={'class': 'pTxt pIntroShow'}))

        print('{}\t{}\n{}\n{}\n'.format(name, release, act, intro))

        # urljoin turns "//host/path" into "http://host/path" (scheme taken
        # from *url*) and leaves already-absolute URLs untouched.
        _save_poster(urljoin(url, img_src), name)
# Page to scrape: the 2345 movie ranking list
url = 'http://dianying.2345.com/top/'

# Run the scraper only when this file is executed as a script
if __name__ == '__main__':
    main(url)
# Python crawler, part 6: scraping movie posters and synopses
# (Blog page footer: latest recommended article published on 2024-04-15 16:44:18)