待处理的电影天堂

# coding=utf-8

import requests
from lxml import etree
# Base domain of the target site; list pages contain site-relative hrefs
# that are joined onto this prefix.
a='https://www.dytt8.net'
# Browser-like headers so the site serves normal pages to the scraper.
headers={
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.80 Safari/537.36',
'Referer':'https://www.dytt8.net/html/gndy/dyzz/list_23_3.html'
}


def get_detail_urls(url):
    """Fetch one paginated list page and return absolute detail-page URLs.

    Args:
        url: URL of a list page (list_23_N.html).

    Returns:
        list[str]: absolute URLs of each movie's detail page found in
        the listing table.
    """
    response = requests.get(url, headers=headers)
    html = etree.HTML(response.text)
    # Hrefs in the listing table are site-relative; prefix the base domain.
    detail_urls = html.xpath("//table[@class='tbspan']//a/@href")
    # Original code wrapped the lambda in print() (returns None) and then
    # called the resulting map object — both TypeErrors. A comprehension
    # performs the intended join and returns a real list.
    return [a + detail_url for detail_url in detail_urls]

def spider(url):
    """Crawl list pages 1-7 and parse every movie detail page found.

    Args:
        url: unused; kept so the existing call signature is unchanged.

    Returns:
        list[dict]: one metadata dict per movie, as produced by
        parse_detail_page.
    """
    # '{}' placeholder — the original string had a literal '()', which
    # str.format() leaves untouched, so every iteration fetched the same URL.
    base_url = "https://www.dytt8.net/html/gndy/dyzz/list_23_{}.html"
    movies = []
    # range(), not format(): iterate page numbers 1..7.
    for page in range(1, 8):
        page_url = base_url.format(page)
        for detail_url in get_detail_urls(page_url):
            # Collect results instead of discarding each parsed movie.
            movies.append(parse_detail_page(detail_url))
    return movies

def parse_detail_page(url):
    """Fetch one movie detail page and extract its metadata.

    Args:
        url: absolute URL of a movie detail page.

    Returns:
        dict: with 'title', 'covers'/'screenshot' (when images exist) and
        'infos' (stripped, non-empty text lines of the #zoom description).
    """
    movie = {}
    response = requests.get(url, headers=headers)
    html = etree.HTML(response.text)
    # Original XPath was missing the closing ']' after @class='title all',
    # which makes lxml raise XPathEvalError.
    title = html.xpath("//div[@class='title all']//font[@color='#07519a']/text()")[0]
    movie['title'] = title
    print(title)

    zoomE = html.xpath("//div[@id='zoom']")[0]
    # Take the src attribute values directly. The original selected a single
    # <img> element ([0]) and then indexed it with [0]/[1] as if it were a
    # list of two image URLs.
    imgs = zoomE.xpath(".//img/@src")
    if imgs:
        movie['covers'] = imgs[0]
    if len(imgs) > 1:
        movie['screenshot'] = imgs[1]
    infos = zoomE.xpath(".//text()")
    print(infos)
    # The original ended with an empty 'for info in infos:' (a syntax
    # error); keep the cleaned description lines and return the result so
    # spider() receives a dict instead of None.
    movie['infos'] = [info.strip() for info in infos if info.strip()]
    return movie


转载于:https://www.cnblogs.com/lumc5/p/11216500.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值