电影天堂python脚本

转载请注明作者和出处: https://www.nb-spring.cn/a/gongsixinwen/20190310/53.html
CSDN同步更新地址:https://blog.csdn.net/yml1984/article/details/88375021
知乎同步更新地址:https://zhuanlan.zhihu.com/p/58848188
Github代码获取:https://github.com/311111a/python-/blob/master/
Python版本: Python3.x
运行平台: Windows
IDE: Notepad++
文章首发地址:https://www.nb-spring.cn/

import csv
from urllib.parse import urljoin

import requests  # HTTP client used to download the pages
from lxml import etree
from lxml import html

# Module-level state shared by the crawler functions in this file.
start_url = 'https://www.dygod.net/html/gndy/dyzz/index.html'  # first list page
urls = []  # list-page URLs, filled by url_link()
movieUrls = []  # movie detail-page URLs, filled by html_heat()
def url_link():
    """Fill the module-level ``urls`` list with the list pages to crawl.

    ``start_url`` is appended first, followed by the numbered pages
    ``index_1.html`` and ``index_2.html``. Nothing is returned; the result
    is the mutation of ``urls``.
    """
    page_prefix = 'https://www.dygod.net/html/gndy/dyzz/index_'
    urls.append(start_url)
    urls.extend(page_prefix + str(page_no) + '.html' for page_no in (1, 2))
        
def html_heat(urls):
    """Collect movie detail-page URLs from every list page in ``urls``.

    Each page is downloaded, decoded with the encoding detected from its
    content, and searched for the ``href`` of every element whose class is
    ``ulink``. Each link is resolved against the site root and appended to
    the module-level ``movieUrls`` list.

    Args:
        urls: Iterable of list-page URLs to download.

    Raises:
        requests.RequestException: If a page cannot be downloaded, including
            when the server does not answer within the timeout.
    """
    site_root = 'https://www.dygod.net/'
    for page_url in urls:
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        response = requests.get(page_url, timeout=10)
        # Decode with the encoding detected from the body to avoid garbled text.
        response.encoding = response.apparent_encoding
        tree = html.fromstring(response.text)
        for href in tree.xpath('//*[@class="ulink"]/@href'):
            # urljoin avoids the double slash that plain concatenation gave
            # for root-relative hrefs and leaves absolute URLs untouched.
            movieUrls.append(urljoin(site_root, href))

def movie(movieUrls):
    """Scrape every movie detail page and store its title/link rows.

    Each page is downloaded and decoded with the encoding detected from its
    content. Download links and title text are extracted with fixed XPath
    expressions, and one row is saved through ``save_result`` for every
    (link, title) combination. A page on which either XPath matches nothing
    therefore produces no rows.

    Args:
        movieUrls: Iterable of movie detail-page URLs.

    Raises:
        requests.RequestException: If a page cannot be downloaded, including
            when the server does not answer within the timeout.
    """
    for page_url in movieUrls:
        # A timeout keeps one unresponsive page from hanging the whole crawl.
        response = requests.get(page_url, timeout=10)
        # Decode with the encoding detected from the body to avoid garbled text.
        response.encoding = response.apparent_encoding
        tree = html.fromstring(response.text)
        # NOTE(review): this XPath requires a literal <tbody> in the served
        # markup; confirm the site emits one, otherwise no link will match.
        links = tree.xpath('//table[2]//tbody//tr//td//a/@href')
        names = tree.xpath('//div/div[3]/div/div[4]/div[1]/h1/text()')
        for link in links:
            for name in names:
                # A fresh dict per row; each row is persisted immediately.
                save_result({"name": name, "link": link})

def save_result(item):
    """Append one result row to ``dytt.csv`` in the current directory.

    Args:
        item: Mapping providing the ``name`` and ``link`` column values.
    """
    columns = ['name', 'link']
    # The file is opened in append mode for each row, so earlier rows are kept.
    with open('dytt.csv', 'a', newline='', encoding='utf-8') as out_file:
        csv.DictWriter(out_file, fieldnames=columns).writerow(item)

def main():
    """Run the crawl: prepare ``dytt.csv``, then fetch and store all rows.

    The CSV header is written only when the file is new or empty, so running
    the script again appends rows without inserting a second header line.
    The list pages, detail-page links and movie rows are then gathered by
    ``url_link``, ``html_heat`` and ``movie`` in that order.
    """
    # Same encoding as the row writer so the whole file is consistently UTF-8.
    with open('dytt.csv', 'a', newline='', encoding='utf-8') as csvfile:
        # In append mode the stream starts at the end of the file, so a
        # position of 0 means there is no content (and no header) yet.
        if csvfile.tell() == 0:
            csv.writer(csvfile).writerow(['name', 'link'])
    # NOTE(review): these headers are never sent; html_heat() and movie()
    # call requests.get() without a headers argument. The key is spelled
    # 'User-Agent' (hyphen), which is the actual HTTP header name.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    url_link()
    html_heat(urls)
    movie(movieUrls)
if __name__ == '__main__':     # run the main program only when executed as a script
    main()

转发自宁波斯谱瑞环保科技有限公司(www.nb-spring.cn)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值