Batch downloading Instagram images and videos

This is a little Instagram image and video downloader I wrote. It works well enough for my own use, and I'm sharing it here so we can swap notes.

import requests
import re
import os
from urllib.request import urlretrieve
import ssl
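# Disable SSL certificate verification (avoids handshake errors behind some proxies)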
ssl._create_default_https_context = ssl._create_unverified_context
# ---------------     Constants ---------------------
res = requests.session()
failed = []  # links that could not be downloaded, kept for a later retry
cookie = open('cookie.txt', 'r').read()  # paste your Instagram cookie into cookie.txt
h = {
    'cookie': cookie
}
# ---------------    Functions ----------------------
def get_id_end(name):
    # Scrape the profile page for the numeric user id and the first end_cursor
    x = res.get('https://www.instagram.com/' + name + '/', headers=h).text
    user_id = re.findall('profilePage_([^"]+)', x)[0]
    end = re.findall('end_cursor":"([^"]+)', x)[0]
    return [user_id, end]

def clean_caption(node):
    # Build a filesystem-safe name from the post caption; fall back to "No"
    try:
        raw = node['edge_media_to_caption']['edges'][0]['node']['text']
    except (KeyError, IndexError):
        return "No"
    def sanitize(s):
        s = re.sub(r'https://\S+', '', s)  # drop URLs before stripping ':' and '/'
        for ch in '\n:\'"/':
            s = s.replace(ch, '')
        return s
    text = sanitize(raw)
    if len(text) > 100:
        # Very long caption: keep only the first word
        text = sanitize(raw.split(' ')[0])
    return text

def save_post(name, node):
    # Download every media file in one post: single image, carousel, or video
    text = clean_caption(node)
    picid = node['id']
    if node['is_video']:
        download(node['video_url'], name + '/' + text + '-' + picid + '.mp4')
    else:
        try:
            # Carousel post: several images under edge_sidecar_to_children
            for j, pic in enumerate(node['edge_sidecar_to_children']['edges']):
                src = pic['node']['display_resources'][-1]['src']
                download(src, name + '/' + text + '-' + picid + '-' + str(j) + '.jpg')
        except KeyError:
            # Single-image post
            src = node['display_resources'][-1]['src']
            download(src, name + '/' + text + '-' + picid + '.jpg')
    print(picid, 'done')

def get_first(name, user_id, end):
    # Fetch the one post *before* the initial cursor, then hand back the new cursor
    if not os.path.exists(name):
        os.makedirs(name)
    bfurl = ('https://www.instagram.com/graphql/query/'
             '?query_hash=d496eb541e5c789274548bf473cc553e'
             '&variables=%7B%22id%22%3A%22' + user_id +
             '%22%2C%22first%22%3A1%2C%22before%22%3A%22' + end + '%22%7D')
    x = res.get(bfurl, headers=h).json()['data']['user']['edge_owner_to_timeline_media']
    end = x['page_info']['end_cursor']
    for i in x['edges']:
        save_post(name, i['node'])
    return end

def get_over(name, user_id, end):
    # Page through the rest of the timeline, 48 posts per request
    l = 48
    while l == 48:
        afurl = ('https://www.instagram.com/graphql/query/'
                 '?query_hash=d496eb541e5c789274548bf473cc553e'
                 '&variables=%7B%22id%22%3A%22' + user_id +
                 '%22%2C%22first%22%3A48%2C%22after%22%3A%22' + end + '%22%7D')
        x = res.get(afurl, headers=h).json()['data']['user']['edge_owner_to_timeline_media']
        data = x['edges']
        for i in data:
            save_post(name, i['node'])
        if not x['page_info']['has_next_page']:
            break  # last page reached, even if it held exactly 48 posts
        end = x['page_info']['end_cursor']
        l = len(data)

def download(file_link, file_name):
    if os.path.exists(file_name):
        return  # already downloaded earlier, skip
    try:
        urlretrieve(file_link, file_name)
        print(file_name, 'saved')
    except Exception:
        print(file_name, 'download failed')
        failed.append(file_link)  # remember the link so it can be retried

if __name__ == '__main__':
    name = input('Account to download: ')
    uu = get_id_end(name)                # user id and initial end_cursor
    end = get_first(name, uu[0], uu[1])  # media before the cursor
    get_over(name, uu[0], end)           # media after the cursor
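
The hand-percent-encoded `variables` blob in `bfurl` and `afurl` is easy to get wrong. As a rough sketch (the helper name `build_query_url` is my own, not part of the script), the same URLs can be produced with the standard library:

import json
from urllib.parse import quote

def build_query_url(user_id, first, cursor_key, cursor):
    # Serialize the GraphQL variables as compact JSON, then percent-encode the blob
    variables = json.dumps({'id': user_id, 'first': first, cursor_key: cursor},
                           separators=(',', ':'))
    return ('https://www.instagram.com/graphql/query/'
            '?query_hash=d496eb541e5c789274548bf473cc553e'
            '&variables=' + quote(variables))

# e.g. build_query_url(uu[0], 48, 'after', end) reproduces afurl above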

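The `failed` list collects links whose download failed, but the script never retries them. A minimal retry pass might look like this, assuming the file name can be recovered from the URL's last path segment (that assumption, and the function name `retry_failed`, are mine):

from urllib.parse import urlparse

def retry_failed(folder):
    # Try each failed link once more, naming the file after the URL path tail
    for link in list(failed):
        file_name = folder + '/' + os.path.basename(urlparse(link).path)
        try:
            urlretrieve(link, file_name)
            failed.remove(link)
            print(file_name, 'saved on retry')
        except Exception:
            print(file_name, 'failed again')
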
On Lao Wei's WeChat official account I post my own crawler tips and case studies. If you're interested, feel free to follow and trade notes.
