Instagram image/video batch downloader

A batch download tool for Instagram images and videos that I wrote for my own use. It works well enough, so I'm sharing it here to swap notes with everyone.
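Before running it, save the raw Cookie header from a logged-in Instagram browser session into a file named cookie.txt next to the script (the constants section below reads it verbatim). As a rough illustration only, the file is a single line in standard Cookie-header format; the cookie names below are examples and your own values will differ:

sessionid=...; csrftoken=...; ds_user_id=...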
import requests
import re
import os
from urllib.request import urlretrieve
import ssl

# Disable HTTPS certificate verification globally so urlretrieve doesn't choke on SSL errors
ssl._create_default_https_context = ssl._create_unverified_context

# --------------- Constants ---------------------
res = requests.session()
failed_links = []  # download links that failed, kept for a manual retry
cookie = open('cookie.txt', 'r').read()  # paste your Instagram cookie string into cookie.txt
h = {
    'cookie': cookie
}
# --------------- Functions ----------------------
def get_id_end(name):
    # Pull the numeric user id and the first pagination cursor out of the profile page HTML
    x = res.get('https://www.instagram.com/' + name + '/', headers=h).text
    uid = re.findall('profilePage_([^"]+)', x)[0]
    end = re.findall('end_cursor":"([^"]+)', x)[0]
    return [uid, end]
def get_first(name, uid, end):
    # Download the first page of posts into a folder named after the account
    if not os.path.exists(name):
        os.makedirs(name)
    bfurl = ('https://www.instagram.com/graphql/query/?query_hash=d496eb541e5c789274548bf473cc553e'
             '&variables=%7B%22id%22%3A%22' + uid + '%22%2C%22first%22%3A1%2C%22before%22%3A%22' + end + '%22%7D')
    x = res.get(bfurl, headers=h).json()['data']['user']['edge_owner_to_timeline_media']
    end = x['page_info']['end_cursor']
    data = x['edges']
    for i in data:
        is_video = i['node']['is_video']
        try:
            # Use the caption as a file-name prefix: strip URLs and characters
            # that are illegal or awkward in file names
            caption = i['node']['edge_media_to_caption']['edges'][0]['node']['text']
            text = re.sub(r'https://\S+', '',
                          caption.replace('\n', '').replace(':', '').replace("'", '').replace('"', '').replace('/', ''))
            if len(text) > 100:
                # Overlong caption: keep only the first word
                text = caption.split(' ')[0].replace('\n', '').replace(':', '').replace("'", '').replace('"', '').replace('/', '')
        except (IndexError, KeyError):
            text = 'No'  # post has no caption
        picid = i['node']['id']
        if not is_video:
            try:
                # Carousel post: download every child image
                pics = i['node']['edge_sidecar_to_children']['edges']
                j = 0
                for pic in pics:
                    src = pic['node']['display_resources'][-1]['src']
                    picname = name + '/' + text + '-' + picid + '-' + str(j) + '.jpg'
                    download(src, picname)
                    j += 1
            except KeyError:
                # Single-image post
                src = i['node']['display_resources'][-1]['src']
                picname = name + '/' + text + '-' + picid + '.jpg'
                download(src, picname)
        else:
            video = i['node']['video_url']
            videoname = name + '/' + text + '-' + picid + '.mp4'
            download(video, videoname)
        print(picid, 'done')
    return end
def get_over(name, uid, end):
    # Page through the remaining posts, 48 at a time, until a short page signals the end
    count = 48
    while count == 48:
        afurl = ('https://www.instagram.com/graphql/query/?query_hash=d496eb541e5c789274548bf473cc553e'
                 '&variables=%7B%22id%22%3A%22' + uid + '%22%2C%22first%22%3A48%2C%22after%22%3A%22' + end + '%22%7D')
        x = res.get(afurl, headers=h).json()['data']['user']['edge_owner_to_timeline_media']
        end = x['page_info']['end_cursor']
        data = x['edges']
        for i in data:
            is_video = i['node']['is_video']
            try:
                caption = i['node']['edge_media_to_caption']['edges'][0]['node']['text']
                text = re.sub(r'https://\S+', '',
                              caption.replace('\n', '').replace(':', '').replace("'", '').replace('"', '').replace('/', ''))
                if len(text) > 100:
                    text = caption.split(' ')[0].replace('\n', '').replace(':', '').replace("'", '').replace('"', '').replace('/', '')
            except (IndexError, KeyError):
                text = 'No'
            picid = i['node']['id']
            if not is_video:
                try:
                    pics = i['node']['edge_sidecar_to_children']['edges']
                    j = 0
                    for pic in pics:
                        src = pic['node']['display_resources'][-1]['src']
                        picname = name + '/' + text + '-' + picid + '-' + str(j) + '.jpg'
                        download(src, picname)
                        j += 1
                except KeyError:
                    src = i['node']['display_resources'][-1]['src']
                    picname = name + '/' + text + '-' + picid + '.jpg'
                    download(src, picname)
            else:
                video = i['node']['video_url']
                videoname = name + '/' + text + '-' + picid + '.mp4'
                download(video, videoname)
        count = len(data)
def download(file_link, file_name):
    # Skip files that already exist so the script can be re-run safely
    if not os.path.exists(file_name):
        try:
            urlretrieve(file_link, file_name)
            print(file_name, 'done')
        except Exception:
            print(file_name, 'download failed')
            failed_links.append(file_link)  # keep the link for a manual retry
if __name__ == '__main__':
    name = input('Enter the account to scrape: ')
    uu = get_id_end(name)                # get the user id and the initial cursor
    end = get_first(name, uu[0], uu[1])  # media before the cursor
    get_over(name, uu[0], end)           # media after the cursor
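One fragile spot above is building the GraphQL URLs by hand-concatenating percent-encoded JSON. As a minimal sketch of a tidier alternative, you can reuse the query_hash from the script and let the standard library handle the encoding; build_query_url is a hypothetical helper, not part of the original script:

import json
from urllib.parse import urlencode

QUERY_HASH = 'd496eb541e5c789274548bf473cc553e'  # same hash as the script above

def build_query_url(user_id, first, cursor, cursor_key):
    # cursor_key is 'before' for the first page and 'after' for later pages,
    # mirroring get_first() and get_over()
    variables = {'id': user_id, 'first': first, cursor_key: cursor}
    qs = urlencode({
        'query_hash': QUERY_HASH,
        'variables': json.dumps(variables, separators=(',', ':')),
    })
    return 'https://www.instagram.com/graphql/query/?' + qs

For example, build_query_url(uu[0], 48, end, 'after') yields the same URL that get_over() assembles by hand. Failed downloads accumulate in failed_links, so a retry pass over that list at the end of main would be an easy extension.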
On my WeChat public account (老魏) I post my own crawler write-ups and example projects; readers who are interested are welcome to follow and swap ideas.
