爬取网址:https://v.paixin.com/
声明:本篇博客只是为了分享技术,严禁转载用于商用,后果自负。
抓取步骤:
1.输入关键词;此处输入‘打架’
2.获取ID(即搜索接口返回的每个元素的 image 字段,拼接上 https: 前缀后即为图片下载地址)
直接上代码:
import requests
import spider_tools as t
import time
# Crawl the paixin.com search API page by page and download every image
# listed in the results. The crawl is best-effort: a failing page or image is
# reported and skipped so the remaining work still runs.

# HTTP headers sent with every search request, converted from the raw
# "Name: value" text by the project helper t.str2dict.
header = t.str2dict('''
Content-Type: application/json;charset=UTF-8
Origin: https://v.paixin.com
User-Agent: Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36
''')

# JSON body of the search request. It is the same for every page, so it is
# built once instead of on every loop iteration.
search_payload = {
    'searchQuery': "打架",
    'type': "6",
}

# Result pages 38 through 106 (the end of range() is exclusive).
for page in range(38, 107):
    print(page)
    search_url = 'https://api2.paixin.com/medias/1/search?page={}&size=80'.format(page)

    try:
        result = requests.post(
            search_url,
            headers=header,
            json=search_payload,
            proxies=t.get_ip(),
            timeout=10,
        ).json()
        # A response without 'elements' yields None; treat it as an empty page.
        elements = result.get('elements') or []
    except Exception as exc:
        # Catch Exception rather than using a bare except so Ctrl-C still
        # stops the crawl; report the failure instead of hiding it.
        print('page {} failed: {!r}'.format(page, exc))
        continue

    for element in elements:
        # Each image is handled in its own try block so one bad download does
        # not discard the rest of the page.
        try:
            image_ref = element.get('image')
            if not image_ref:
                # Nothing to download for this entry.
                continue
            # Prefix the 'image' value with the scheme to form the download URL.
            image_url = 'https:{}'.format(image_ref)
            print(image_url)
            # The current timestamp serves as the file name.
            t.get_photo(image_url, image_path=r"C:\Users\Administrator\Desktop\image\{}.jpg".format(time.time()))
        except Exception as exc:
            print('image on page {} failed: {!r}'.format(page, exc))