本次爬取的是一批物联网设备 IP 的 XHR 接口返回的 JSON 信息,
并通过解析该 JSON 得到设备的具体型号。
已有数据:
1.物联网ip和端口
2.在网站登录页面按 F12 打开开发者工具 → 选择 XHR → 按 Ctrl + R 刷新页面 → 在请求的 Headers 中获取 url
# -*- coding: utf-8 -*-
import io
import json
import re
from concurrent.futures import ProcessPoolExecutor

import requests
# Targets to probe, one entry per line of the input file (per the notes
# above: device IP and port).
urls_list = []
with open('E:/get_js/ip.txt', 'r') as f:
    for line in f:
        # strip() also removes a trailing '\r' left by CRLF line endings.
        target = line.strip()
        # Skip blank lines so no request is issued for an empty host.
        if target:
            urls_list.append(target)

# Browser-like User-Agent sent with every request.
hea = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}

# Shared worker pool (up to 20 processes) that runs the fetch jobs.
pool = ProcessPoolExecutor(20)
def request(url):
    """Fetch a device endpoint, trying plain HTTP first and HTTPS second.

    Args:
        url: Target without a scheme; it is interpolated after ``http://``
            and, on failure, after ``https://``.

    Returns:
        The ``requests`` response from whichever scheme answered.

    Raises:
        requests.RequestException: If the HTTPS retry fails as well.
    """
    try:
        return requests.get('http://%s' % url, headers=hea, timeout=30, verify=False)
    except requests.RequestException as e:
        # Report why HTTP failed before falling back to HTTPS.
        print(e)
    # Certificate verification is disabled (verify=False) on both attempts.
    return requests.get('https://%s' % url, headers=hea, timeout=30, verify=False)
def read_data(future, *args, **kwargs):
    """Append the device name from a finished fetch to ``productnvr.txt``.

    Used as a done-callback: ``future`` resolves to the response returned by
    the fetch job. The response body is parsed as JSON and its
    ``szDeviceName`` value is written as one line.

    Args:
        future: Completed future whose result exposes ``json()`` and
            ``encoding`` (a ``requests`` response in this script).
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        None. Failures are reported on stdout and the target is skipped.
    """
    try:
        response = future.result()
    except Exception as e:  # boundary: the worker may fail in arbitrary ways
        print('fetch failed: %s' % e)
        return

    # The encoding must be set before the body is decoded to have any effect.
    response.encoding = 'utf-8'
    try:
        product = response.json()["szDeviceName"] + '\n'
    except (ValueError, KeyError, TypeError) as e:
        # Body was not JSON, or it carried no usable device name.
        print('no device name in response: %r' % e)
        return

    # One write per device; UTF-8 so non-ASCII names are stored intact.
    with io.open('productnvr.txt', 'a', encoding='utf-8') as f:
        f.write(product)
def main():
    """Queue one fetch job per target and attach the result handler to it."""
    for target in urls_list:
        pool.submit(request, target).add_done_callback(read_data)
if __name__ == '__main__':
    # Queue every target, then block until all queued jobs have finished.
    main()
    pool.shutdown(wait=True)
    print('====sucfull====')