今日份工作需要,记录一下。
TXT文档去重
def dedupe_lines(src_path='IP.txt', dst_path='EIP.txt'):
    """Copy the unique lines of ``src_path`` to ``dst_path`` in first-seen order.

    Lines are compared with their line ending removed, so a final line that
    lacks a trailing newline is still recognised as a duplicate of an
    earlier, newline-terminated copy. Every kept entry is echoed to stdout
    and written to the output file followed by a single newline.

    Args:
        src_path: Text file to read, one entry per line.
        dst_path: Text file to (over)write with the de-duplicated entries.

    Returns:
        The number of unique entries written.

    Raises:
        OSError: If either file cannot be opened.
    """
    line_seen = set()  # entries already written; set gives O(1) membership
    # ``with`` closes both files even if reading or writing fails midway.
    with open(src_path, 'r') as in_file, open(dst_path, 'w') as out_file:
        for line in in_file:
            entry = line.rstrip('\r\n')
            if entry in line_seen:
                continue
            line_seen.add(entry)
            print(entry)
            out_file.write(entry + '\n')
    return len(line_seen)


if __name__ == '__main__':
    dedupe_lines()
看下效果
接口返回状态码探测
import requests
import random
# Pool of User-Agent strings; one is picked at random for every request.
user_agents = [
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
]


def load_urls(path='url.txt'):
    """Return the non-blank lines of ``path`` with surrounding whitespace removed.

    Stripping matters because ``readlines()`` keeps the trailing newline,
    which would otherwise be sent as part of the URL and break the printed
    report across two lines.
    """
    with open(path, 'r') as url_file:
        return [line.strip() for line in url_file if line.strip()]


def probe_status_codes(path='url.txt', timeout=10):
    """GET every URL listed in ``path`` and report 404 / 200 responses.

    Args:
        path: Text file with one URL per line.
        timeout: Seconds to wait for each request before giving up, so a
            single unresponsive host cannot stall the whole scan.
    """
    for url in load_urls(path):
        headers = {'User-Agent': random.choice(user_agents)}
        try:
            response = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Report the failure and keep scanning the remaining URLs.
            print(url + '请求异常: ' + str(exc))
            continue
        response.encoding = "utf-8"
        # Compare the numeric status code rather than the object's repr.
        if response.status_code == 404:
            print(url + '请求404')
        if response.status_code == 200:
            print(url + '请求200')


if __name__ == '__main__':
    probe_status_codes()
看下效果