# _*_ coding: utf-8 _*_
import requests
import sys
headers = {"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36" }##去重方法
def distinct_data():
##读取txt中文档的url列表datalist_blank=[]
pathtxt = 'H:/Request.txt'
with open(pathtxt) as f:
f_data_list=f.readlines() #d得到的是一个list类型forain f_data_list:
datalist_blank.append(a.strip())#去掉\n strip去掉头尾默认空格或换行符# print(datalist)data_dict={}
for data in datalist_blank:
#print(type(data),data,'\n')
#print(data.split('/'),'\n',data.split('/').index('m'),'\n')
#url中以/为切分,在以m为切分
##把m后面的值放进字典key的位置,利用字典特性去重
if int(data.split('/').index('m')) == 4 :
#此处为v6开头的url
#print(data