主要作用
日常对url进行一些小处理的小脚本
1、去重
2、url与域名互转
3、取出主域的url
代码如下:
#!/usr/bin/env python3
from encodings.utf_8 import decode
import statistics
import sys
from os import system
import re
from tokenize import Ignore
import urllib3
import requests
import os
#过滤类,覆盖写入
class guolv:
#获取当前路径
def get_path():
    """Return the current working directory.

    The previous implementation stored the directory in a local variable
    and then fell off the end of the function, so every caller received
    ``None``. The value is now returned.

    Returns:
        str: The path reported by ``os.getcwd()``.
    """
    return os.getcwd()
#过滤出主域的url
def mainurl(urls_file):
    """Reduce every URL in *urls_file* to its ``scheme://host`` prefix.

    Each input line is split on ``/`` (at most three splits). The first
    component (for example ``http:``) and the third component (the host,
    including a port if one is present) are rejoined with ``//``. Every
    result is printed, appended to the returned list, and written one per
    line to ``./main_urls.txt``, which is overwritten on each call.

    Lines with fewer than three ``/``-separated components (blank lines,
    bare host names, ...) are skipped. Previously such a line silently
    reused the scheme/host of the preceding line, or raised
    ``UnboundLocalError`` when it was the first line of the file.

    Args:
        urls_file: Path of a UTF-8 text file holding one URL per line.
            Undecodable bytes are ignored.

    Returns:
        list[str]: The extracted prefixes in input order; duplicates are
        kept.
    """
    results = []
    # The output file is opened first, as before, so it is truncated even
    # when the input file cannot be opened. Both handles are closed by the
    # ``with`` statement on every exit path.
    with open("./main_urls.txt", "w", encoding='utf-8', errors='ignore') as out_file, \
            open(urls_file, 'r', encoding='utf-8', errors='ignore') as src:
        for raw_line in src:
            parts = raw_line.strip('\n').split('/', 3)
            if len(parts) < 3:
                # Not enough components to hold both a scheme and a host.
                continue
            # NOTE(review): a scheme-less line such as "a.com/x/y" still
            # yields "a.com//y", as it did before -- confirm whether such
            # input should be rejected as well.
            res = parts[0] + "//" + parts[2]
            print(res)
            results.append(res)
            out_file.write(res + '\n')
    return results
#过滤出域名
def domain(urls_file):
f = open("./domains.txt", "w",encoding='utf-8',errors='ignore')
with open(urls_file,'r',encoding='utf-8') as d:
domain = [