# Utility collection: read/write JSON, CSV and pickle files with Python 3,
# list all files under a directory, copy files to a target path, create
# directories, and process lists with multiprocessing — kept as a reference
# so these snippets need not be looked up again next time.
# -*- coding: utf-8 -*-
# 存放常用的操作函数
import codecs
import csv
import json
import multiprocessing as mp
import os
import pickle
import shutil
import sys

csv.field_size_limit(500 * 1024 * 1024)
def to_number(s):
    """Convert string *s* to int or float when possible; otherwise return it unchanged.

    A '.' in the string selects float parsing, otherwise int parsing is tried.
    Non-numeric strings are returned as-is. (The original parsed each value
    twice — once to validate, once to return — which is redundant.)
    """
    try:
        return float(s) if '.' in s else int(s)
    except ValueError:
        # Not a number: hand the original string back to the caller.
        return s
def get_input_args_dict(input_args=None):
    """Parse a command-line-style token list into a dict.

    Each token of the form ``--name`` takes the *following* token as its
    value, converted via to_number. Returns {} when no flags are present.

    Fixes: the original indexed ``input_args[count + 1]`` unconditionally,
    raising IndexError when a flag was the last token; a trailing flag is
    now silently ignored. The mutable default was replaced by None.
    """
    if input_args is None:
        input_args = ['']
    args_dict = {}
    for i, token in enumerate(input_args):
        # Only consume a value if one actually follows the flag.
        if token.startswith('--') and i + 1 < len(input_args):
            args_dict[token[2:]] = to_number(input_args[i + 1])
    return args_dict
def write_dict2json(dictdata, save_path="test_dict.json"):
    """Save *dictdata* as a pretty-printed (indent=4) JSON file at *save_path*.

    Empty dicts are skipped (no file is written). Errors are reported but
    not raised — this is deliberately best-effort, like the original.

    Fixes: removed a dead ``json.dumps`` whose result was discarded, moved
    the empty-dict check before any work, and included the exception detail
    in the error report instead of a bare generic message.
    """
    if dictdata == {}:
        return  # nothing to write
    try:
        with open(save_path, 'w') as f:
            json.dump(dictdata, f, indent=4)
    except Exception as e:
        print("write_dict2json Error", e)
def load_jsondata(file_path='FARE_result0.225.pkl.json'):
    """Read a JSON file (UTF-8) and return the deserialized object."""
    with open(file_path, 'r', encoding='utf8') as handle:
        return json.load(handle)
def get_data_path(dir="E:\\avclass\\behavior\\behavior", ftype=''):
    """Walk *dir* recursively and return the full path of every file whose
    name ends with *ftype*, with backslashes normalised to '/'.

    With ftype='' (the default) every file matches.
    """
    print("正在读取" + dir + "下的所有文件路径名")
    print("读取的文件后缀名:" + ftype)
    collected = []
    for root, _, names in os.walk(dir):
        collected.extend(
            # Normalise separators so results are uniform across platforms.
            os.path.join(root, name).replace('\\', '/')
            for name in names
            if name.endswith(ftype)
        )
    return collected
def load_picklefile(filename):
    """Load and return the object stored in a pickle file.

    A '.pkl' suffix is appended to *filename* when missing.

    Fixes: the original opened the file and never closed it (handle leak);
    a ``with`` block now guarantees closure.
    NOTE(review): pickle.load on untrusted files is unsafe — only use on
    files this code wrote itself.
    """
    if not filename.endswith('.pkl'):
        filename = filename + '.pkl'
    with open(filename, 'rb') as pick_file:
        return pickle.load(pick_file)
# -*-coding:utf-8 -*-
def write_to_pickle(dictdata, filename):
    """Serialize *dictdata* to *filename* with pickle.

    A '.pkl' suffix is appended to *filename* when missing.

    Fixes: the original used an explicit open/close pair, leaking the
    handle if ``pickle.dump`` raised; a ``with`` block closes it always.
    """
    if not filename.endswith('.pkl'):
        filename = filename + '.pkl'
    with open(filename, 'wb') as pick_file:
        pickle.dump(dictdata, pick_file)
def read_csv(filename="A_test_data.csv"):
    """Read a CSV file (utf_8_sig, decode errors ignored) and return its
    rows as a list of lists, with the header row dropped."""
    with codecs.open(filename, 'r', encoding='utf_8_sig', errors='ignore') as fh:
        rows = list(csv.reader(fh))
    return rows[1:]  # drop the header
def write_csv(answer_data=[(0, 0)], data_head=[
    ("域名", "域名排名", "家族", "类型")
], filename='default.csv'):
    """Write header rows (*data_head*) followed by data rows (*answer_data*)
    to *filename* as CSV, encoded utf_8_sig (UTF-8 with BOM, Excel-friendly).

    Fixes: the original open/close pair leaked the handle if a write raised;
    a ``with`` block now guarantees closure, and ``writerows`` replaces the
    manual loop. The mutable defaults are never mutated here and are kept
    for backward compatibility.
    """
    rows = data_head + answer_data
    with codecs.open(filename, 'w', 'utf_8_sig', errors='ignore') as f:
        csv.writer(f).writerows(rows)
def update_count_dict(data_dict=None, count_str='hi'):
    """Increment the occurrence count for *count_str* in *data_dict* (in place).

    Fixes: the original used a mutable default argument (``{}``), which is
    shared — and mutated — across calls; None is the sentinel now. The
    two-branch contains-check is replaced by a single ``dict.get`` default.
    """
    if data_dict is None:
        data_dict = {}
    data_dict[count_str] = data_dict.get(count_str, 0) + 1
def update_list_dict(data_dict=None, input_str='hi', update_fam='new_fam'):
    """Append *input_str* to the list stored under key *update_fam* in
    *data_dict* (in place), creating the list when the key is new.

    Fixes: the original used a mutable default argument (``{}``), shared
    and mutated across calls; None is the sentinel now. ``setdefault``
    replaces the get/append/update dance.
    """
    if data_dict is None:
        data_dict = {}
    data_dict.setdefault(update_fam, []).append(input_str)
def makedir(path):
    """Create directory *path* (including parents) when it does not exist.

    Returns True when the directory was created, False when it already
    existed.
    """
    if os.path.exists(path):
        return False
    os.makedirs(path)
    print(path + '目录创建')
    return True
def mycopyfile(srcfile, dstpath):
    """Copy *srcfile* into directory *dstpath*, creating the directory when
    missing. Prints a notice when the source file does not exist.

    Fixes: the original referenced ``shutil`` which this module never
    imported (NameError at call time), and built the destination with bare
    ``dstpath + fname`` — broken when dstpath lacked a trailing separator.
    ``os.path.join`` handles both cases and is unchanged when a trailing
    separator is present.
    """
    if not os.path.isfile(srcfile):
        print("%s not exist!" % (srcfile))
        return
    fname = os.path.basename(srcfile)
    if not os.path.exists(dstpath):
        os.makedirs(dstpath)  # create the target directory tree
    dst = os.path.join(dstpath, fname)
    shutil.copy(srcfile, dst)
    print("copy %s -> %s" % (srcfile, dst))
def task_split(task_list=None, maxprocess_num=2):
    """Split *task_list* into *maxprocess_num* chunks for worker processes.

    The first maxprocess_num - 1 chunks each hold
    len(task_list) // maxprocess_num items; the final chunk takes all
    remaining items (so it absorbs the division remainder).
    """
    if task_list is None:
        task_list = []
    chunk = len(task_list) // maxprocess_num
    pieces = [
        task_list[idx * chunk:(idx + 1) * chunk]
        for idx in range(maxprocess_num - 1)
    ]
    # Last chunk: everything from the final boundary to the end.
    pieces.append(task_list[chunk * (maxprocess_num - 1):])
    return pieces
def load_file(file_path=""):
    """Dispatch to the matching loader based on the file extension.

    '.pkl' goes to load_picklefile, '.json' to load_jsondata; any other
    extension returns None.
    """
    dispatch = (('.pkl', load_picklefile), ('.json', load_jsondata))
    for suffix, loader in dispatch:
        if file_path.endswith(suffix):
            return loader(file_path)
def multiprocess_task_list(task_list, task_function, maxprocess_num, have_return, *input_args):
    # Process a list with a pool of worker processes.
    #
    # task_list: items to process; split into maxprocess_num chunks via task_split.
    # task_function: callable executed in each worker. With have_return=True it
    #     is called as task_function(chunk, shared_list, *input_args) and is
    #     expected to append its results to shared_list; otherwise it is called
    #     as task_function(chunk, *input_args).
    # have_return: must be exactly True or False; any other value aborts early.
    # Returns the manager-backed shared list when have_return is True, else None.
    #
    # NOTE(review): the pool is created before have_return is validated, so an
    # invalid flag leaks an unclosed pool on the early return below.
    splited_task = task_split(task_list, maxprocess_num)
    pool = mp.Pool(processes=maxprocess_num)
    if have_return != True and have_return != False:
        print("请设置have_return参数!")
        return
    if have_return:
        # Results are collected through a manager-backed shared list that each
        # worker appends to.
        # NOTE(review): mp.Manager is bound here WITHOUT parentheses and only
        # instantiated on the next line via manager() — functional as written,
        # but easy to misread as a missing-call bug.
        manager = mp.Manager
        all_result_list = manager().list()
        for process_id in range(maxprocess_num):
            # Dispatch one chunk per worker.
            # NOTE(review): the AsyncResult objects are discarded, so exceptions
            # raised inside task_function are silently dropped.
            pool.apply_async(task_function, args=(
                splited_task[process_id], all_result_list, *input_args), )
        pool.close()
        pool.join()
        # NOTE(review): this returns a ListProxy; once the local manager object
        # is garbage-collected the proxy may become unusable — confirm callers
        # copy it (e.g. list(result)) promptly.
        return all_result_list
    else:
        for process_id in range(maxprocess_num):
            # Dispatch one chunk per worker; no results are collected.
            pool.apply_async(task_function, args=(
                splited_task[process_id], *input_args), )
        pool.close()
        pool.join()