1.json转换
对于一段json文件,先用在线json编辑器(http://www.bejson.com/)检查;如果能够正常解析,就可以放进Python中进行解析,有两种形式:
1.1 读成string形式Python可自动识别
1.2 对于识别不了的,可以用以下代码:
import pandas as pd
from pandas.core.frame import DataFrame
import json
def is_json(myjson):
    """Return True if *myjson* parses as JSON, otherwise False.

    Args:
        myjson: The text to validate, as accepted by ``json.loads``
            (str, bytes or bytearray).

    Returns:
        bool: True when ``json.loads`` accepts the input; False when the
        text is malformed or the input is of a type ``json.loads`` rejects
        (for example ``None``).
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        # ValueError covers malformed JSON (json.JSONDecodeError is a
        # subclass); TypeError covers non-text input such as None.
        return False
    return True
path = r'json_test.txt'

# Read the file one line at a time and parse every line that is valid JSON.
# The ``with`` block guarantees the handle is closed even if parsing fails.
with open(path, 'r', encoding='utf-8') as f1:
    for i in f1:
        if is_json(i):
            # NOTE: mxf is rebound on every valid line, so after the loop it
            # holds the object parsed from the last valid line only.
            mxf = json.loads(i)
2.md5加密
手机号、身份证号等都可以进行md5加密,普通md5加密代码
import hashlib
def md5(data, encoding='gb2312'):
    """Return the hexadecimal MD5 digest of a text value.

    Args:
        data: The string to hash (for example a phone number or ID number).
        encoding: Codec used to turn *data* into bytes before hashing.
            Defaults to ``'gb2312'``, the codec this function has always
            used, so existing digests stay the same. Pass ``'utf-8'`` for
            text containing characters GB2312 cannot represent.

    Returns:
        str: The 32-character lowercase hexadecimal digest.

    Raises:
        UnicodeEncodeError: If *data* contains a character that *encoding*
            cannot represent.
    """
    # NOTE: MD5 is a hash, not encryption, and low-entropy inputs such as
    # phone numbers can be recovered by enumeration; treat the result as
    # masking rather than as strong protection.
    m = hashlib.md5(data.encode(encoding=encoding))
    return m.hexdigest()
3.文件分割,将大文件分割成小文件
limit = 5000  # maximum number of data lines per output file
file_count = 0
url_list = []


def _write_chunk(index, chunk_lines):
    """Write *chunk_lines* to '<index>.txt' below a 'name' header line.

    The final line is stripped so that no output file ends with an empty
    line; this is applied to full chunks and to the trailing remainder
    alike so every output file has the same layout.
    """
    file_name = str(index) + '.txt'
    with open(file_name, 'w') as file:
        file.write('name' + '\n')
        file.writelines(chunk_lines[:-1])
        file.write(chunk_lines[-1].strip())


# Stream the source file and emit a numbered output file every `limit` lines.
with open('test3.txt') as f:
    for line in f:
        url_list.append(line)
        if len(url_list) >= limit:
            _write_chunk(file_count, url_list)
            url_list = []
            file_count += 1

# Whatever is left (fewer than `limit` lines) goes into one last file.
if url_list:
    _write_chunk(file_count, url_list)

print('done')
4.随机抽取文件
import random
sample_size = 2100  # number of lines to draw

# Draw `sample_size` lines at random (with replacement, so a line may be
# picked more than once) from the source file and write them to the output.
with open('样本.txt', 'r') as f1, open('样本_1.txt', 'w') as f2:
    lines = f1.readlines()
    if lines:
        # randrange excludes its upper bound, so every index is valid;
        # randint(0, len(lines)) could return len(lines) and raise IndexError.
        randline = [random.randrange(len(lines)) for _ in range(sample_size)]
    else:
        # Nothing to sample from: leave the output file empty.
        randline = []
    f2.writelines(lines[i] for i in randline)