# -*- coding: utf-8 -*-
"""
Created on Thu Mar 31 10:28:30 2022
@author: zfmin 合并csv
"""
import matplotlib
import matplotlib.pyplot as plt
import os
import pandas as pd
import csv
import time
# Merge every CSV file in the folder into a single deduplicated total.csv.
path = 'C:\\User\\han_time_blockNum\\han_timestamp\\'  # folder holding the source CSV files
# Only take real .csv files, and skip a pre-existing total.csv so the output
# of an earlier run is not merged into itself.
files = [f for f in os.listdir(path)
         if f.lower().endswith('.csv') and f != 'total.csv']
# Read each file once and concatenate in a single call; concatenating inside
# the loop re-copies the accumulated frame on every iteration (quadratic).
frames = [pd.read_csv(path + f, encoding='gb18030') for f in files]
df1 = pd.concat(frames, axis=0, ignore_index=True)
df1 = df1.drop_duplicates()  # drop exact duplicate rows
# df1 = df1.reset_index(drop=True)  # regenerate index (kept from original, unused)
df1.to_csv(path + 'total.csv', index=False,
           encoding='gb18030')  # save the merged result as a new CSV file
# Count how many rows embed data in each block: map block number -> row count.
merge_path = 'C:\\Users\\merge\\total_merge.csv'  # kept: possibly used elsewhere in the file
count_dic = dict()
df = pd.read_csv(path + 'total.csv', encoding='gb18030')
blockNumber = df['blockNumber']
for i in blockNumber:
    # dict.get avoids the double lookup of setdefault followed by +=
    count_dic[i] = count_dic.get(i, 0) + 1
# Sorted views: ascending by block number, and descending by count.
blocknum_sort = sorted(count_dic.items())
a1 = sorted(count_dic.items(), key=lambda x: x[1], reverse=True)
# Write the per-block counts to CSV.  Mode 'w' (was 'a+') so a re-run does
# not append a second header and duplicate rows; `with` guarantees the file
# is closed even if a write fails.
# NOTE(review): the header "blocknuber" looks like a typo of "blocknumber",
# but it is kept byte-identical in case downstream readers rely on it.
with open(path + 'blocknumber_sort.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["blocknuber", "count"])
    for k, v in count_dic.items():
        writer.writerow([k, v])
# Convert the Unix 'timestamp' column into a human-readable 'date' column,
# e.g. 1613878080 -> '2021-02-21' (interpreted in the local time zone).
path = 'C:\\Users\\zfmin\\Desktop\\TSMC\\CSV\\han_time_blockNum\\merge\\'
df = pd.read_csv(path + 'total_merge.csv', encoding='utf-8')
timestamp = df['timestamp']
# First expand each epoch second into a struct_time, then format it.
timearray = [time.localtime(ts) for ts in timestamp]
datestrings = [time.strftime("%Y-%m-%d", tm) for tm in timearray]
# Insert the formatted dates as a new column at position 5 and persist.
df.insert(5, 'date', datestrings)
df.to_csv(path+"add_date.csv", encoding='utf-8', index=False)
# Filter out contract addresses (kept from the original, commented out):
# contract_han = include_contract_address[include_contract_address['label'] == 1]
# contract_english = include_contract_address[include_contract_address['label'] == 2]
# contract_others = include_contract_address[include_contract_address['label'] == 3]
# Group blocks by how many inputs they embed, i.e. how many blocks embed
# exactly 1 input, how many embed exactly 100, and so on.
path2 = "C:\\Users\\TSMC\\CSV\\按字段分析\\han\\blocknumber\\"
enbed_count = dict()
df = pd.read_csv(path2+'blocknumber_sort.csv', encoding='utf-8')
count_data = df['count']
for i in count_data:
    # dict.get avoids the setdefault-then-increment double lookup
    enbed_count[i] = enbed_count.get(i, 0) + 1
# Mode 'w' (was 'a+'): re-running would otherwise append a second header and
# duplicate rows to an existing file; `with` guarantees the handle is closed.
with open(path2 + "block_count_statistic.csv",
          'w', newline='', encoding='utf-8') as csvfile2:
    writer2 = csv.writer(csvfile2)
    writer2.writerow(['count', 'blockCount'])
    for k, v in enbed_count.items():
        writer2.writerow([k, v])
# Plotting setup.
# Configure matplotlib so CJK labels and the minus sign render correctly.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # SimHei provides CJK glyphs
matplotlib.rcParams['axes.unicode_minus'] = False  # render '-' correctly with a CJK font
# Accumulators for the plot's x/y values; presumably populated by code past
# this chunk — TODO confirm.
x_list = []
y_list = []
data = pd.read_csv(path2+"block_count_statistic.csv", encoding='utf-8')
# -*- coding: utf-8 -*-
import time
import json
import os
import zipfile
import csv
#zip转换为csv
def zip_to_csv(path='F:\\trace\\',
               out_path="F:\\300W_data\\12965000_1300w\\_1300wdata.csv"):
    """Extract trace records from recently modified .zip archives into one CSV.

    Each archive member is parsed as JSON (or as a Python literal when the
    payload's second character is a single quote) and every entry of
    ``data['result']`` that has a non-empty ``transactionHash`` is written as
    a row: transactionHash, input, output, from, to.

    path     -- folder containing the .zip archives (original default kept).
    out_path -- destination CSV file (original default kept).
    """
    import ast  # stdlib; only needed here

    files = os.listdir(path)
    # `with` guarantees the CSV handle is closed; the original also leaked
    # every ZipFile except the last one (zip.close() sat outside the loop).
    with open(out_path, "w", newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["transactionHash", "inputdata", "outputdata", "from", "to"])
        for fname in files:
            mtime = os.path.getmtime(path + fname)
            # NOTE(review): only archives modified within the last 60 seconds
            # are processed, as in the original — confirm this is intended.
            if time.time() - mtime >= 60:
                continue
            if os.path.splitext(fname)[1] != '.zip':
                continue
            with zipfile.ZipFile(path + fname, 'r') as archive:
                for member in archive.namelist():
                    raw = archive.open(member).read()
                    text = raw.decode('utf-8')
                    if text[1] == "'":
                        # Python-literal payload.  ast.literal_eval replaces the
                        # original eval(): eval on archive contents can execute
                        # arbitrary code, literal_eval parses data literals only.
                        data = ast.literal_eval(text)
                    else:
                        data = json.loads(raw)
                    for rec in data['result']:
                        # Skip entries without a usable transaction hash.
                        if not rec.get('transactionHash'):
                            continue
                        transactionHash = rec['transactionHash']
                        inputdata = ""
                        outputdata = ""
                        From = ""
                        To = ""
                        # .get() also guards against a missing 'action' key,
                        # which the original would have crashed on (KeyError).
                        action = rec.get('action', {})
                        if action.get('input'):
                            inputdata = action['input']
                            From = action['from']
                            To = action['to']
                        result = rec.get('result')
                        if result and result.get('output'):
                            outputdata = result['output']
                        writer.writerow([transactionHash, inputdata, outputdata, From, To])
            csvfile.flush()
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 31 10:28:30 2022
@author: zfmin 合并csv
"""
import matplotlib
import matplotlib.pyplot as plt
import os
import pandas as pd
import csv
import time
# Merge all CSV files in the folder into one deduplicated total.csv.
path = 'C:\\User\\han_time_blockNum\\han_timestamp\\'  # folder with the source CSVs
# Restrict to actual .csv files and exclude any total.csv left over from a
# previous run, which would otherwise be merged back into itself.
files = [f for f in os.listdir(path)
         if f.lower().endswith('.csv') and f != 'total.csv']
# Collect all frames first and concatenate once — concatenating inside the
# loop copies the growing frame on each pass (quadratic behaviour).
frames = [pd.read_csv(path + f, encoding='gb18030') for f in files]
df1 = pd.concat(frames, axis=0, ignore_index=True)
df1 = df1.drop_duplicates()  # remove exact duplicate rows
# df1 = df1.reset_index(drop=True)  # regenerate index (kept from original, unused)
df1.to_csv(path + 'total.csv', index=False,
           encoding='gb18030')  # write the merged result
# Tally how many rows embed data per block: block number -> row count.
merge_path = 'C:\\Users\\merge\\total_merge.csv'  # kept: may be referenced elsewhere
count_dic = dict()
df = pd.read_csv(path + 'total.csv', encoding='gb18030')
blockNumber = df['blockNumber']
for i in blockNumber:
    # single-lookup increment instead of setdefault followed by +=
    count_dic[i] = count_dic.get(i, 0) + 1
# Two sorted views: ascending by block number; descending by count.
blocknum_sort = sorted(count_dic.items())
a1 = sorted(count_dic.items(), key=lambda x: x[1], reverse=True)
# Persist the counts.  'w' replaces 'a+' so repeated runs do not append a
# duplicate header plus duplicate rows; `with` closes the file reliably.
# NOTE(review): header "blocknuber" (likely a "blocknumber" typo) is kept
# byte-identical because downstream readers may depend on it.
with open(path + 'blocknumber_sort.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["blocknuber", "count"])
    for k, v in count_dic.items():
        writer.writerow([k, v])
# Turn the epoch-seconds 'timestamp' column into a 'date' string column,
# e.g. 1613878080 -> '2021-02-21' (local time zone).
path = 'C:\\Users\\zfmin\\Desktop\\TSMC\\CSV\\han_time_blockNum\\merge\\'
df = pd.read_csv(path + 'total_merge.csv', encoding='utf-8')
timestamp = df['timestamp']
# Step 1: epoch seconds -> struct_time; step 2: struct_time -> 'YYYY-MM-DD'.
timearray = [time.localtime(sec) for sec in timestamp]
datestrings = [time.strftime("%Y-%m-%d", st) for st in timearray]
# Add the dates as a new column at index 5, then write the augmented CSV.
df.insert(5, 'date', datestrings)
df.to_csv(path+"add_date.csv", encoding='utf-8', index=False)
# Filter out contract addresses (kept from the original, commented out):
# contract_han = include_contract_address[include_contract_address['label'] == 1]
# contract_english = include_contract_address[include_contract_address['label'] == 2]
# contract_others = include_contract_address[include_contract_address['label'] == 3]
# Histogram of embed counts per block: e.g. how many blocks embed exactly
# one input, how many embed exactly 100 inputs, etc.
path2 = "C:\\Users\\TSMC\\CSV\\按字段分析\\han\\blocknumber\\"
enbed_count = dict()
df = pd.read_csv(path2+'blocknumber_sort.csv', encoding='utf-8')
count_data = df['count']
for i in count_data:
    # single-lookup increment instead of setdefault + +=
    enbed_count[i] = enbed_count.get(i, 0) + 1
# 'w' instead of 'a+' so a re-run does not append a second header and
# duplicate rows; `with` closes the handle even on error.
with open(path2 + "block_count_statistic.csv",
          'w', newline='', encoding='utf-8') as csvfile2:
    writer2 = csv.writer(csvfile2)
    writer2.writerow(['count', 'blockCount'])
    for k, v in enbed_count.items():
        writer2.writerow([k, v])
# Plotting setup.
# Make matplotlib render CJK text and the minus sign properly.
matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # CJK-capable font
matplotlib.rcParams['axes.unicode_minus'] = False  # avoid broken '-' glyph with CJK font
# x/y accumulators for the chart; presumably filled by code beyond this
# chunk — TODO confirm.
x_list = []
y_list = []
data = pd.read_csv(path2+"block_count_statistic.csv", encoding='utf-8')