# 区块链非法交易预测分析 — Blockchain illicit-transaction prediction analysis
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
import seaborn as sns
from sklearn.preprocessing import StandardScaler
# Read the three input CSVs: the edge list, the feature table (which has no
# header row, so columns are named below), and the per-transaction classes.
edges = pd.read_csv("/home/kesci/input/first4634/elliptic_txs_edgelist.csv")
features = pd.read_csv(
    "/home/kesci/input/first4634/elliptic_txs_features.csv",
    header=None,
)
classes = pd.read_csv("/home/kesci/input/first4634/elliptic_txs_classes.csv")

# Column layout of the feature table: txId, time_step, then tx_feat_2..94
# followed by agg_feat_1..72.
tx_features = [f"tx_feat_{i}" for i in range(2, 95)]
agg_features = [f"agg_feat_{i}" for i in range(1, 73)]
features.columns = ["txId", "time_step", *tx_features, *agg_features]

# Attach the class column to every feature row (left join keeps rows that
# have no entry in the classes table), then recode "unknown" as '0'.
features = features.merge(classes, on="txId", how="left")
features["class"] = features["class"].mask(features["class"] == "unknown", "0")
# 异常交易相关性特征分析 — Feature analysis: features correlated with anomalous transactions
# Plot, for six selected features, the per-time-step mean of the feature
# split by class, one feature per panel of a 3x2 grid.
#
# Each entry is (feature column, legend location) for one panel, listed in
# row-major order so it lines up with `axes.flat`.
panel_specs = [
    ("tx_feat_2", (1.0, 0.8)),
    ("tx_feat_6", (1.0, 0.8)),
    ("tx_feat_9", (1.0, 0.8)),
    ("tx_feat_10", (1.0, 0.8)),
    ("tx_feat_12", (1.0, 0.8)),
    ("tx_feat_18", "upper right"),
]

fig, axes = plt.subplots(3, 2, figsize=(10, 10))

for ax, (feature_name, legend_loc) in zip(axes.flat, panel_specs):
    # Mean of the feature for every (time_step, class) pair, flattened back
    # into ordinary columns so seaborn can use them as x / y / hue.
    grouped = (
        features.groupby(['time_step', 'class'])[feature_name]
        .mean()
        .reset_index()
    )
    sns.lineplot(x='time_step', y=feature_name, hue='class', data=grouped, ax=ax)

    # Style the panel through its own Axes object. The pyplot-level
    # plt.legend()/plt.ylabel() act on the *current* axes rather than the
    # one passed to lineplot via `ax=`, so they would all land on a single
    # panel instead of the one just drawn.
    ax.legend(loc=legend_loc)
    ax.set_ylabel(feature_name)

plt.show()