# Dataset constants for this run.
dataset_name = 'cora'
nclass = 7
nfeature = 1433
ngraph = 2708

# ---- Fine-tuning task 1: node classification ----
# Hyperparameters.
lr = 0.01
k = 7
max_epoch = 150
x_size = nfeature  # = 1433
# Hidden and intermediate sizes are deliberately bound to the same value.
hidden_size = intermediate_size = 32
num_attention_heads = 2
num_hidden_layers = 2
y_size = nclass  # = 7
graph_size = ngraph  # = 2708
residual_type = 'graph_raw'
# Create the DatasetLoader object (per the author's note, the class is
# defined in code.DatasetLoader.py) and point it at the dataset folder.
data_obj = DatasetLoader()
data_obj.dataset_source_folder_path = './data/' + dataset_name + '/'
data_obj.dataset_name = dataset_name
data_obj.k = k
data_obj.load_all_tag = True
# Create the GraphBertConfig object (per the author's note, the class is
# defined in code.MethodBertComp.py). x_size equals nfeature; it is passed by
# its own name here to match how y_size is passed.
bert_config = GraphBertConfig(
    residual_type=residual_type,
    k=k,
    x_size=x_size,
    y_size=y_size,
    hidden_size=hidden_size,
    intermediate_size=intermediate_size,
    num_attention_heads=num_attention_heads,
    num_hidden_layers=num_hidden_layers,
)
# Create the MethodGraphBertNodeClassification object (per the author's note,
# the class is defined in code.MethodGraphBertNodeClassification.py).
method_obj = MethodGraphBertNodeClassification(bert_config)
# NOTE(review): the author's note asks whether setting spy_tag to False makes
# the run faster -- unconfirmed from this script.
method_obj.spy_tag = True
method_obj.max_epoch = max_epoch
method_obj.lr = lr
# Create the ResultSaving object (per the author's note, the class is defined
# in code.ResultSaving.py) and set the destination folder and file name.
result_obj = ResultSaving()
result_obj.result_destination_folder_path = './result/GraphBert/'
result_obj.result_destination_file_name = dataset_name + '_' + str(num_hidden_layers)
# Create the Settings object (per the author's note, the class is defined in
# code.Settings.py).
setting_obj = Settings()
# No evaluator is supplied for this run.
evaluate_obj = None
# prepare() comes from code.base_class.setting (author's note); it is handed
# the four objects: data, method, result and evaluate.
setting_obj.prepare(data_obj, method_obj, result_obj, evaluate_obj)
# load_run_save_evaluate() comes from code.Settings. Per the author's note it
# feeds data_obj into method_obj, passes the result to result_obj for saving,
# and, only if evaluate_obj is not None, also passes it to evaluate_obj.
setting_obj.load_run_save_evaluate()
# ---- Fine-tuning task 2: graph clustering ----
# Only method_obj and result_obj differ from task 1.
# Create the MethodGraphBertGraphClustering object (per the author's note, the
# class is defined in code.MethodGraphBertGraphClustering.py).
method_obj = MethodGraphBertGraphClustering(bert_config)
method_obj.cluster_number = y_size
# ---- set to false to run faster ----
method_obj.spy_tag = True
method_obj.max_epoch = max_epoch
method_obj.lr = lr

result_obj = ResultSaving()
# NOTE(review): unlike task 1, this folder path has no trailing '/' and the
# file name is just '_'. Presumably ResultSaving concatenates the two, giving
# './result/GraphBert/clustering_<dataset>_' -- confirm the intended location.
result_obj.result_destination_folder_path = './result/GraphBert/clustering_' + dataset_name
result_obj.result_destination_file_name = '_'