codess大苏打

Charles Chou

已于 2024-04-25 19:54:39 修改

阅读量106

点赞数 2

文章标签： python

于 2024-04-25 19:32:41 首次发布

本文链接：https://blog.csdn.net/weixin_43249548/article/details/138197802

版权

本文介绍了如何使用 Python 库 pandas、csv、numpy 和 networkx 读取并处理数据，由边列表构建 networkx 图，并实现函数 db_layer_handle_circle 对节点进行分层（允许图中存在环），最后用 matplotlib 绘制网络图并统计连通分量的个数。

摘要由CSDN通过智能技术生成

import pandas as pd
import csv
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

import copy

def cons_nx_graph(start_fd, end_fd, weights):
    """Build a networkx graph from three parallel edge sequences.

    Args:
        start_fd: Source node of each edge.
        end_fd: Target node of each edge.
        weights: Value stored on each edge under the ``"weights"`` attribute.

    Returns:
        The graph produced by ``nx.from_pandas_edgelist`` (called without
        ``create_using``).
    """
    edge_table = pd.DataFrame()
    # Columns are attached one at a time, in this order, to an empty frame.
    for column, values in (
        ("start", start_fd),
        ("end", end_fd),
        ("weights", weights),
    ):
        edge_table[column] = values
    return nx.from_pandas_edgelist(
        edge_table, source="start", target="end", edge_attr="weights"
    )


def read_data(path):
    """Load a CSV file and return its contents column by column.

    Args:
        path: CSV location, handed unchanged to ``pd.read_csv``.

    Returns:
        A list holding one inner list per column, in the column order of
        the parsed frame.
    """
    frame = pd.read_csv(path)
    return [frame[name].tolist() for name in frame.columns]


def db_layer_handle_circle(fr, to, path='./process_ret/tb_nums_in_db_degree.csv'):
    """Split the nodes of a directed edge list into layers, tolerating cycles.

    Duplicate edges and self-loops are ignored. The first layer holds every
    node with no incoming edge. Each following layer is built in two steps:

    1. collect the not-yet-placed successors of the previous layer;
    2. repeatedly pull in any not-yet-placed node that has an edge pointing
       into the layer being built, so that nodes on a cycle through the
       layer land in the same layer.

    If step 1 finds nothing while nodes are still unplaced (they sit on
    cycles that no placed node leads into), one unplaced node is used as the
    seed of the layer. Without this the loop would append empty layers
    forever.

    After layering, the de-duplicated edges are drawn with networkx /
    matplotlib and the number of connected components is printed. Progress
    information is printed along the way.

    Args:
        fr: List with the source node of each edge.
        to: List with the target node of each edge, parallel to ``fr``.
        path: Not used by this function; kept so existing callers that pass
            it keep working.

    Returns:
        The list of layers, each a list of nodes. Every node from ``fr`` and
        ``to`` appears in exactly one layer.
    """
    all_db = list(set(fr + to))

    # Drop duplicated edges and self-loops before any degree reasoning.
    relations = list(zip(fr, to))
    uni_relation = [(f, t) for f, t in set(relations) if f != t]
    deduplicate_f = [f for f, _ in uni_relation]
    deduplicate_t = [t for _, t in uni_relation]

    # Adjacency lists keep the edge order of uni_relation for each source.
    successors = {}
    for f, t in uni_relation:
        successors.setdefault(f, []).append(t)

    # First layer: every node without an incoming edge (isolated or not).
    has_incoming = set(deduplicate_t)
    layer_first = [db for db in all_db if db not in has_incoming]
    remaining = set(all_db).difference(layer_first)
    print(f"len of first layer : {len(layer_first)}")
    layers = [layer_first]

    index = 0
    print(f"len of rest db all : {len(remaining)}")
    while remaining:
        print(f"layers {layers}")
        layer_cur = []
        in_layer = set()  # mirrors layer_cur for O(1) membership tests

        # Step 1: unplaced successors of the previous layer.
        for db in layers[index]:
            print(f"now db : {db}")
            for t in successors.get(db, ()):
                if t in remaining:
                    layer_cur.append(t)
                    in_layer.add(t)
                    remaining.discard(t)

        if not layer_cur:
            # Nothing reachable from the previous layer, yet nodes remain:
            # seed the layer so the loop is guaranteed to make progress.
            seed = next(db for db in all_db if db in remaining)
            layer_cur.append(seed)
            in_layer.add(seed)
            remaining.discard(seed)
        print(f"cur layer : {layer_cur}")

        # Step 2: pull in unplaced nodes that point into this layer. Repeat
        # full passes until one adds nothing, since each addition can expose
        # further predecessors.
        grew = True
        while grew:
            grew = False
            for f, t in uni_relation:
                if t in in_layer and f in remaining:
                    grew = True
                    layer_cur.append(f)
                    in_layer.add(f)
                    remaining.discard(f)
        print(f"cur layer : {layer_cur}")

        layers.append(layer_cur)
        print(f"len of layers : {len(layers)}")
        print(f"len of cur layer   : {len(layer_cur)}")
        print(f"len of rest db all : {len(remaining)}")
        print("\n")
        index += 1
        print(f"index now : {index}")
    print(f"所有节点分层：{layers}")

    # The source nodes are passed a second time as the per-edge "weights".
    G = cons_nx_graph(deduplicate_f, deduplicate_t, deduplicate_f)
    nx.draw(G, with_labels=True, edge_color='b', node_color='g', node_size=1000)
    plt.show()
    print("lian tong", len(list(nx.connected_components(G))))
    return layers


# Location of the CSV file loaded below.
path = './process_ret/re_code_all.csv'

# One list per CSV column (see read_data).
data = read_data(path)
# The columns at positions 1 and 4 are taken as the two edge endpoints.
# NOTE(review): presumably the "from"/"to" codes of each relation — confirm
# against the CSV schema.
# NOTE(review): fr/to are not used in the lines visible here; the call below
# runs on the hard-coded sample instead.
fr, to = data[1], data[4]
# Hand-written sample edges: 1->2, 2->3, 3->4, 3->5, 5->6, 6->2
# (includes the cycle 2->3->5->6->2).
fr_test = [1, 2, 3, 3, 5, 6]
to_test = [2, 3, 4, 5, 6, 2]
db_layer_handle_circle(fr_test, to_test)