CS224W 2023 Fall Colab 1

最新推荐文章于 2024-10-01 09:43:24 发布
Evenrose
最新推荐文章于 2024-10-01 09:43:24 发布
阅读量91
点赞数
分类专栏： CS224W 文章标签： python
本文链接：https://blog.csdn.net/Evenrose/article/details/133841592
版权
CS224W 专栏收录该内容
3 篇文章 0 订阅
订阅专栏
CS224W 2023 Fall Colab 1 练习记录，Colab题目链接
import networkx as nx
import matplotlib.pyplot as plt
import torch
import random
from sklearn.decomposition import PCA

# Karate club graph from NetworkX; the functions and script sections below read this global
G = nx.karate_club_graph()


# Question 1: What is the average degree of the karate club network? (5 Points)


def average_degree(num_edges, num_nodes):
    """Return the average node degree of a graph, rounded to an integer.

    Every edge adds one to the degree of each of its two endpoints, so the
    sum of all degrees is twice the number of edges.

    Args:
        num_edges: Number of edges in the graph.
        num_nodes: Number of nodes in the graph.

    Returns:
        The average degree, rounded with the built-in ``round``.
    """
    total_degree = num_edges * 2
    return round(total_degree / num_nodes)


# Measure the graph, then report its average degree
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()
avg_degree = average_degree(num_edges, num_nodes)
print(f"Average degree of karate club network is {avg_degree}")

# Question 2: What is the average clustering coefficient of the karate club network? (5 Points)


def average_clustering_coefficient(G):
    """Return the average clustering coefficient of ``G``.

    Args:
        G: The graph to analyse; it is passed to ``nx.average_clustering``.

    Returns:
        The average clustering coefficient rounded to 2 decimal places.
    """
    raw_coefficient = nx.average_clustering(G)
    return round(raw_coefficient, 2)


# Compute and report the average clustering coefficient of the graph
avg_cluster_coef = average_clustering_coefficient(G)
print(f"Average clustering coefficient of karate club network is {avg_cluster_coef}")

# Question 3: What is the PageRank value for node 0 (node with id 0) after one PageRank iteration? (5 Points)


def one_iter_pagerank(G, beta, r0, node_id):
    """Return the PageRank of ``node_id`` after one iteration.

    Every neighbor starts with the same rank ``r0`` and passes on an equal
    share of ``beta * r0`` along each of its edges. The node additionally
    receives the teleport term ``(1 - beta) / N``.

    Args:
        G: Graph providing ``neighbors``, ``degree`` and ``number_of_nodes``.
        beta: Damping factor applied to the rank flowing along edges.
        r0: Initial PageRank value shared by all nodes.
        node_id: Node whose updated PageRank is computed.

    Returns:
        The one-iteration PageRank value rounded to 2 decimal places.
    """
    # Rank a neighbor hands out in total, before splitting it over its edges.
    damped_rank = beta * r0

    incoming = 0
    for source in G.neighbors(node_id):
        incoming += damped_rank / G.degree[source]

    teleport = (1 - beta) / G.number_of_nodes()
    return round(incoming + teleport, 2)


# Start from a uniform rank over all nodes and run one iteration for node 0
node = 0
beta = 0.8
r0 = 1 / G.number_of_nodes()
r1 = one_iter_pagerank(G, beta, r0, node)
print(f"The PageRank value for node 0 after one iteration is {r1}")

# Question 4: What is the (raw) closeness centrality for the karate club network node 5? (5 Points)


def closeness_centrality(G, node=5):
    """Return the raw closeness centrality of ``node`` in ``G``.

    Args:
        G: The input graph (the karate club network in this script).
        node: Id of the node to evaluate.

    Returns:
        The value from ``nx.closeness_centrality`` divided by
        ``number_of_nodes - 1``, rounded to 2 decimal places.
    """
    normalized = nx.closeness_centrality(G, u=node, wf_improved=False)
    # NOTE(review): this relies on NetworkX scaling by (n - 1), which holds when
    # every other node is reachable; dividing it back out gives the raw value.
    scale = G.number_of_nodes() - 1
    return round(normalized / scale, 2)

# Report the raw closeness centrality of node 5
node = 5
closeness = closeness_centrality(G, node=node)
print(f"The node 5 has closeness centrality {closeness}")

# Question 5: Get the edge list of the karate club network and transform it into torch.LongTensor.
# What is the torch.sum value of pos_edge_index tensor? (10 Points)


def graph_to_edge_list(G):
    """Return the edges of ``G`` as a list.

    Args:
        G: Graph exposing an ``edges()`` method.

    Returns:
        A list holding every item yielded by ``G.edges()``, in iteration
        order. For an ``nx.Graph`` each item is a tuple of two node ids.
    """
    return list(G.edges())


def edge_list_to_tensor(edge_list):
    """Convert a list of edges into an int64 tensor of shape [2, num_edges].

    Row 0 holds the first endpoint of every edge and row 1 the second one.

    Args:
        edge_list: List of ``(u, v)`` tuples of integer node ids.

    Returns:
        A ``torch.long`` tensor with shape ``[2, len(edge_list)]``.
    """
    # An empty list would otherwise produce a 1-D tensor of shape [0],
    # which breaks the documented [2, num_edges] layout.
    if not edge_list:
        return torch.empty((2, 0), dtype=torch.long)

    # [num_edges, 2] -> [2, num_edges]; contiguous() materialises the transpose.
    edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
    return edge_index


# Build the positive edge tensor and report its shape and element sum
pos_edge_list = graph_to_edge_list(G)
pos_edge_index = edge_list_to_tensor(pos_edge_list)
print(f"The pos_edge_index tensor has shape {pos_edge_index.shape}")
print(f"The pos_edge_index tensor has sum value {torch.sum(pos_edge_index)}")

# Question 6: Please implement following function that samples negative edges.
# Then answer which edges (edge_1 to edge_5) are the negative edges in the karate club network? (10 Points)


def sample_negative_edges(G, num_neg_samples):
    """Randomly sample negative edges (node pairs that are not edges of ``G``).

    Candidates come from ``nx.non_edges``. The case where fewer candidates
    exist than ``num_neg_samples`` is not handled here; ``random.sample``
    raises ``ValueError`` in that situation.

    Args:
        G: The graph to sample from.
        num_neg_samples: Number of negative edges to return.

    Returns:
        A list of ``num_neg_samples`` distinct candidates drawn without
        replacement.
    """
    candidates = list(nx.non_edges(G))
    return random.sample(candidates, num_neg_samples)


# Draw as many negative edges as there are positive ones (78 for this graph)
neg_edge_list = sample_negative_edges(G, num_neg_samples=len(pos_edge_list))

# Convert the sampled negative edges into a [2, num_edges] tensor
neg_edge_index = edge_list_to_tensor(neg_edge_list)
print(f"The neg_edge_index tensor has shape {neg_edge_index.shape}")

# Candidate edges for the question: which of these can be negative edges?
edge_1, edge_2, edge_3, edge_4, edge_5 = (7, 1), (1, 33), (33, 22), (0, 4), (4, 2)


def if_negative_edge(*edges):
    """Print, for each given edge, whether it can be a negative edge.

    An edge cannot be negative when it, or its reversed form, appears in the
    module-level ``pos_edge_list``; the reversed form is checked as well as
    the edge itself. Edges are numbered from 1 in the order given.

    Args:
        *edges: Edges to check, each indexable as ``edge[0]``, ``edge[1]``.
    """
    for position, edge in enumerate(edges, start=1):
        flipped = (edge[1], edge[0])
        is_positive = edge in pos_edge_list or flipped in pos_edge_list
        if is_positive:
            template = "edge{} can't be a negative edge"
        else:
            template = "edge{} can be a negative edge"
        print(template.format(position))


# Report which of the five candidate edges can be negative edges
if_negative_edge(edge_1, edge_2, edge_3, edge_4, edge_5)

# Please do not change / reset the random seed
# (it is set before the embedding layer is created below)
torch.manual_seed(1)


def create_node_emb(num_node=34, embedding_dim=16):
    """Create the node embedding layer with uniformly initialised weights.

    Args:
        num_node: Number of rows (nodes) in the embedding table.
        embedding_dim: Size of each embedding vector.

    Returns:
        A ``torch.nn.Embedding`` whose weight data has been replaced by
        values drawn with ``torch.rand``.
    """
    layer = torch.nn.Embedding(num_node, embedding_dim)
    # Replace the layer's own initial weights with torch.rand samples.
    uniform_weights = torch.rand(num_node, embedding_dim)
    layer.weight.data = uniform_weights
    return layer


# Build the embedding layer with its default size
emb = create_node_emb()

# Show the layer itself
print(f"Embedding: {emb}")

# Look up the embeddings of node 0 and node 3 as an example
ids = torch.LongTensor([0, 3])
print(emb(ids))


def visualize_emb(emb):
    """Scatter-plot the node embeddings after a 2-component PCA projection.

    Nodes are coloured by the ``'club'`` attribute stored in the module-level
    graph ``G``: red for ``'Mr. Hi'``, blue (labelled "Officer") otherwise.
    Node ids are used directly as row indices into the projected matrix.

    Args:
        emb: Embedding layer whose ``weight`` matrix is visualised.
    """
    weights = emb.weight.data.numpy()
    projected = PCA(n_components=2).fit_transform(weights)
    plt.figure(figsize=(6, 6))

    mr_hi_x, mr_hi_y = [], []
    officer_x, officer_y = [], []
    for node_id, attrs in G.nodes(data=True):
        x_coord = projected[node_id][0]
        y_coord = projected[node_id][1]
        if attrs['club'] == 'Mr. Hi':
            mr_hi_x.append(x_coord)
            mr_hi_y.append(y_coord)
        else:
            officer_x.append(x_coord)
            officer_y.append(y_coord)

    plt.scatter(mr_hi_x, mr_hi_y, color="red", label="Mr. Hi")
    plt.scatter(officer_x, officer_y, color="blue", label="Officer")
    plt.legend()
    plt.show()

# Visualize the initial random embedding
visualize_emb(emb)

# Question 7: Training the embedding!
# What is the best performance you can get? Please report both the best loss and accuracy on Gradescope. (20 Points)


def accuracy(pred, label):
    """Return the fraction of predictions that match the labels.

    Predictions are turned into class labels with ``torch.round``, so for
    sigmoid outputs a value above 0.5 becomes 1 and anything else becomes 0.

    Args:
        pred: Tensor of predicted probabilities (the sigmoid output).
        label: Tensor of ground-truth labels, same length as ``pred``.

    Returns:
        The accuracy as a Python float rounded to 4 decimal places.
    """
    predicted = pred.round()
    hits = (predicted == label).sum().item()
    return round(hits / len(label), 4)


def train(emb, loss_fn, sigmoid, train_label, train_edge, epochs=500, learning_rate=0.1):
    """Train the node embeddings to separate positive from negative edges.

    Each epoch:
      1. looks up the embeddings of both endpoints of every edge,
      2. takes the dot product of each endpoint pair,
      3. squashes the result with ``sigmoid``,
      4. computes the loss against ``train_label``,
      5. prints loss and accuracy every 25th epoch,
      6. back-propagates and applies an SGD-with-momentum step.

    Args:
        emb: Embedding layer to optimise in place.
        loss_fn: Callable taking ``(prediction, label)`` and returning a
            scalar loss tensor.
        sigmoid: Callable mapping the dot products to predictions.
        train_label: Labels, one per column of ``train_edge``.
        train_edge: Integer tensor indexed as ``[2, num_edges]``; row 0 and
            row 1 hold the two endpoints of each edge.
        epochs: Number of optimisation steps (defaults to the former
            hard-coded 500).
        learning_rate: SGD learning rate (defaults to the former
            hard-coded 0.1).

    Returns:
        None. ``emb`` is updated in place.
    """
    optimizer = torch.optim.SGD(emb.parameters(), lr=learning_rate, momentum=0.9)

    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()

        # Indexing with train_edge adds a trailing embedding axis, so [0] / [1]
        # select the embeddings of the first / second endpoint of every edge.
        node_emb = emb(train_edge)
        dot_prod = torch.mul(node_emb[0], node_emb[1]).sum(dim=1)
        pred = sigmoid(dot_prod)
        loss = loss_fn(pred, train_label)

        # Accuracy is only needed for the log line, so skip it on other epochs.
        if epoch % 25 == 0:
            accu = accuracy(pred, train_label)
            # loss.item() prints the plain number instead of the tensor repr
            # (which would include grad_fn).
            print('In the {}th epoch , loss is {} and accuracy is {}'.format(epoch, loss.item(), accu))

        loss.backward()
        optimizer.step()


# Loss and activation passed to train()
loss_fn = torch.nn.BCELoss()
sigmoid = torch.nn.Sigmoid()

print(pos_edge_index.shape)

# One label per edge: 1 for every positive edge, 0 for every negative edge
pos_label = torch.ones(pos_edge_index.size(1))
neg_label = torch.zeros(neg_edge_index.size(1))

# Stack the labels in the same order as the edges below (positives first)
train_label = torch.cat((pos_label, neg_label), dim=0)

# Join positive and negative edges along the edge axis.
# The network is very small, so no val/test split is made.
train_edge = torch.cat((pos_edge_index, neg_edge_index), dim=1)
print(train_edge.shape)

train(emb, loss_fn, sigmoid, train_label, train_edge)

# Plot the embeddings again after training
visualize_emb(emb)