CS224W 2023 Fall Colab 1 练习记录,Colab题目链接
import networkx as nx
import matplotlib.pyplot as plt
import torch
import random
from sklearn.decomposition import PCA
# Module-level graph used by every exercise below: the karate club graph
# bundled with networkx.
G = nx.karate_club_graph()
# Question 1: What is the average degree of the karate club network? (5 Points)
def average_degree(num_edges, num_nodes):
    """Return the average node degree, rounded to an integer.

    Every edge is counted twice (once per endpoint), so the mean degree is
    ``2 * num_edges / num_nodes``. The quotient is passed through the
    built-in ``round`` (e.g. 3.3 -> 3, 3.7 -> 4).

    Args:
        num_edges: Number of edges in the graph.
        num_nodes: Number of nodes in the graph.

    Returns:
        The rounded average degree.
    """
    degree_total = 2 * num_edges
    return round(degree_total / num_nodes)
# Read the edge and node counts from G and print the rounded average degree.
num_edges = G.number_of_edges()
num_nodes = G.number_of_nodes()
avg_degree = average_degree(num_edges, num_nodes)
print("Average degree of karate club network is {}".format(avg_degree))
# Question 2: What is the average clustering coefficient of the karate club network? (5 Points)
def average_clustering_coefficient(G):
    """Return the average clustering coefficient of ``G``, rounded to 2 decimals.

    The unrounded value comes from ``nx.average_clustering``; for example
    3.333 would be reported as 3.33 and 3.7571 as 3.76.

    Args:
        G: The graph passed to ``nx.average_clustering``.

    Returns:
        The average clustering coefficient rounded to two decimal places.
    """
    mean_coefficient = nx.average_clustering(G)
    return round(mean_coefficient, 2)
# Compute and print the rounded average clustering coefficient of G.
avg_cluster_coef = average_clustering_coefficient(G)
print("Average clustering coefficient of karate club network is {}".format(avg_cluster_coef))
# Question 3: What is the PageRank value for node 0 (node with id 0) after one PageRank iteration? (5 Points)
def one_iter_pagerank(G, beta, r0, node_id):
    """Return the PageRank value of ``node_id`` after one iteration.

    Each neighbor of ``node_id`` contributes ``beta * r0 / degree(neighbor)``,
    i.e. every neighbor is assumed to hold the same rank ``r0`` and to spread
    it evenly over its edges. A teleport term ``(1 - beta) / N`` is then
    added, where ``N`` is ``G.number_of_nodes()``.

    Args:
        G: Graph providing ``neighbors``, ``degree`` and ``number_of_nodes``.
        beta: Weight applied to the rank received from neighbors.
        r0: Rank value used for every neighbor.
        node_id: The node whose updated rank is computed.

    Returns:
        The updated rank rounded to two decimal places.
    """
    damped_rank = beta * r0
    received = 0
    for nbr in G.neighbors(node_id):
        received += damped_rank / G.degree[nbr]
    teleport = (1 - beta) / G.number_of_nodes()
    return round(received + teleport, 2)
# PageRank inputs: beta weights the neighbor contribution, and r0 is the
# uniform starting rank 1 / N used for every node.
beta = 0.8
r0 = 1 / G.number_of_nodes()
node = 0
r1 = one_iter_pagerank(G, beta, r0, node)
print("The PageRank value for node 0 after one iteration is {}".format(r1))
# Question 4: What is the (raw) closeness centrality for the karate club network node 5? (5 Points)
def closeness_centrality(G, node=5):
    """Return the raw closeness centrality of ``node``, rounded to 2 decimals.

    ``nx.closeness_centrality`` is called with ``wf_improved=False`` and its
    result is divided by ``G.number_of_nodes() - 1``.
    NOTE(review): per the networkx documentation the library value is scaled
    by (reachable nodes - 1), so this division is meant to undo that scaling
    on a connected graph -- confirm if used on disconnected graphs.

    Args:
        G: The input graph.
        node: Id of the node to evaluate (defaults to 5).

    Returns:
        The rescaled closeness value rounded to two decimal places.
    """
    scaled = nx.closeness_centrality(G, u=node, wf_improved=False)
    other_nodes = G.number_of_nodes() - 1
    return round(scaled / other_nodes, 2)
# Compute and print the rounded raw closeness centrality of node 5.
node = 5
closeness = closeness_centrality(G, node=node)
print("The node 5 has closeness centrality {}".format(closeness))
# Question 5: Get the edge list of the karate club network and transform it into torch.LongTensor.
# What is the torch.sum value of pos_edge_index tensor? (10 Points)
def graph_to_edge_list(G):
    """Return the edges of ``G`` as a list.

    The list holds whatever ``G.edges()`` yields, in iteration order; for an
    ``nx.Graph`` each item is a tuple of the two nodes an edge connects.

    Args:
        G: Graph whose ``edges()`` are collected.

    Returns:
        A new list with one entry per edge.
    """
    return list(G.edges())
def edge_list_to_tensor(edge_list):
    """Convert a list of ``(u, v)`` edge tuples into a ``[2, len(edge_list)]`` tensor.

    Row 0 holds the first endpoint of every edge and row 1 the second.

    Args:
        edge_list: Sequence of 2-tuples of integer node ids.

    Returns:
        A ``torch.long`` tensor of shape ``[2, len(edge_list)]``.

    Raises:
        RuntimeError: If the entries cannot be arranged as ``len(edge_list)``
            pairs (for example when an entry has more than two elements).
    """
    pairs = list(edge_list)
    # Reshaping to an explicit (len, 2) keeps the tensor 2-D for an empty
    # list, so the result is [2, 0] rather than a 1-D tensor of shape [0].
    edge_index = torch.tensor(pairs, dtype=torch.long).reshape(len(pairs), 2).t()
    return edge_index
# Build the positive edge list from G and its edge-index tensor, then print
# the tensor's shape and the sum of all its entries.
pos_edge_list = graph_to_edge_list(G)
pos_edge_index = edge_list_to_tensor(pos_edge_list)
print("The pos_edge_index tensor has shape {}".format(pos_edge_index.shape))
print("The pos_edge_index tensor has sum value {}".format(torch.sum(pos_edge_index)))
# Question 6: Please implement following function that samples negative edges.
# Then answer which edges (edge_1 to edge_5) are the negative edges in the karate club network? (10 Points)
def sample_negative_edges(G, num_neg_samples):
    """Return ``num_neg_samples`` randomly chosen negative edges of ``G``.

    Candidates are the node pairs produced by ``nx.non_edges(G)``; the
    requested number is drawn from them without replacement via
    ``random.sample``. The case where fewer candidates exist than requested
    is not handled specially (``random.sample`` raises ``ValueError``).
    NOTE(review): per the networkx documentation ``non_edges`` excludes self
    loops and, for undirected graphs, reports each unordered pair once.

    Args:
        G: Graph to sample non-edges from.
        num_neg_samples: Number of negative edges to return.

    Returns:
        A list of sampled node pairs.
    """
    candidates = list(nx.non_edges(G))
    return random.sample(candidates, num_neg_samples)
# Sample as many negative edges as there are positive edges
neg_edge_list = sample_negative_edges(G, len(pos_edge_list))
# Transform the negative edge list to a tensor with the same layout as
# pos_edge_index
neg_edge_index = edge_list_to_tensor(neg_edge_list)
print("The neg_edge_index tensor has shape {}".format(neg_edge_index.shape))
# Candidate edges for the question: which of these can be negative edges?
edge_1 = (7, 1)
edge_2 = (1, 33)
edge_3 = (33, 22)
edge_4 = (0, 4)
edge_5 = (4, 2)
def if_negative_edge(*edges):
    """Print, for each given edge, whether it can be a negative edge.

    An edge cannot be negative when it, or the same pair with its endpoints
    swapped, is present in the module-level ``pos_edge_list``. Edges are
    numbered from 1 in the order they are passed.

    Args:
        *edges: Node pairs to check; each must support indexing with 0 and 1.
    """
    for position, edge in enumerate(edges, start=1):
        flipped = (edge[1], edge[0])
        is_positive = edge in pos_edge_list or flipped in pos_edge_list
        if is_positive:
            template = "edge{} can't be a negative edge"
        else:
            template = "edge{} can be a negative edge"
        print(template.format(position))
# Report which of the five candidate edges can be negative edges.
if_negative_edge(edge_1, edge_2, edge_3, edge_4, edge_5)
# Please do not change / reset the random seed: it fixes torch's RNG state
# for the torch operations that follow.
torch.manual_seed(1)
def create_node_emb(num_node=34, embedding_dim=16):
    """Create a node-embedding layer with uniformly initialised weights.

    A ``torch.nn.Embedding`` is constructed first and its weight data is then
    replaced by samples from ``torch.rand``, i.e. values drawn uniformly from
    [0, 1).

    Args:
        num_node: Number of rows (nodes) in the embedding table.
        embedding_dim: Size of each embedding vector.

    Returns:
        The ``torch.nn.Embedding`` layer.
    """
    layer = torch.nn.Embedding(num_node, embedding_dim)
    uniform_weights = torch.rand(num_node, embedding_dim)
    layer.weight.data = uniform_weights
    return layer
# Create the embedding layer with its default size and look up two node ids.
emb = create_node_emb()
ids = torch.LongTensor([0, 3])
# Print the embedding layer
print("Embedding: {}".format(emb))
# An example that gets the embeddings for node 0 and 3
print(emb(ids))
def visualize_emb(emb):
    """Scatter-plot the embedding weights after a 2-component PCA projection.

    Row ``i`` of the projected weights is plotted for node ``i`` of the
    module-level graph ``G``. Nodes whose ``'club'`` attribute equals
    ``'Mr. Hi'`` are drawn in red; all other nodes are drawn in blue under
    the legend label "Officer".

    Args:
        emb: Embedding layer whose ``weight`` matrix is visualised.
    """
    weights = emb.weight.data.numpy()
    projected = PCA(n_components=2).fit_transform(weights)
    plt.figure(figsize=(6, 6))
    # One (xs, ys) pair of coordinate lists per club.
    mr_hi = ([], [])
    others = ([], [])
    for node_id, attrs in G.nodes(data=True):
        xs, ys = mr_hi if attrs['club'] == 'Mr. Hi' else others
        xs.append(projected[node_id][0])
        ys.append(projected[node_id][1])
    plt.scatter(mr_hi[0], mr_hi[1], color="red", label="Mr. Hi")
    plt.scatter(others[0], others[1], color="blue", label="Officer")
    plt.legend()
    plt.show()
# Visualize the initial random embedding
visualize_emb(emb)
# Question 7: Training the embedding!
# What is the best performance you can get? Please report both the best loss and accuracy on Gradescope. (20 Points)
def accuracy(pred, label):
    """Return the fraction of predictions that match ``label``, to 4 decimals.

    ``pred`` is rounded element-wise with ``torch.round`` to obtain the
    predicted labels, which are then compared with ``label``; for sigmoid
    outputs this classifies values above 0.5 as 1 and the rest as 0.

    Args:
        pred: Tensor of predicted probabilities (the sigmoid output).
        label: Tensor of ground-truth labels.

    Returns:
        Accuracy rounded to four decimal places (e.g. 0.82956 -> 0.8296).
    """
    matches = pred.round().eq(label)
    hits = matches.sum().item()
    return round(hits / len(label), 4)
def train(emb, loss_fn, sigmoid, train_label, train_edge,
          epochs=500, learning_rate=0.1, log_every=25):
    """Train the embedding layer to separate positive from negative edges.

    Each epoch:
      (1) looks up the embeddings of both endpoints of every edge,
      (2) takes the dot product of each endpoint pair,
      (3) feeds the dot products into ``sigmoid``,
      (4) feeds the sigmoid output and ``train_label`` into ``loss_fn``,
      (5) prints loss and accuracy on every ``log_every``-th epoch,
      (6) back-propagates and updates the embeddings with SGD (momentum 0.9).

    Args:
        emb: Embedding layer whose parameters are optimised in place.
        loss_fn: Callable taking ``(prediction, label)`` and returning a
            scalar loss tensor.
        sigmoid: Callable applied to the dot products.
        train_label: Label tensor with one entry per edge.
        train_edge: Index tensor whose first dimension selects the two
            endpoints of each edge.
        epochs: Number of optimisation steps (default 500).
        learning_rate: SGD learning rate (default 0.1).
        log_every: Print progress every this many epochs; 0 or None
            disables printing (default 25).
    """
    optimizer = torch.optim.SGD(emb.parameters(), lr=learning_rate, momentum=0.9)
    for epoch in range(1, epochs + 1):
        optimizer.zero_grad()
        # Indexing with train_edge yields one stack of embeddings per endpoint.
        endpoint_emb = emb(train_edge)
        dot_prod = torch.mul(endpoint_emb[0], endpoint_emb[1]).sum(dim=1)
        pred = sigmoid(dot_prod)
        loss = loss_fn(pred, train_label)
        if log_every and epoch % log_every == 0:
            # Accuracy is only needed for the progress line, so it is not
            # computed on silent epochs; loss.item() prints the plain number
            # instead of the tensor repr.
            accu = accuracy(pred, train_label)
            print('In the {}th epoch , loss is {} and accuracy is {}'.format(epoch, loss.item(), accu))
        loss.backward()
        optimizer.step()
# Binary cross-entropy on sigmoid outputs is used as the training objective.
loss_fn = torch.nn.BCELoss()
sigmoid = torch.nn.Sigmoid()
print(pos_edge_index.shape)
# Generate the positive and negative labels: 1 for every positive edge and
# 0 for every negative edge
pos_label = torch.ones(pos_edge_index.shape[1], )
neg_label = torch.zeros(neg_edge_index.shape[1], )
# Concat positive and negative labels into one tensor
train_label = torch.cat([pos_label, neg_label], dim=0)
# Concat positive and negative edges into one tensor, in the same order as
# the labels.
# Since the network is very small, we do not split the edges into val/test sets
train_edge = torch.cat([pos_edge_index, neg_edge_index], dim=1)
print(train_edge.shape)
# Train the embeddings, then plot them again to compare with the initial plot.
train(emb, loss_fn, sigmoid, train_label, train_edge)
visualize_emb(emb)