import numpy as np

# UNQ_C1
# GRADED FUNCTION: compute_entropy

def compute_entropy(y):
    """
    Computes the entropy for the examples at a node

    Args:
        y (ndarray): Numpy array indicating whether each example at a node is
            edible (`1`) or poisonous (`0`)

    Returns:
        entropy (float): Entropy at that node
    """
    # You need to return the following variables correctly
    entropy = 0.

    if len(y) != 0:
        # Fraction of edible (label 1) examples at this node
        p1 = len(y[y == 1]) / len(y)
        # A pure node (p1 == 0 or 1) has zero entropy; guard against log2(0)
        if p1 != 0 and p1 != 1:
            entropy = -p1 * np.log2(p1) - (1 - p1) * np.log2(1 - p1)
        else:
            entropy = 0.

    return entropy
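# A quick sanity check (not part of the graded exercise): a 50/50 mix of
# labels yields the maximum entropy of 1.0, and a pure node yields 0.0.
print(compute_entropy(np.array([1, 1, 0, 0])))  # 1.0 (maximally mixed node)
print(compute_entropy(np.array([1, 1, 1, 1])))  # 0.0 (pure node)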
# UNQ_C2
# GRADED FUNCTION: split_dataset

def split_dataset(X, node_indices, feature):
    """
    Splits the data at the given node into
    left and right branches

    Args:
        X (ndarray): Data matrix of shape (n_samples, n_features)
        node_indices (list): List containing the active indices, i.e. the samples being considered at this step
        feature (int): Index of feature to split on

    Returns:
        left_indices (list): Indices with feature value == 1
        right_indices (list): Indices with feature value == 0
    """
    # You need to return the following variables correctly
    left_indices = []
    right_indices = []

    ### START CODE HERE ###
    # Go through the indices of examples at that node
    for i in node_indices:
        # Examples with the feature present (== 1) go left, the rest go right
        if X[i][feature] == 1:
            left_indices.append(i)
        else:
            right_indices.append(i)
    ### END CODE HERE ###

    return left_indices, right_indices
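# get_best_split below calls compute_information_gain, which is the lab's
# UNQ_C3 and does not appear in this section. The sketch here is NOT the
# graded solution, just a minimal implementation of the standard definition
# (information gain = node entropy minus the weighted entropy of the two
# branches) built from compute_entropy and split_dataset above. It assumes
# y is a numpy array and node_indices is non-empty.
def compute_information_gain(X, y, node_indices, feature):
    # Split the node's examples on the given feature
    left_indices, right_indices = split_dataset(X, node_indices, feature)

    # Labels at the node and at each branch
    y_node = y[node_indices]
    y_left = y[left_indices]
    y_right = y[right_indices]

    # Weight each branch by its share of the node's examples
    w_left = len(y_left) / len(y_node)
    w_right = len(y_right) / len(y_node)

    # Entropy reduction achieved by the split
    weighted_entropy = w_left * compute_entropy(y_left) + w_right * compute_entropy(y_right)
    return compute_entropy(y_node) - weighted_entropy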
# UNQ_C4
# GRADED FUNCTION: get_best_split

def get_best_split(X, y, node_indices):
    """
    Returns the optimal feature
    to split the node data

    Args:
        X (ndarray): Data matrix of shape (n_samples, n_features)
        y (array like): list or ndarray with n_samples containing the target variable
        node_indices (list): List containing the active indices, i.e. the samples being considered in this step

    Returns:
        best_feature (int): The index of the best feature to split
    """
    # Some useful variables
    num_features = X.shape[1]

    # You need to return the following variables correctly
    best_feature = -1

    ### START CODE HERE ###
    max_info_gain = 0

    # Iterate through all features
    for feature in range(num_features):
        # Compute the information gain from splitting on this feature
        info_gain = compute_information_gain(X, y, node_indices, feature)

        # If the information gain is larger than the max seen so far,
        # update max_info_gain and best_feature
        if info_gain > max_info_gain:
            max_info_gain = info_gain
            best_feature = feature
    ### END CODE HERE ###

    return best_feature
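# A hypothetical end-to-end check on a tiny dataset (the values are
# illustrative, not from the lab's data). Feature 0 matches the labels
# exactly, so it gives an information gain of 1.0 and is selected;
# feature 1 splits the labels evenly and gives a gain of 0.
X_tiny = np.array([[1, 1],
                   [1, 0],
                   [0, 1],
                   [0, 0]])
y_tiny = np.array([1, 1, 0, 0])
root_indices = [0, 1, 2, 3]

print(get_best_split(X_tiny, y_tiny, root_indices))  # 0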