西瓜好坏的判断可以使用决策树算法,常见的决策树算法包括ID3、C4.5、CART等。以下是一个简单的基于CART算法的决策树示例,用于判断西瓜好坏:
```c
#include <stdio.h>
#include <stdlib.h>
#define MAX_SAMPLES 10
/*
 * Training set: MAX_SAMPLES melons with two integer features each.
 *   x[i][0] = color, x[i][1] = root shape.
 * NOTE(review): the data only contains feature values 0 and 1, while
 * main's prompts offer a 0-2 range — confirm the intended value range.
 */
int x[MAX_SAMPLES][2] = {
{0, 1},
{0, 1},
{1, 0},
{1, 1},
{1, 0},
{0, 1},
{0, 0},
{1, 0},
{0, 0},
{1, 0},
};
/* Class label per sample: 0 = good melon, 1 = bad melon (see main). */
int y[MAX_SAMPLES] = {0, 0, 1, 1, 1, 0, 1, 1, 1, 1};
/*
 * One node of the decision tree.
 * Internal nodes route a sample left when sample[feature] <= threshold;
 * they carry label == -1. Leaf nodes carry the class label (0/1) and
 * have NULL children (sentinels set in train_tree, read in predict).
 */
struct Node {
int feature; // index of the feature to split on (-1 at a leaf)
int threshold; // split threshold: go left when value <= threshold
int label; // class label at a leaf; -1 for internal nodes
struct Node *left; // subtree for samples with feature value <= threshold
struct Node *right; // subtree for samples with feature value > threshold
};
/*
 * Gini impurity 1 - sum(p_c^2) over the class labels of the sample
 * indices in `samples` (labels looked up in the file-scope array y).
 * An empty sample set is defined as pure (0.0), which also avoids a
 * division by zero.
 *
 * BUG FIX: the original counted count[samples[i]] — indexing a
 * 2-element array with the sample INDEX (0..MAX_SAMPLES-1), an
 * out-of-bounds write. The impurity must be computed over the labels
 * y[samples[i]].
 */
double calc_gini(int *samples, int n_samples)
{
extern int y[]; /* class labels, defined at file scope */
if (n_samples <= 0) {
return 0.0;
}
int count[2] = {0};
for (int i = 0; i < n_samples; i++) {
count[y[samples[i]]]++; /* count by class label, not sample index */
}
double gini = 1.0;
for (int c = 0; c < 2; c++) {
double p = (double)count[c] / n_samples;
gini -= p * p;
}
return gini;
}
/*
 * Gini gain of splitting `samples` on (feature <= threshold).
 * The two partitions are written into left_samples/right_samples, each
 * of which must have room for n_samples entries. Returns the parent
 * impurity minus the size-weighted impurity of the two children.
 *
 * A split that leaves one side empty separates nothing: report a gain
 * of 0 instead of passing an empty set (n == 0) to calc_gini, whose
 * division by n would otherwise produce NaN and poison split selection.
 */
double calc_gini_gain(int *samples, int n_samples, int feature, int threshold, int *left_samples, int *right_samples)
{
int n_left = 0, n_right = 0;
for (int i = 0; i < n_samples; i++) {
if (x[samples[i]][feature] <= threshold) {
left_samples[n_left++] = samples[i];
} else {
right_samples[n_right++] = samples[i];
}
}
if (n_left == 0 || n_right == 0) {
return 0.0; /* degenerate split: no information gained */
}
double gini_gain = calc_gini(samples, n_samples);
gini_gain -= (double)n_left / n_samples * calc_gini(left_samples, n_left);
gini_gain -= (double)n_right / n_samples * calc_gini(right_samples, n_right);
return gini_gain;
}
/* Allocate one tree node, aborting on out-of-memory. */
static struct Node *alloc_node(void)
{
struct Node *n = malloc(sizeof *n);
if (n == NULL) {
fprintf(stderr, "out of memory\n");
exit(1);
}
return n;
}

/* Build a leaf node carrying the given class label. */
static struct Node *make_leaf(int label)
{
struct Node *leaf = alloc_node();
leaf->feature = -1;
leaf->threshold = -1;
leaf->label = label;
leaf->left = NULL;
leaf->right = NULL;
return leaf;
}

/* Majority class label among the given sample indices (ties -> 0). */
static int majority_label(const int *samples, int n_samples)
{
int count[2] = {0};
for (int i = 0; i < n_samples; i++) {
count[y[samples[i]]]++;
}
return count[1] > count[0] ? 1 : 0;
}

/*
 * Recursively build a CART decision tree over the sample indices in
 * `samples` (n_samples > 0). Returns a heap-allocated tree; internal
 * nodes carry feature/threshold with label == -1, leaves carry the
 * class label. Caller owns the returned tree.
 *
 * BUG FIXES vs. the original:
 *  - left_samples/right_samples held the partition of the LAST candidate
 *    split tried, not the best one; we now re-partition on the winner.
 *  - the recursion sizes were the meaningless expressions
 *    n_samples * best_threshold / 10; we now pass the true counts.
 *  - when no split has positive gain the original recursed forever;
 *    we now stop with a majority-vote leaf.
 */
struct Node* train_tree(int *samples, int n_samples)
{
// Pure node: all samples share one label -> leaf.
int label = y[samples[0]];
int is_pure = 1;
for (int i = 1; i < n_samples; i++) {
if (y[samples[i]] != label) {
is_pure = 0;
break;
}
}
if (is_pure) {
return make_leaf(label);
}
// Exhaustively score every (feature, observed value) candidate split.
int best_feature = -1, best_threshold = 0;
double best_gini_gain = 0.0;
int left_samples[MAX_SAMPLES], right_samples[MAX_SAMPLES];
for (int f = 0; f < 2; f++) {
for (int j = 0; j < n_samples; j++) {
int threshold = x[samples[j]][f];
double gini_gain = calc_gini_gain(samples, n_samples, f, threshold, left_samples, right_samples);
if (gini_gain > best_gini_gain) {
best_feature = f;
best_threshold = threshold;
best_gini_gain = gini_gain;
}
}
}
// No split improves purity: stop with a majority-vote leaf instead of
// recursing forever on an unchanged sample set.
if (best_feature < 0) {
return make_leaf(majority_label(samples, n_samples));
}
// Re-partition on the winning split: the scratch arrays currently hold
// the partition of the last candidate tried, not necessarily the best.
int n_left = 0, n_right = 0;
for (int i = 0; i < n_samples; i++) {
if (x[samples[i]][best_feature] <= best_threshold) {
left_samples[n_left++] = samples[i];
} else {
right_samples[n_right++] = samples[i];
}
}
if (n_left == 0 || n_right == 0) {
// Should be unreachable when best_gini_gain > 0; guard anyway.
return make_leaf(majority_label(samples, n_samples));
}
struct Node *node = alloc_node();
node->feature = best_feature;
node->threshold = best_threshold;
node->label = -1;
node->left = train_tree(left_samples, n_left);
node->right = train_tree(right_samples, n_right);
return node;
}
/*
 * Classify one sample by walking the tree from `root`: at each internal
 * node go left when sample[feature] <= threshold, otherwise right.
 * Returns the class label stored at the leaf that is reached.
 */
int predict(struct Node *root, int *sample)
{
struct Node *cur = root;
for (;;) {
if (cur->label != -1) {
return cur->label;
}
cur = (sample[cur->feature] <= cur->threshold) ? cur->left : cur->right;
}
}
int main()
{
// 训练决策树
int samples[MAX_SAMPLES];
for (int i = 0; i < MAX_SAMPLES; i++) {
samples[i] = i;
}
struct Node *root = train_tree(samples, MAX_SAMPLES);
// 预测新样本
int sample[2];
printf("请输入西瓜的色泽(0-青绿,1-乌黑,2-浅白): ");
scanf("%d", &sample[0]);
printf("请输入西瓜的根蒂(0-蜷缩,1-稍蜷,2-硬挺): ");
scanf("%d", &sample[1]);
int label = predict(root, sample);
if (label == 0) {
printf("这是个好西瓜!\n");
} else {
printf("这是个坏西瓜!\n");
}
return 0;
}
```
在上述代码中,我们使用CART算法训练了一个决策树,并用于判断新的西瓜好坏。其中,训练数据包括10个样本,每个样本有两个特征:色泽和根蒂,标签为0表示好西瓜,标签为1表示坏西瓜。在预测新样本时,需要输入西瓜的色泽和根蒂信息,程序会输出预测结果。