分类:
#include <iostream>
#include <algorithm>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <cstring>
#include <string>
#include <vector>
#include <cmath>
#include <time.h>
#include <map>
using namespace std;
// One labelled text sample together with the per-sample working storage.
struct train_data {
    int index;            // sequence number of the text
    int emotion_value;    // numeric emotion id
    string emotion;       // emotion label
    vector<string> word;  // words of the text
    int onehot[1000];     // one-hot matrix row, zero-filled on construction
    double distance;      // distance value

    // Every argument is optional; omitted ones default to 0 / "" / 0.0.
    // `onehot()` value-initialises the array, i.e. sets all 1000 cells to 0.
    train_data(int idx = 0, int value = 0, string label = "", double dist = 0.0)
        : index(idx),
          emotion_value(value),
          emotion(label),
          word(),
          onehot(),
          distance(dist) {}
};
// Program-wide state shared by reading_file() and class_calculating().
vector<string> train_text; // every complete training text line read from train.txt
vector<string> all_words; // all distinct words seen so far (vertical axis of the word matrix)
vector<train_data> all_trains; // all training texts (horizontal axis); only cleared in main() in the visible code
int right_sum; // number of correct predictions, reset and counted in class_calculating()
//NB adding part
// A word together with the number of times it has been seen.
struct each_word {
    string name;  // the word itself
    double time;  // occurrence count (kept as double so it can be divided directly)

    // Both arguments are optional and default to an empty word with count 0.
    each_word(string word = "", double count = 0) : name(word), time(count) {}
};
// Bag of words and running statistics for one emotion class.
struct emotion {
    string emotion2;             // emotion label; left empty by the constructor
    vector<each_word> word_bag;  // distinct words with their occurrence counts
    double sum_words;            // running word total for this emotion
    double probability;          // score slot for this emotion
    double times;                // running sample total for this emotion

    // All three numeric fields are optional and default to 0.
    emotion(double words = 0, double score = 0, double samples = 0)
        : emotion2(),
          word_bag(),
          sum_words(words),
          probability(score),
          times(samples) {}
};
// Per-emotion statistics. main() names slots 1..6; slot 0 is never named
// in the visible code.
emotion each_emotion[7];
// End of the Naive Bayes additions.
// Start of the "youhua" (optimisation) part.
// NOTE(review): `add` is not referenced anywhere else in the visible code.
double add[7] = {0};
// End of the "youhua" (optimisation) part.
// Forward declarations of the functions defined after main().
void reading_file(void );
void class_calculating();
bool cmp(const emotion & , const emotion & );
// Entry point of the classifier: resets the global containers, labels the
// six emotion slots, trains from the training file and then evaluates.
int main() {
    train_text.clear();
    all_words.clear();
    all_trains.clear();

    // Slot 0 of each_emotion is intentionally left unnamed; labels start at 1.
    static const char *const labels[] = {
        "anger", "disgust", "fear", "joy", "sad", "surprise"
    };
    for (int id = 1; id <= 6; ++id)
        each_emotion[id].emotion2 = labels[id - 1];

    reading_file();
    class_calculating();
    return 0;
}
void reading_file() {
ifstream train("train.txt");
char read[100];
string temp;
train.getline(read, 100);
while (!train.eof()) {
train.getline(read, 100);
temp = read;
train_text.push_back(temp);
}
train.close();
stringstream s;
int index;
int emotion_value;
string emotion;
string word;
for (int i = 0; i < train_text.size(); i ++) {
s.str(train_text[i]);
s >> index;
s >> emotion_value;
s >> emotion;
each_emotion[emotion_value].times ++;
while (s != NULL) {
s >> word;
each_emotion[emotion_value].sum_words ++;
bool flag = true;
for (int j = 0; j < each_emotion[emotion_value].word_bag.size(); j ++) {
if (each_emotion[emotion_value].word_bag[j].name == word) {
each_emotion[emotion_value].word_bag[j].time ++;
flag = false;
break;
}
else
continue;
}
if (flag) {
each_word new_each_word(word, 1);
each_emotion[emotion_value].word_bag.push_back(new_each_word);
}
bool flag1 = true;
for (int i = 0; i < all_words.size(); i ++) {
if (all_words[i] == word) {
flag1 = false;
break;
}
else
continue;
}
if (flag1)
all_words.push_back(word);
}
s.clear();
}
}
void class_calculating() {
ifstream t("test.txt");
right_sum = 0;
char c[100];
string temp;
t.getline(c, 100);
while (t.getline(c, 100)) {
train_data test_train;
char *p = strtok(c, " ");
p = strtok(NULL, " ");
p = strtok(NULL, " ");
temp = p;
test_train.emotion = temp;
int new_index;
for (new_index = 1; new_index < 7; new_index ++) {
if (each_emotion[new_index].emotion2 == temp)
break;
}
p = strtok(NULL, " ");
while (p != NULL) {
temp = p;
test_train.word.push_back(temp);
// each_emotion[new_index].sum_words ++;
// bool flag = true;
// for (int j = 0; j < each_emotion[new_index].word_bag.size(); j ++) {
// if (each_emotion[new_index].word_bag[j].name == temp) {
// each_emotion[new_index].word_bag[j].time ++;
// flag = false;
// break;
// }
// else
// continue;
// }
// if (flag) {
// each_word new_each_word(temp, 1);
// each_emotion[new_index].word_bag.push_back(new_each_word);
// }
bool flag1 = true;
for (int i = 0; i < all_words.size(); i ++) {
if (all_words[i] == temp) {
flag1 = false;
break;
}
else
continue;
}
if (flag1)
all_words.push_back(temp);
p = strtok(NULL, " ");
}
for (int i = 1; i <= 7; i ++) {
double pro = each_emotion[i].times / train_text.size();
for (int j = 0; j < test_train.word.size(); j ++) {
bool flag = true;
for (int k = 0; k < each_emotion[i].word_bag.size(); k ++) {
if (each_emotion[i].word_bag[k].name == test_train.word[j]) {
pro *= (each_emotion[i].word_bag[k].time / (each_emotion[i].sum_words + all_words.size()));
flag = false;
break;
}
else
continue;
}
if (flag) {
pro *= (1.0 / (each_emotion[i].sum_words + all_words.size()));
}
}
each_emotion[i].probability = pro * (1.0 / (abs(each_emotion[i].times - train_text.size())));
}
sort(each_emotion + 1, each_emotion + 7, cmp);
cout << each_emotion[6].emotion2 << endl;
if (each_emotion[6].emotion2 == test_train.emotion) {
right_sum ++;
}
}
cout << "正确个数: " << right_sum << endl;
// for (int i = 1; i < 7 ; i ++)
// cout << each_emotion[i].times << endl;
}
// Strict weak ordering on emotions by ascending probability; with this
// comparator the most probable emotion is the maximum element.
bool cmp(const emotion &a, const emotion & b) {
    return a.probability < b.probability;
}
回归:
#include <iostream>
#include <algorithm>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <cstring>
#include <string>
#include <vector>
#include <cmath>
#include <time.h>
#include <map>
using namespace std;
// A word of one sample together with how often it occurs in that sample.
struct ct {
    string s;    // the word
    double num;  // occurrence count inside the sample

    // Both arguments are optional and default to an empty word with count 0.
    ct(string word = "", double count = 0) : s(word), num(count) {}
};
// One text sample: its distinct words with counts, its label values and the
// term-frequency vector derived from them.
struct train_data {
    int index;               // sequence number of the text
    int emotion_value;       // numeric emotion id
    string emotion;          // emotion label
    vector<ct> word;         // distinct words of the text with their counts
    int onehot[1000];        // one-hot matrix row, zero-filled on construction
    double distance;         // distance value
    double sum;              // total number of words in the text
    vector<double> fre_set;  // label values read from the data file
    vector<double> tf;       // one term frequency per vocabulary word

    // Every argument is optional; omitted ones default to 0 / "" / 0.0.
    // `onehot()` value-initialises the array, i.e. sets all 1000 cells to 0.
    train_data(int idx = 0, int value = 0, string label = "", double dist = 0.0,
               double total = 0)
        : index(idx),
          emotion_value(value),
          emotion(label),
          word(),
          onehot(),
          distance(dist),
          sum(total),
          fre_set(),
          tf() {}
};
// Program-wide state shared by reading_file(), get_TF() and regre_calculating().
vector<string> train_text; // every complete training text; only cleared in main() in the visible code
vector<string> all_words; // all distinct training words (vertical axis of the word matrix)
vector<train_data> all_trains; // all training samples (horizontal axis of the word matrix)
int right_sum; // number of correct predictions; not referenced elsewhere in the visible code
// Forward declarations of the functions defined after main().
void reading_file(void );
void get_TF(void );
void regre_calculating(void );
// Entry point of the regression program: resets the global containers, loads
// the training set, derives the term frequencies and writes the predictions.
int main() {
    all_trains.clear();
    all_words.clear();
    train_text.clear();

    reading_file();
    // The original author's debug note recorded a vocabulary size of 904 here.
    get_TF();
    regre_calculating();
    return 0;
}
void reading_file() {
ifstream t("Dataset_train.csv");
char c[150];
string temp;
t.getline(c, 150);
while (t.getline(c, 150)) {
train_data new_train;
char d[150];
strcpy(d, c);
char *p = strtok(c, ",");
p = strtok(NULL, ",");
//cout << p << endl;
char *p2 = strtok(p, " ");
while (p2 != NULL) {
new_train.sum ++;
string word = p2;
//统计所有单词
bool flag1 = true;
for (int i = 0; i < all_words.size(); i ++) {
if (all_words[i] == word) {
flag1 = false;
break;
}
else
continue;
}
if (flag1)
all_words.push_back(word);
//统计每个训练文本中的单词
bool flag2 = true;
for (int i = 0; i < new_train.word.size(); i ++) {
if (new_train.word[i].s == word) {
new_train.word[i].num ++;
flag2 = false;
break;
}
else
continue;
}
if (flag2) {
ct new_ct(word, 1);
new_train.word.push_back(new_ct);
}
p2 = strtok(NULL, " ");
}
char *p3 = strtok(d, ",");
p3 = strtok(NULL, ",");
p3 = strtok(NULL, ",");
stringstream ss;
double fre;
while (p3 != NULL) {
temp = p3;
ss.str(temp);
ss >> fre;
new_train.fre_set.push_back(fre);
ss.clear();
p3 = strtok(NULL, ",");
}
all_trains.push_back(new_train);
}
}
void get_TF() {
for (int i = 0; i < all_trains.size(); i ++) {
for (int j = 0; j < all_words.size(); j ++) {
bool flag = true;
for (int k = 0; k < all_trains[i].word.size(); k ++) {
if (all_words[j] == all_trains[i].word[k].s) {
all_trains[i].tf.push_back(all_trains[i].word[k].num/all_trains[i].sum);
flag = false;
break;
}
}
if (flag)
all_trains[i].tf.push_back(0);
}
}
}
void regre_calculating() {
ifstream t("Dataset_validation.csv");
char c[150];
string temp;
t.getline(c, 150);
ofstream out("14353324_xiangketing_regression.txt");
while (t.getline(c, 150)) {
train_data test_train;
char *p = strtok(c, ",");
p = strtok(NULL, ",");
//cout << p << endl;
char *p2 = strtok(p, " ");
while (p2 != NULL) {
test_train.sum ++;
temp = p2;
bool flag = true;
for (int i = 0; i < test_train.word.size(); i ++) {
if (test_train.word[i].s == temp) {
test_train.word[i].num ++;
flag = false;
break;
}
}
if (flag) {
ct new_ct(temp, 1);
test_train.word.push_back(new_ct);
}
p2 = strtok(NULL, " ");
}
map<string, double> m;
for (int i = 0; i < test_train.word.size(); i ++) {
m[test_train.word[i].s] = test_train.word[i].num / test_train.sum;
}
map<int, double> fre;
fre[1] = 0; fre[2] = 0; fre[3] = 0;
fre[4] = 0; fre[5] = 0; fre[6] = 0;
for (int i = 0; i < all_trains.size(); i ++) {
double temp_fre = 1;
vector<double> v;
for (int j = 0; j < test_train.word.size(); j ++) {
bool flag = true;
for (int k = 0; k < all_words.size(); k ++) {
if (test_train.word[j].s == all_words[k]) {
if (all_trains[i].tf[k] != 0)
v.push_back(all_trains[i].tf[k]);
else
v.push_back(1.0 / (all_trains[i].sum + all_words.size()));
flag = false;
break;
}
}
if (flag)
v.push_back((1.0 + test_train.word[j].num) / (test_train.sum + all_words.size()));
}
for (int t = 0; t < v.size(); t ++) {
temp_fre *= v[t];
}
fre[1] += temp_fre * all_trains[i].fre_set[0];
fre[2] += temp_fre * all_trains[i].fre_set[1];
fre[3] += temp_fre * all_trains[i].fre_set[2];
fre[4] += temp_fre * all_trains[i].fre_set[3];
fre[5] += temp_fre * all_trains[i].fre_set[4];
fre[6] += temp_fre * all_trains[i].fre_set[5];
}
double all_fre = fre[1] + fre[2] + fre[3] + fre[4] + fre[5] + fre[6];
out << fre[1] / all_fre << '\t' << fre[2] / all_fre << '\t'
<< fre[3] / all_fre << '\t' << fre[4] / all_fre << '\t'
<< fre[5] / all_fre << '\t' << fre[6] / all_fre << endl;
}
}