原理:
进行缩放的原因和使用神经网络时的考虑是一样的,因为RBF网络是采用样本数据的欧氏距离来计算的。缩放的主要优点是避免数值范围较大的属性支配数值范围较小的属性;另一个优点是避免计算时的数值困难(numerical difficulties),因为核值通常依赖特征向量的内积(inner product),而较大的属性值可能导致数值问题。因此推荐把每个属性缩放到[-1, 1]或者[0, 1]之间,而且前一个范围要比后一个好,即对列向量进行规范化,其详细解释和计算公式见http://www.faqs.org/faqs/ai-faq/neural-nets/part2/中的“Should I standardize the input variables (column vectors)?”。libsvm中没有考虑属性的类型(效益、成本、固定、偏离、区间、偏离区间这6种不同属性类型的规范化计算公式是不一样的,详见:徐泽水,《不确定多属性决策方法及应用》,清华大学出版社,2004),而是采用了统一的线性缩放,作者认为此处可以改进一下。
需要注意的是,在进行测试之前,要对测试数据进行同样的缩放操作。其实在libsvm中有程序(svmscale.exe)来进行缩放操作。
上面这两种方法基本上可以完成所有的样本的预处理了。
解决方法:
源:A[]
结果:B[]
A的最大最小值 MaxVal,MinVal
B中希望的最大最小值 MaxOut,MinOut
循环
{
B[i] = MinOut + (MaxOut - MinOut) * (A[i] - MinVal) / (MaxVal - MinVal);
}
思路:准备把数据从txt读入vector二维数组进行处理!
一点想法,想保存起来:
主要是得到了转置矩阵,不过后来想想貌似不起什么作用:
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <exception>
#include <stdexcept>
#include<vector>
using namespace std;
/// Writes every element of @p vec to @p os, each followed by a single space.
///
/// The vector is taken by const reference so that printing does not copy it,
/// and a plain iterator loop is used so the function needs neither
/// <algorithm> nor <iterator>.
///
/// @param os   Destination stream.
/// @param vec  Elements to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& vec)
{
    for (typename std::vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
    {
        os << *it << " ";
    }
    return os;
}
/// Writes a two-dimensional vector to @p os, one inner vector per line.
/// Every element is followed by a single space and every row by a newline.
///
/// The argument is taken by const reference (no copy of the whole matrix) and
/// the dependent iterator types carry the `typename` keyword that standard
/// C++ requires inside a template.
///
/// @param os   Destination stream.
/// @param vec  Rows to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<std::vector<T> >& vec)
{
    typedef typename std::vector<std::vector<T> >::const_iterator RowIter;
    typedef typename std::vector<T>::const_iterator CellIter;

    for (RowIter row = vec.begin(); row != vec.end(); ++row)
    {
        for (CellIter cell = row->begin(); cell != row->end(); ++cell)
        {
            os << *cell << " ";
        }
        // '\n' ends the row without forcing a flush after each line.
        os << '\n';
    }
    return os;
}
/// Min-max scales the value pointed to by @p data into [0, 1]:
///     (*data - *minval) / (*maxval - *minval)
///
/// @param minval  Pointer to the smallest value of the feature.
/// @param maxval  Pointer to the largest value of the feature.
/// @param data    Pointer to the value to scale.
/// @return        The scaled value, or 0 when *maxval equals *minval (the
///                range is empty, so the division would be by zero).
template <class T>
T normalization(T *minval,T *maxval,int *data)
{
    // Dereference both bounds: subtracting the pointers themselves yields the
    // distance between two addresses, not the width of the value range.
    const T range = *maxval - *minval;
    if (range == T(0))
    {
        return T(0);
    }
    return (*data - *minval) / range;
}
/// Returns the transpose of a rectangular matrix stored as a vector of rows.
/// Despite its name, this function does not compute a matrix inverse.
///
/// @param ivecvec  Matrix whose rows all have the same length.
/// @return         Matrix whose element [i][j] equals ivecvec[j][i]; an empty
///                 matrix when @p ivecvec has no rows.
/// @throws std::invalid_argument if the rows do not all have the same length.
template <class T>
std::vector<std::vector<T> > InverseMatrix(const std::vector<std::vector<T> >& ivecvec)
{
    typedef typename std::vector<std::vector<T> >::size_type Index;

    if (ivecvec.empty())
    {
        return std::vector<std::vector<T> >();
    }

    // Number of rows of the input.
    const Index row = ivecvec.size();
    // Number of columns of the input, taken from the first row.
    const Index line = ivecvec[0].size();

    // A ragged input would be indexed out of bounds below, so reject it.
    for (Index j = 1; j < row; ++j)
    {
        if (ivecvec[j].size() != line)
        {
            throw std::invalid_argument("InverseMatrix: rows have different lengths");
        }
    }

    // Build the rows explicitly: vector<vector<T> >(line, row) relies on an
    // int -> vector<T> conversion that C++11 and later reject.
    std::vector<std::vector<T> > invers_vec(line, std::vector<T>(row));
    for (Index i = 0; i < line; ++i)
    {
        for (Index j = 0; j < row; ++j)
        {
            invers_vec[i][j] = ivecvec[j][i];
        }
    }
    return invers_vec;
}
int main()
{
vector<double> ivec;
vector<double>::iterator iter;
vector<vector<double> > ivecvec;
vector<vector<double> >::iterator iiter;
ifstream infile("e:\\test_data.txt");
string temp;
double a;
while(getline(infile, temp))
{
stringstream line(temp);
while(line >> a)
{
ivec.push_back(a);
}
ivecvec.push_back(ivec);
ivec.clear();
}
//存储数列行数row
int row = ivecvec.size();
//数列列数
int line = ivecvec[0].size();
//存储每列的最值
vector<vector<double> > m_val(line-1,2);
cout << InverseMatrix(ivecvec);
return 0;
}
修改思路,终于搞定,就是麻烦点,算法效率低点吧,继续改进!
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <exception>
#include <stdexcept>
#include<vector>
using namespace std;
/// Writes every element of @p vec to @p os, each followed by a single space.
///
/// The vector is taken by const reference so that printing does not copy it,
/// and a plain iterator loop is used so the function needs neither
/// <algorithm> nor <iterator>.
///
/// @param os   Destination stream.
/// @param vec  Elements to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& vec)
{
    for (typename std::vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
    {
        os << *it << " ";
    }
    return os;
}
/// Writes a two-dimensional vector to @p os, one inner vector per line.
/// Every element is followed by a single space and every row by a newline.
///
/// The argument is taken by const reference (no copy of the whole matrix) and
/// the dependent iterator types carry the `typename` keyword that standard
/// C++ requires inside a template.
///
/// @param os   Destination stream.
/// @param vec  Rows to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<std::vector<T> >& vec)
{
    typedef typename std::vector<std::vector<T> >::const_iterator RowIter;
    typedef typename std::vector<T>::const_iterator CellIter;

    for (RowIter row = vec.begin(); row != vec.end(); ++row)
    {
        for (CellIter cell = row->begin(); cell != row->end(); ++cell)
        {
            os << *cell << " ";
        }
        // '\n' ends the row without forcing a flush after each line.
        os << '\n';
    }
    return os;
}
/// Min-max scales @p data into [0, 1]: (data - minval) / (maxval - minval).
///
/// @param minval  Smallest value of the feature.
/// @param maxval  Largest value of the feature.
/// @param data    Value to scale.
/// @return        The scaled value, or 0 when @p maxval equals @p minval: a
///                constant feature has an empty range, so it is mapped to 0
///                instead of dividing by zero.
template <class T>
T normalization(T minval,T maxval,T data)
{
    const T range = maxval - minval;
    if (range == T(0))
    {
        return T(0);
    }
    return (data - minval) / range;
}
/// Scales every column of a rectangular matrix independently with
/// normalization(), using that column's own minimum and maximum.
///
/// @param ivecvec  Matrix stored as a vector of rows; all rows must have the
///                 same length. Taken by value and returned after scaling.
/// @return         The scaled matrix; an empty matrix when there are no rows.
///                 A column whose values are all equal is set to 0.
/// @throws std::invalid_argument if the rows do not all have the same length.
template <class T>
std::vector<std::vector<T> > get_vec_normalization(std::vector<std::vector<T> > ivecvec)
{
    typedef typename std::vector<std::vector<T> >::size_type Index;

    if (ivecvec.empty())
    {
        return ivecvec;
    }

    // Number of rows.
    const Index row = ivecvec.size();
    // Number of columns, taken from the first row.
    const Index line = ivecvec[0].size();

    // A ragged input would be indexed out of bounds below, so reject it.
    for (Index j = 1; j < row; ++j)
    {
        if (ivecvec[j].size() != line)
        {
            throw std::invalid_argument("get_vec_normalization: rows have different lengths");
        }
    }

    for (Index i = 0; i < line; ++i)
    {
        // Seed the extremes from the first row. Seeding them with 0 reports a
        // wrong minimum for an all-positive column and a wrong maximum for an
        // all-negative one.
        T min_val = ivecvec[0][i];
        T max_val = ivecvec[0][i];
        for (Index j = 1; j < row; ++j)
        {
            const T cur = ivecvec[j][i];
            if (max_val < cur)
            {
                max_val = cur;
            }
            if (cur < min_val)
            {
                min_val = cur;
            }
        }

        // Scale the column in place.
        for (Index j = 0; j < row; ++j)
        {
            if (max_val == min_val)
            {
                // Constant column: avoid dividing by a zero-width range.
                ivecvec[j][i] = T(0);
            }
            else
            {
                ivecvec[j][i] = normalization(min_val, max_val, ivecvec[j][i]);
            }
        }
    }
    return ivecvec;
}
/// Reads whitespace-separated numbers from "e:\\train.txt" (one matrix row per
/// text line) and prints the result of get_vec_normalization to standard
/// output.
///
/// Lines that contain no number are skipped so that blank lines do not create
/// empty rows.
///
/// @return 0 on success; 1 when the file cannot be opened, holds no numbers,
///         or processing throws.
int main()
{
    std::ifstream infile("e:\\train.txt");
    if (!infile)
    {
        std::cerr << "cannot open input file e:\\train.txt" << std::endl;
        return 1;
    }

    std::vector<std::vector<double> > ivecvec;
    std::string temp;
    while (std::getline(infile, temp))
    {
        std::stringstream line(temp);
        std::vector<double> ivec;
        double a;
        while (line >> a)
        {
            ivec.push_back(a);
        }
        if (!ivec.empty())
        {
            ivecvec.push_back(ivec);
        }
    }

    if (ivecvec.empty())
    {
        std::cerr << "no numeric data found in e:\\train.txt" << std::endl;
        return 1;
    }

    try
    {
        std::cout << get_vec_normalization(ivecvec);
    }
    catch (const std::exception& e)
    {
        std::cerr << "normalization failed: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
还是把结果存储到文件,比较方便以后进行处理,继续改:
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <exception>
#include <stdexcept>
#include<vector>
using namespace std;
/// Writes every element of @p vec to @p os, each followed by a single space.
///
/// The vector is taken by const reference so that printing does not copy it,
/// and a plain iterator loop is used so the function needs neither
/// <algorithm> nor <iterator>.
///
/// @param os   Destination stream.
/// @param vec  Elements to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& vec)
{
    for (typename std::vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
    {
        os << *it << " ";
    }
    return os;
}
/// Writes a two-dimensional vector to @p os, one inner vector per line.
/// Every element is followed by a single space and every row by a newline.
///
/// The argument is taken by const reference (no copy of the whole matrix) and
/// the dependent iterator types carry the `typename` keyword that standard
/// C++ requires inside a template.
///
/// @param os   Destination stream.
/// @param vec  Rows to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<std::vector<T> >& vec)
{
    typedef typename std::vector<std::vector<T> >::const_iterator RowIter;
    typedef typename std::vector<T>::const_iterator CellIter;

    for (RowIter row = vec.begin(); row != vec.end(); ++row)
    {
        for (CellIter cell = row->begin(); cell != row->end(); ++cell)
        {
            os << *cell << " ";
        }
        // '\n' ends the row without forcing a flush after each line.
        os << '\n';
    }
    return os;
}
/// Min-max scales @p data into [0, 1]: (data - minval) / (maxval - minval).
///
/// @param minval  Smallest value of the feature.
/// @param maxval  Largest value of the feature.
/// @param data    Value to scale.
/// @return        The scaled value, or 0 when @p maxval equals @p minval: a
///                constant feature has an empty range, so it is mapped to 0
///                instead of dividing by zero.
template <class T>
T normalization(T minval,T maxval,T data)
{
    const T range = maxval - minval;
    if (range == T(0))
    {
        return T(0);
    }
    return (data - minval) / range;
}
/// Scales every column of a rectangular matrix independently with
/// normalization(), using that column's own minimum and maximum, and writes
/// the scaled matrix to "e:\\outfile.txt" (one row per line, each value
/// followed by a space).
///
/// @param ivecvec  Matrix stored as a vector of rows; all rows must have the
///                 same length. Taken by value and returned after scaling.
/// @return         The scaled matrix; an empty matrix when there are no rows.
///                 A column whose values are all equal is set to 0.
/// @throws std::runtime_error    if the output file cannot be opened.
/// @throws std::invalid_argument if the rows do not all have the same length.
template <class T>
std::vector<std::vector<T> > get_vec_normalization(std::vector<std::vector<T> > ivecvec)
{
    typedef typename std::vector<std::vector<T> >::size_type Index;

    std::ofstream outfile("e:\\outfile.txt");
    if (!outfile)
    {
        throw std::runtime_error("openfile error");
    }

    if (ivecvec.empty())
    {
        return ivecvec;
    }

    // Number of rows.
    const Index row = ivecvec.size();
    // Number of columns, taken from the first row.
    const Index line = ivecvec[0].size();

    // A ragged input would be indexed out of bounds below, so reject it.
    for (Index j = 1; j < row; ++j)
    {
        if (ivecvec[j].size() != line)
        {
            throw std::invalid_argument("get_vec_normalization: rows have different lengths");
        }
    }

    for (Index i = 0; i < line; ++i)
    {
        // Seed the extremes from the first row. Seeding them with 0 reports a
        // wrong minimum for an all-positive column and a wrong maximum for an
        // all-negative one.
        T min_val = ivecvec[0][i];
        T max_val = ivecvec[0][i];
        for (Index j = 1; j < row; ++j)
        {
            const T cur = ivecvec[j][i];
            if (max_val < cur)
            {
                max_val = cur;
            }
            if (cur < min_val)
            {
                min_val = cur;
            }
        }

        // Scale the column in place.
        for (Index j = 0; j < row; ++j)
        {
            if (max_val == min_val)
            {
                // Constant column: avoid dividing by a zero-width range.
                ivecvec[j][i] = T(0);
            }
            else
            {
                ivecvec[j][i] = normalization(min_val, max_val, ivecvec[j][i]);
            }
        }
    }

    // Write the scaled matrix row by row.
    for (Index i = 0; i < row; ++i)
    {
        for (Index j = 0; j < line; ++j)
        {
            outfile << ivecvec[i][j] << " ";
        }
        outfile << '\n';
    }
    outfile.close();
    return ivecvec;
}
/// Reads whitespace-separated numbers from "e:\\train.txt" (one matrix row per
/// text line) and prints the result of get_vec_normalization to standard
/// output.
///
/// Lines that contain no number are skipped so that blank lines do not create
/// empty rows. Exceptions from get_vec_normalization are reported on standard
/// error instead of terminating the program abnormally.
///
/// @return 0 on success; 1 when the file cannot be opened, holds no numbers,
///         or processing throws.
int main()
{
    std::ifstream infile("e:\\train.txt");
    if (!infile)
    {
        std::cerr << "cannot open input file e:\\train.txt" << std::endl;
        return 1;
    }

    std::vector<std::vector<double> > ivecvec;
    std::string temp;
    while (std::getline(infile, temp))
    {
        std::stringstream line(temp);
        std::vector<double> ivec;
        double a;
        while (line >> a)
        {
            ivec.push_back(a);
        }
        if (!ivec.empty())
        {
            ivecvec.push_back(ivec);
        }
    }

    if (ivecvec.empty())
    {
        std::cerr << "no numeric data found in e:\\train.txt" << std::endl;
        return 1;
    }

    try
    {
        std::cout << get_vec_normalization(ivecvec);
    }
    catch (const std::exception& e)
    {
        std::cerr << "normalization failed: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
根据公式 $y' = y_{lower} + (y_{upper} - y_{lower}) \cdot \dfrac{y - y_{min}}{y_{max} - y_{min}}$
对原来的代码进行修改:
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <exception>
#include <stdexcept>
#include<vector>
using namespace std;
/// Writes every element of @p vec to @p os, each followed by a single space.
///
/// The vector is taken by const reference so that printing does not copy it,
/// and a plain iterator loop is used so the function needs neither
/// <algorithm> nor <iterator>.
///
/// @param os   Destination stream.
/// @param vec  Elements to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<T>& vec)
{
    for (typename std::vector<T>::const_iterator it = vec.begin(); it != vec.end(); ++it)
    {
        os << *it << " ";
    }
    return os;
}
/// Writes a two-dimensional vector to @p os, one inner vector per line.
/// Every element is followed by a single space and every row by a newline.
///
/// The argument is taken by const reference (no copy of the whole matrix) and
/// the dependent iterator types carry the `typename` keyword that standard
/// C++ requires inside a template.
///
/// @param os   Destination stream.
/// @param vec  Rows to print, in order.
/// @return     @p os, so that calls can be chained.
template <class T>
std::ostream& operator << (std::ostream& os, const std::vector<std::vector<T> >& vec)
{
    typedef typename std::vector<std::vector<T> >::const_iterator RowIter;
    typedef typename std::vector<T>::const_iterator CellIter;

    for (RowIter row = vec.begin(); row != vec.end(); ++row)
    {
        for (CellIter cell = row->begin(); cell != row->end(); ++cell)
        {
            os << *cell << " ";
        }
        // '\n' ends the row without forcing a flush after each line.
        os << '\n';
    }
    return os;
}
/// Linearly maps @p value from the source range [y_min, y_max] onto the
/// target range [y_lower, y_upper].
///
/// @param y_min    Smallest value of the feature.
/// @param y_max    Largest value of the feature.
/// @param value    Value to scale.
/// @param y_upper  Upper bound of the target range.
/// @param y_lower  Lower bound of the target range.
/// @return         y_lower when value equals y_min, y_upper when value equals
///                 y_max, otherwise
///                 y_lower + (y_upper - y_lower) * (value - y_min) / (y_max - y_min).
template <class T>
T normalization(T y_min,T y_max,T value,int y_upper,int y_lower)
{
    // The end points are returned directly, so they land exactly on the
    // target bounds without going through the division.
    if (value == y_min)
    {
        return static_cast<T>(y_lower);
    }
    if (value == y_max)
    {
        return static_cast<T>(y_upper);
    }

    const int target_width = y_upper - y_lower;
    return static_cast<T>(y_lower + target_width * (value - y_min) / (y_max - y_min));
}
/// Scales every column of a rectangular matrix independently into [-1, 1]
/// with normalization(), using that column's own minimum and maximum, and
/// writes the scaled matrix to "e:\\outfile.txt" (one row per line, each value
/// followed by a space).
///
/// @param ivecvec  Matrix stored as a vector of rows; all rows must have the
///                 same length. Taken by value and returned after scaling.
/// @return         The scaled matrix; an empty matrix when there are no rows.
/// @throws std::runtime_error    if the output file cannot be opened.
/// @throws std::invalid_argument if the rows do not all have the same length.
template <class T>
std::vector<std::vector<T> > get_vec_normalization(std::vector<std::vector<T> > ivecvec)
{
    typedef typename std::vector<std::vector<T> >::size_type Index;

    std::ofstream outfile("e:\\outfile.txt");
    if (!outfile)
    {
        throw std::runtime_error("openfile error");
    }

    if (ivecvec.empty())
    {
        return ivecvec;
    }

    // Number of rows.
    const Index row = ivecvec.size();
    // Number of columns, taken from the first row.
    const Index line = ivecvec[0].size();

    // A ragged input would be indexed out of bounds below, so reject it.
    for (Index j = 1; j < row; ++j)
    {
        if (ivecvec[j].size() != line)
        {
            throw std::invalid_argument("get_vec_normalization: rows have different lengths");
        }
    }

    for (Index i = 0; i < line; ++i)
    {
        // Seed the extremes from the first row. Seeding them with 0 reports a
        // wrong minimum for an all-positive column and a wrong maximum for an
        // all-negative one.
        T min_val = ivecvec[0][i];
        T max_val = ivecvec[0][i];
        for (Index j = 1; j < row; ++j)
        {
            const T cur = ivecvec[j][i];
            if (max_val < cur)
            {
                max_val = cur;
            }
            if (cur < min_val)
            {
                min_val = cur;
            }
        }

        // Scale the column in place to the target range [-1, 1].
        for (Index j = 0; j < row; ++j)
        {
            ivecvec[j][i] = normalization(min_val, max_val, ivecvec[j][i], 1, -1);
        }
    }

    // Write the scaled matrix row by row.
    for (Index i = 0; i < row; ++i)
    {
        for (Index j = 0; j < line; ++j)
        {
            outfile << ivecvec[i][j] << " ";
        }
        outfile << '\n';
    }
    outfile.close();
    return ivecvec;
}
/// Reads whitespace-separated numbers from "e:\\features.txt" (one matrix row
/// per text line) and prints the result of get_vec_normalization to standard
/// output.
///
/// Lines that contain no number are skipped so that blank lines do not create
/// empty rows. Exceptions from get_vec_normalization are reported on standard
/// error instead of terminating the program abnormally.
///
/// @return 0 on success; 1 when the file cannot be opened, holds no numbers,
///         or processing throws.
int main()
{
    std::ifstream infile("e:\\features.txt");
    if (!infile)
    {
        std::cerr << "cannot open input file e:\\features.txt" << std::endl;
        return 1;
    }

    std::vector<std::vector<double> > ivecvec;
    std::string temp;
    while (std::getline(infile, temp))
    {
        std::stringstream line(temp);
        std::vector<double> ivec;
        double a;
        while (line >> a)
        {
            ivec.push_back(a);
        }
        if (!ivec.empty())
        {
            ivecvec.push_back(ivec);
        }
    }

    if (ivecvec.empty())
    {
        std::cerr << "no numeric data found in e:\\features.txt" << std::endl;
        return 1;
    }

    try
    {
        std::cout << get_vec_normalization(ivecvec);
    }
    catch (const std::exception& e)
    {
        std::cerr << "normalization failed: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}