// test1.cpp : Defines the entry point for the console application.
//
#include <vector>
#include <cstring>
#include <fstream>
#include <string>
#include <iostream>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <map>
#include <ctime>
#include <iomanip>
#include <cmath>
#include <queue>
using namespace std;
// One loaded series: its samples, its label and a length summary.
struct serie
{
// Sample values in file order; readFile clears and refills this.
vector<double> vec;
// Token read immediately before the samples; main and getvalue count a hit
// when two series have equal labels.
string label;
// Sum of absolute differences between consecutive samples (set by readFile).
double len;
};
// Divisor applied to the candidate window size on each step of find_win_size.
double speed = 1.6;
// Sentinel cost: returned by DPA/DPB for cells they do not expose, and used
// as the starting "best so far" value in getdistance4 and main.
double INF = 1e10;
// Storage for the series of the file most recently loaded by readFile.
serie series[10000];
// Cost tables filled by getdistance00 / getdistance0t / getdistance0 and read
// back through DPA / DPB.
double dpA[2000][2000];
double dpB[2000][2000];
// Number of samples per series; readFile overwrites it from lens[] and
// down_sample shrinks it.
int num = 637;
// Weight multiplied into the cross-series term of the distance functions;
// reassigned by getvalue and main.
double coe=0.2;
// Band half-width used by the banded distance functions (cells with
// |i - j| <= WIN); reassigned by getvalue and main.
int WIN = 0;
// Fraction of samples that down_sample removes from every series.
double down_ratio;
// The containers below hold the columns of numhelp.txt, one entry per row
// (filled by readInfo).
// names[i]: name token; readFile uses it to build the data file path.
map<int, string> names;
// lens[i]: samples per series; readFile copies it into num.
vector<int> lens(38, 0);
// num1[i]: series count used when readFile is called with train == false.
vector<int> num1(38, 0);
// num2[i]: series count used when readFile is called with train == true.
vector<int> num2(38, 0);
// classes[i]: divisor in the candidate-subsampling stride of getvalue and
// main (presumably the number of classes in the dataset - TODO confirm).
vector<int> classes(38,0);
// dev_mean[i]: value main assigns to coe; its derivation is not shown here.
vector<double> dev_mean(38, 0);
// Returns the decimal text of `value`.
//
// The buffer is sized from the width of int: 3 characters per byte is an upper
// bound on the number of decimal digits, plus one for the sign and one for the
// terminating NUL. The previous fixed 10-byte buffer overflowed for any value
// needing 10 or more characters (e.g. 2147483647 or -2147483648).
std::string to_string(int value)
{
    char help[3 * sizeof(int) + 2];
    sprintf(help, "%d", value);
    return std::string(help);
}
// Guarded read of dpA[i][j].
// The stored value is returned for cells inside the band (|i - j| <= win) and
// for the two border cells just outside it, (0, win + 1) and (win + 1, 0).
// Every other cell reads as INF.
double DPA(int i, int j, int win)
{
    const bool rowBorderCell = (i == 0) && (j == win + 1);
    const bool colBorderCell = (j == 0) && (i == win + 1);
    const bool insideBand = abs(i - j) <= win;
    return (rowBorderCell || colBorderCell || insideBand) ? dpA[i][j] : INF;
}
// Guarded read of dpB[i][j].
// The stored value is returned for cells inside the band (|i - j| <= win) and
// for the two border cells just outside it, (0, win + 1) and (win + 1, 0).
// Every other cell reads as INF.
double DPB(int i, int j, int win)
{
    const bool rowBorderCell = (i == 0) && (j == win + 1);
    const bool colBorderCell = (j == 0) && (i == win + 1);
    const bool insideBand = abs(i - j) <= win;
    return (rowBorderCell || colBorderCell || insideBand) ? dpB[i][j] : INF;
}
double getdis(int one, int two, int i, int j)
{
vector<double>& vec1 = series[one].vec;
vector<double>& vec2 = series[two].vec;
int n = vec1.size();
double ans = (vec1[i * 2] - vec2[j * 2])*(vec1[i * 2] - vec2[j * 2]) + (vec1[i * 2 + 1] - vec2[j * 2 + 1])*(vec1[i * 2 + 1] - vec2[j * 2 + 1]);
return sqrt(ans);
}
// Banded two-table distance between series `one` and `two`, for series whose
// vec holds 2-D points as consecutive value pairs (see getdis).
//
// Recurrence, for cells (i, j) with |i - j| <= WIN:
//   dpA[i][j] is reached from row i - 1: either from dpA plus the distance
//             between points i - 1 and i - 2 of `one`, or from dpB plus
//             coe * distance(one[i - 1], two[j - 1]).
//   dpB[i][j] is reached from column j - 1: either from dpA plus the same
//             coe-weighted cross term, or from dpB plus the distance between
//             points j - 1 and j - 2 of `two`.
// Returns min(dpA[n-1][n-1], dpB[n-1][n-1]); 0 when there is nothing to compare.
//
// Preconditions (not checked): `one` has at least as many points as `two`,
// and n <= 2000 (the size of dpA/dpB).
double getdistance00(int one, int two)
{
    // NOTE(review): for an even-sized vec of m points this yields n == m, so
    // the tables stop one point short of the end, whereas the 1-D variants use
    // (number of samples + 1). Looks like an off-by-one, but kept as is -
    // confirm the intended length before changing it.
    int n = series[two].vec.size() + 1;
    n /= 2;
    if (n < 1)
        return 0; // previously indexed dpA[-1][-1]

    const int win = WIN;
    dpA[0][0] = 0;
    dpB[0][0] = 0;
    dpA[1][0] = 0;
    dpB[0][1] = 0;
    // Border cells: accumulated path length along each series on its own.
    for (int i = 2; i <= min(n - 1, win + 1); i++)
    {
        dpA[i][0] = dpA[i - 1][0] + getdis(one, one, i - 1, i - 2);
        dpB[0][i] = dpB[0][i - 1] + getdis(two, two, i - 1, i - 2);
    }
    for (int i = 1; i < n; i++)
        for (int j = max(1, i - win); j < min(n, i + win + 1); j++)
        {
            const double cross = coe * getdis(one, two, i - 1, j - 1);
            if (i != 1)
                dpA[i][j] = min(DPA(i - 1, j, win) + getdis(one, one, i - 1, i - 2), DPB(i - 1, j, win) + cross);
            else
                dpA[i][j] = DPB(i - 1, j, win) + cross;
            if (j != 1)
                // Step along `two` is between its points j - 1 and j - 2. The
                // earlier code used i - 1 for the first index, unlike the
                // matching term in getdistance0 / getdistance0t.
                dpB[i][j] = min(DPA(i, j - 1, win) + cross, DPB(i, j - 1, win) + getdis(two, two, j - 1, j - 2));
            else
                dpB[i][j] = DPA(i, j - 1, win) + cross;
        }
    return min(dpA[n - 1][n - 1], dpB[n - 1][n - 1]);
}
double getdistance0t(int one, int two)
{
int n = series[two].vec.size() + 1;
dpA[0][0] = 0;
dpB[0][0] = 0;
dpA[1][0] = 0;
dpB[0][1] = 0;
for (int i = 2; i <= min(n-1,WIN + 1); i++)
{
dpA[i][0] = dpA[i - 1][0] + fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]);
dpB[0][i] = dpB[0][i - 1] + 0*fabs(series[two].vec[i - 1] - series[two].vec[i - 1 - 1]);
}
int win = WIN;
for (int i = 1; i < n; i++)
for (int j = max(1, i - win); j < min(n, i + win + 1); j++)
{
if (i != 1)
dpA[i][j] = min(DPA(i - 1, j, win) + fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]), DPB(i - 1, j, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]));
else
dpA[i][j] = DPB(i - 1, j, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]);
if (j != 1)
dpB[i][j] = min(DPA(i, j - 1, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]), DPB(i, j - 1, win) + 0*fabs(series[two].vec[j - 1] - series[two].vec[j - 1 - 1]));
else
dpB[i][j] = DPA(i, j - 1, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]);
}
return min(dpA[n - 1][n - 1], dpB[n - 1][n - 1]) ;
}
double getdistance0(int one, int two)
{
int n = series[two].vec.size() + 1;
dpA[0][0] = 0;
dpB[0][0] = 0;
dpA[1][0] = 0;
dpB[0][1] = 0;
for (int i = 2; i <= min(n - 1, WIN + 1); i++)
{
dpA[i][0] = dpA[i - 1][0] + fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]);
dpB[0][i] = dpB[0][i - 1] + fabs(series[two].vec[i - 1] - series[two].vec[i - 1 - 1]);
}
int win = WIN;
for (int i = 1; i < n; i++)
for (int j = max(1, i - win); j < min(n, i + win + 1); j++)
{
if (i != 1)
dpA[i][j] = min(DPA(i - 1, j, win) + fabs(series[one].vec[i - 1] - series[one].vec[i - 1 - 1]), DPB(i - 1, j, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]));
else
dpA[i][j] = DPB(i - 1, j, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]);
if (j != 1)
dpB[i][j] = min(DPA(i, j - 1, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]), DPB(i, j - 1, win) + fabs(series[two].vec[j - 1] - series[two].vec[j - 1 - 1]));
else
dpB[i][j] = DPA(i, j - 1, win) + coe*fabs(series[one].vec[i - 1] - series[two].vec[j - 1]);
}
return min(dpA[n - 1][n - 1], dpB[n - 1][n - 1]);
}
// Single cost table shared by getdistance1 and getdistance4; each call
// overwrites the cells it uses.
double dp[2000][2000];
double getdistance1(int one, int two)
{
int n = series[two].vec.size();
dp[0][0] = fabs(series[one].vec[0] - series[two].vec[0]);
int win = WIN;
for (int i = 1; i < min(i+win,n); i++)
{
dp[i][0] = dp[i-1][0]+fabs(series[one].vec[i]-series[two].vec[0]);
dp[0][i] = dp[0][i-1]+fabs(series[one].vec[0] - series[two].vec[i]);
}
for (int i = 1; i < n; i++)
for (int j = max(1,i-win); j < min(i+win+1,n); j++)
{
if (j==i-win)
dp[i][j] = fabs(series[one].vec[i] - series[two].vec[j]) + min(dp[i - 1][j], dp[i - 1][j - 1]);
else if (j==i+win)
dp[i][j] = fabs(series[one].vec[i] - series[two].vec[j]) + min(dp[i][j - 1], dp[i - 1][j - 1]);
else
dp[i][j] = fabs(series[one].vec[i] - series[two].vec[j]) +min(dp[i - 1][j], min(dp[i][j - 1], dp[i - 1][j - 1]));
}
return dp[n - 1][n - 1];
return 0;
}
double getdistance2(int one, int two)
{
int n = series[two].vec.size();
double ans = 0;
for (int i = 0; i < series[one].vec.size(); i++)
{
ans += fabs(series[one].vec[i] - series[two].vec[i]);
}
return ans;
}
double getdistance3(int one, int two)
{
int n = series[one].vec.size();
double ans = 0;
for (int i = 0; i < n - 1; i++)
{
ans += abs(series[one].vec[i] - series[two].vec[i]);
ans += abs(series[two].vec[i] - series[one].vec[i + 1]);
ans += abs(series[one].vec[i] - series[two].vec[i + 1]);
}
ans += abs(series[one].vec[n - 1] - series[two].vec[n - 1]);
return ans;
}
double getdistance4(int one, int two)
{
int n = series[two].vec.size();
double ans = 0;
dp[0][0] = abs(series[one].vec[0] - series[two].vec[0]);
for (int i = 1; i < n; i++)
{
dp[i][0] = dp[i - 1][0] + abs(series[one].vec[i - 1] - series[one].vec[i]);
dp[0][i] = dp[0][i - 1] + abs(series[two].vec[i - 1] - series[two].vec[i]);
}
int win = WIN;
for (int i = 1; i < n; i++)
//for (int j = 1; j < n; j++)
for (int j = max(1, i - win); j < min(i + win + 1, n); j++)
{
dp[i][j] = INF;
if (j - i + 1 <= win)
dp[i][j] = min(dp[i][j],dp[i - 1][j] + abs(series[one].vec[i - 1] - series[one].vec[i]));
if (i - j + 1 <= win)
dp[i][j] = min(dp[i][j], dp[i][j - 1] + abs(series[two].vec[j - 1] - series[two].vec[j]));
if (j - i + 1 <= win)
dp[i][j] = min(dp[i][j], dp[i - 1][j ] + abs(series[one].vec[i] - series[two].vec[j])*coe);
if (i - j + 1 <= win)
dp[i][j] = min(dp[i][j], dp[i ][j - 1] + abs(series[one].vec[i] - series[two].vec[j])*coe);
}
return dp[n - 1][n - 1];
}
double getdistance5(int one, int two)
{
int n = series[one].vec.size();
double ans = 0;
ans += abs(series[one].vec[0] - series[two].vec[0]);
ans += abs(series[one].vec[n - 1] - series[two].vec[n - 1]);
for (int i = 1; i < n - 1; i++)
{
ans += min(abs(series[one].vec[i] - series[two].vec[i - 1]), min(abs(series[one].vec[i] - series[two].vec[i]), abs(series[one].vec[i] - series[two].vec[i + 1])));
}
ans += abs(series[one].vec[n - 1] - series[two].vec[n - 1]);
return ans;
}
// Loads the per-dataset table from numhelp.txt into names, lens, num1, num2,
// classes and dev_mean.
//
// Each of the 38 rows is read as: index, name, lens, num1, num2, classes,
// dev_mean (whitespace separated; the leading index is discarded).
// Exits with status 1 when the file cannot be opened. If a row cannot be
// parsed, a message is printed and reading stops; later entries keep their
// initial values.
void readInfo()
{
    ifstream file("/home/xiefubao/myproject/experiment/vldb_dataset/numhelp.txt");
    if (!file.is_open())
    {
        // The message used to name "num.txt" and the exit status was 0.
        cout << "numhelp.txt not open!" << endl;
        exit(1);
    }
    for (int i = 0; i < 38; i++)
    {
        string now;
        int no;
        if (!(file >> no >> now >> lens[i] >> num1[i] >> num2[i] >> classes[i] >> dev_mean[i]))
        {
            cout << "numhelp.txt: could not read row " << i << endl;
            break;
        }
        names[i] = now;
    }
}
void down_sample(int counter)
{
int newnum = num*(1 - down_ratio);
int getout = num - newnum;
for (int i = 0; i < counter; i++)
{
vector<double> down;
vector<int> help(num, 0);
vector<bool> rem(num, 1);
for (int j = 0; j < num; j++)
{
help[j] = j;
}
for (int j = 0; j < getout; j++)
{
int position = rand() % (num - j);
rem[help[position]] = 0;
swap(help[position], help[num-j-1]);
}
vector<double> now(newnum, 0);
int add = 0;
for (int j = 0; j < num; j++)
{
if (rem[j])
now[add++] = series[i].vec[j];
}
swap(series[i].vec, now);
}
num = newnum;
}
void readFile(int filenum , bool train)
{
num = lens[filenum - 1];
string filepath = "/home/xiefubao/myproject/experiment/vldb_dataset/" + to_string(filenum) + "/" + names[filenum - 1] + (train ? "_TRAIN" : "_TEST") + ".txt";
cout << filepath << endl;
string now;
ifstream in(filepath.c_str());
if (!in.is_open())
{
cout << "not open" << endl;
exit(1);
}
int number = train ? num2[filenum - 1] : num1[filenum - 1];
for(int u = 0;u < number;u++)
{
in >> now;
series[u].label = now;
double len = 0;
series[u].vec.clear();
for (int i = 0; i < num; i++)
{
double point;
in >> point;
point = point;
series[u].vec.push_back(point);
if (i != 0)
len += fabs(point - series[u].vec[i - 1]);
}
series[u].len = len;
}
cout<<"done 1"<<endl;
//down_sample(number);
}
/*bool operator<(pair<double,string> n1,pair<double,string> n2) {
return n1.first < n2.first;
}*/
int getvalue(int filenum,int wlen,double c,int counter,double(*distance) (int, int))
{
int ans = 0;
WIN = wlen;
coe = c;
for (int i = 0; i < counter; i++)
{
//cout << counter << " " << ans <<endl;
priority_queue<pair<double,string> > pri;
int prinum = 3;
for(int j = 0;j < counter; j++)
{
if (i == j) continue;
if (((i+3737)*(j+4343)) %(max(1,counter/classes[filenum]/5)) != 0) continue;
double dis = distance(i,j);
if(pri.size() < prinum)
pri.push(make_pair(dis,series[j].label));
else if(pri.top().first > dis)
{
pri.pop();
pri.push(make_pair(dis,series[j].label));
}
}
for(int u = 1;u <= prinum && !pri.empty();u++)
{
if(pri.top().second == series[i].label)
{
ans += u*u;
}
pri.pop();
}
}
cout << "ans " << ans <<endl;
return ans;
}
int find_win_size(int filenum,double c,int counter,double(*distance) (int, int))
{
int max_value = -1;
int len = -1;
for(int wlen = lens[filenum] / 2; wlen >= 2 ; wlen /= speed)
{
cout<<wlen<<endl;
int value = getvalue(filenum,wlen,c,counter,distance);
if(value >= max_value)
{
max_value = value;
len = wlen;
}
}
return len;
}
double find_ceo(int filenum,int counter, double(*distance) (int, int))
{
int max_value = -1;
double ans = -1;
for(double rat = 1; rat > 0.001 ; rat /= 1.8)
{
int value = getvalue(filenum,WIN,rat,counter,distance);
if(value >= max_value)
{
max_value = value;
ans = rat;
}
}
return ans;
}
int main()
{
//cout << "xie" << << 123 << "123" << endl; getchar();
readInfo();
ofstream result;
result.open("/home/xiefubao/myproject/experiment/vldb_dataset/compare.txt",ios::app);
//file location
//int filenum = 31;
bool train = false;
for (int filenum = 38; filenum <= 38; filenum++)
{
/*down_ratio = 0.2;
readFile(filenum, false);
cout << "start find winsize" << endl;
WIN = find_win_size(filenum - 1,dev_mean[filenum-1]/2,num2[filenum -1],getdistance0);
cout << "start find coe" << endl;
coe = find_ceo(filenum - 1,num2[filenum -1],getdistance0);
cout << "have done" << endl;
cout << "WIN && coe : " << WIN << " " << coe <<endl;
*/
clock_t start, finish;
start = clock();
WIN = 5;
coe = dev_mean[filenum - 1];
vector<int> hitnum(5, 0);
int counter = num1[filenum - 1];
cout << "reading" << endl;
readFile(filenum, train);
cout << "have read" << endl;
cout << "xiefubao " << WIN << coe <<endl;
//double(*distance[5]) (int, int) = { getdistance00, getdistance0, getdistance1, getdistance2, getdistance3 };
double(*distance[5]) (int, int) = { getdistance0,getdistance1, getdistance2 };
vector<int> computeDis;
computeDis.push_back(0);
computeDis.push_back(1);
computeDis.push_back(2);
//computeDis.push_back(3);
//computeDis.push_back(4);
for (int i = 0; i < counter; i++)
{
vector<double> mist(5, INF);
vector<int> bestnum(5, -1);
//cout << i << endl;
for (int j = 0; j < counter; j++)
{
if (i == j) continue;
if (rand() %(max(1,counter/classes[filenum-1]/5)) != 0) continue;
vector<double> dist(5, 0);
for (int k = 0; k < computeDis.size(); k++)
{
dist[computeDis[k]] = distance[computeDis[k]](i, j);
}
for (int k = 0; k < computeDis.size(); k++)
if (dist[computeDis[k]] < mist[computeDis[k]])
{
mist[computeDis[k]] = dist[computeDis[k]];
bestnum[computeDis[k]] = j;
}
}
if (i == counter - 1)
{
result << setw(2) << setfill(' ') << filenum << " ";
}
for (int k = 0; k < computeDis.size(); k++)
{
if (series[i].label == series[bestnum[computeDis[k]]].label)
{
hitnum[computeDis[k]]++;
}
if (i % 100 == 0 || i == counter - 1)
cout << "distance" << computeDis[k] << " hitsnum:" << hitnum[computeDis[k]] << " / " << i + 1 << " " << counter << endl;
if (i == counter - 1)
{
result << setw(5) << setfill(' ')<< hitnum[computeDis[k]] << " ";
}
}
if (i % 100 == 0 || i == counter - 1)
cout << endl;
if (i == counter - 1)
{
result << setw(5) << setfill(' ') << counter <<" ";
result << setw(4) << setfill(' ')<< WIN << " " << setw(7) << setfill(' ') << coe << endl;
}
}
finish = clock();
//cout << "timeofcost: "<< finish - start << endl;
cout << "series length: " << num << endl << endl;
}
return 0;
}
// Similarity (parameter-tuning code)
// Latest recommended article published on 2023-03-01 21:37:29