数据分析在实际项目上的应用

最新推荐文章于 2024-05-22 09:26:49 发布

小雨姑娘

最新推荐文章于 2024-05-22 09:26:49 发布

阅读量3k

点赞数 1

分类专栏： DataMining学习笔记文章标签：数据 C 实际应用

本文链接：https://blog.csdn.net/qq_36988549/article/details/70186284

版权

DataMining学习笔记专栏收录该内容

5 篇文章 1 订阅

订阅专栏

前几天开会拿到了某个项目的一部分数据

在会上大家发现了这些数据中出现了很多问题

并进行了讨论

由于保密的原因数据不能贴出来

但是大体的情况是这样的：

某个人流量采集系统，当一个人进入系统时，计数+1；当一个人出去时计数-1.

随着时间的发展系统会不停有人进进出出，但是当系统重置的时候所有人都应该从系统离开。

采集的数据：

1.当前时刻进入量

2.当前时刻离开量

3.当前时刻系统内人数

4.当前时刻总进入量

而这个系统现在还不是特别的准确，所以在某些时候这个系统会出一些问题，会漏记一些人数

但是现在还没有特别好的分析问题的方法，于是大家采用了找到原始视频一个个找错误的方法

从原始视频寻找错误的方法是可行的，但也有一定的局限性。

优点：

1.能精确地找到问题所在，用人眼观测的话基本不存在误差。

2.能够发现当时现场的很多复杂的环境因素。

缺点：

1.耗费大量的人力物力，尤其是有很多没有出现问题的视频看一遍没有什么作用。

2.无法在宏观的方面上衡量系统的精确性，很难得到定量的数据。

我想是不是可以利用这些文本数据来对这个系统进行分析呢

后来一个学长提供了一个非常好的计算误差率的算法

算法核心如下：

当前系统内人数类似于一个迭代的过程

当前系统人数 += 进 - 出

所有进入系统的人在系统重置前都将离开系统

所以在每次系统重置的前一时刻系统内人数应该等于零

因此若不为0则该系统出现了误差

所以基于这个模型，我们完全可以实现：

找到所有重置前当前系统内人数不为0的样本，该样本必为出现错误的样本，这样就可以知道应该看哪些视频了

然后基于这个数据可以做的事情还有很多

比如计算该系统进行检测的准确率

但是后来我考虑到可能会出现进入系统时未检测到，离开系统时未检测到，正好造成最后系统内剩余人数等于0的情况

所以出现检测失误的情况有

{

+1 +1

+1 - 1

-1 + 1

-1 - 1

}

所以系统误差率大致可以用每次重置前末时刻的数据来估计：

误差率 ≈ (|当前系统人数| × 2) / 当前系统总进出量

其中“当前系统总进出量”在下面的代码中按“总进入量 × 2”计算。

但是当前的模型只能计算两次系统重置之间这一整段的属性

而无法对某一时刻的属性进行分析

下面贴出C++源码：

因为给的数据没有划分系统重置的时间点，所以判断系统重置点和进行预处理占用了大量时间

#include"cstdio"
#include"cstdlib"
#include"iostream"
#include"vector"
using namespace std;

// One sampled record of the people-counting log.
// Every member has a default value so that a record which was only partly
// filled in never exposes indeterminate data to the analysis passes.
struct node
{
	// Segment number written by cutData() on the last record before a detected
	// reset; stays 0 on every other record.
	int cut = 0;

	// Timestamp, parsed by getData() from "year/month/day  hours:minutes:seconds".
	int year = 0;
	int mouth = 0;	// holds the month; the spelling is kept because other code uses this name
	int day = 0;
	int hours = 0;
	int minutes = 0;
	int seconds = 0;

	// Per-record flow counters. cutData() adds the two "In" values to and
	// subtracts the two "Out" values from the running head count.
	// NOTE(review): "front"/"back" presumably name two doors/sensors - confirm.
	int frontIn = 0;
	int frontOut = 0;
	int backIn = 0;
	int backOut = 0;

	// Head count recorded for this sample, and the running total that
	// findWrong() uses as the denominator of the error rate.
	// NOTE(review): allAmount is presumably the cumulative number of entries - confirm.
	int currentAmount = 0;
	int allAmount = 0;

	// Classification written by findWrong(): 0 = no residual, 1 = small
	// residual, 2 = large residual.
	int isWrong = 0;
	// Error rate written by findWrong() for records whose cut is non-zero.
	double errorValue = 0.0;
};

// All records of the input file in file order: appended by getData(), then
// annotated in place by cutData() (cut) and findWrong() (isWrong, errorValue).
// NOTE(review): with `using namespace std;` in effect, the unqualified name
// `data` can become ambiguous with std::data when built as C++17 or later.
vector<node> data;

// Reads every record of the (hard-coded) input file into the global `data`.
//
// Each record is a timestamp "year/month/day  hours:minutes:seconds" followed
// by six integers: frontIn, frontOut, backIn, backOut, currentAmount, allAmount.
//
// A record is appended only when all twelve fields were parsed, so hitting the
// end of the file no longer adds a trailing record with unread fields. Reading
// stops at the first malformed record, which is reported on stderr.
// If the file cannot be opened the process terminates with a failure status.
void getData()
{
	FILE *fp = nullptr;
	if (fopen_s(&fp, "C:\\Users\\XueChuanyu\\Desktop\\PointLabEdit.txt", "r") != 0 || fp == nullptr)
	{
		perror("PointLabEdit opened error");
		exit(EXIT_FAILURE);
	}

	for (;;)
	{
		node temp;

		const int stampFields = fscanf_s(fp, "%d/%d/%d  %d:%d:%d", &temp.year, &temp.mouth, &temp.day, &temp.hours, &temp.minutes, &temp.seconds);
		if (stampFields == EOF)
		{
			break;	// clean end of input
		}

		// Only attempt the counters when the timestamp itself was complete.
		const int counterFields = (stampFields == 6)
			? fscanf_s(fp, "%d%d%d%d%d%d", &temp.frontIn, &temp.frontOut, &temp.backIn, &temp.backOut, &temp.currentAmount, &temp.allAmount)
			: 0;
		if (counterFields != 6)
		{
			std::cerr << "PointLabEdit: malformed record after " << data.size() << " valid records, stopping" << std::endl;
			break;
		}

		data.push_back(temp);
	}

	fclose(fp);
}


// Splits the record sequence into segments by marking the LAST record of each
// segment with a 1-based sequence number in `cut` (other records keep cut == 0).
//
// A segment boundary is placed after record i when either
//   - the next record's head count does not equal this record's head count
//     plus the next record's entries minus its exits, or
//   - the next record's hour-of-day is more than one hour later.
// The final record always closes a segment. Does nothing when `data` is empty.
//
// NOTE(review): the hour test compares hour-of-day only, so a gap that crosses
// midnight yields a negative difference and is not caught by that clause.
void cutData()
{
	if (data.empty())
	{
		return;	// nothing to segment; avoids stepping before begin() / indexing [-1]
	}

	int amount = 1;
	for (std::size_t i = 0; i + 1 < data.size(); ++i)
	{
		auto &current = data[i];
		const auto &next = data[i + 1];

		const int expectedNext = current.currentAmount + next.frontIn + next.backIn - next.frontOut - next.backOut;
		if (expectedNext != next.currentAmount || next.hours - current.hours > 1)
		{
			current.cut = amount;
			amount++;
		}
	}
	data.back().cut = amount;

	// Disabled alternative that would label every record with its segment number:
	/*amount = 1;
	for (auto it = data.begin(); it != data.end() - 2; it++)
	{
		it->cut = amount;
		if ((it + 1)->cut != 0)
		{
			amount++;
			it++;
		}

	}*/
}

// Scores every segment-final record (cut != 0) by the head count left over at
// the end of its segment.
//
//   currentAmount == 0      -> isWrong = 0, errorValue = 0
//   2*|currentAmount| / allAmount <  0.1 -> isWrong = 1
//   otherwise                            -> isWrong = 2
//   errorValue = (2*|currentAmount|) / (2*allAmount)
//
// The ratio is evaluated in floating point; with integer division the 0.1
// threshold could never take effect because every ratio below 1 truncated to 0.
// A record with a residual but allAmount == 0 cannot be scored by division and
// is classified as isWrong = 2 with errorValue = 1.0 instead of dividing by zero.
//
// NOTE(review): the classification ratio is twice errorValue; that relation is
// kept exactly as it was written - confirm it is intended.
void findWrong()
{
	for (auto &record : data)
	{
		if (record.cut == 0)
		{
			continue;	// only the last record of a segment is evaluated
		}

		if (record.currentAmount == 0)
		{
			record.isWrong = 0;
			record.errorValue = 0;
			continue;
		}

		if (record.allAmount == 0)
		{
			record.isWrong = 2;
			record.errorValue = 1.0;
			continue;
		}

		const double doubledResidual = 2.0 * std::abs(record.currentAmount);
		record.isWrong = (doubledResidual / record.allAmount < 0.1) ? 1 : 2;
		record.errorValue = doubledResidual / (2.0 * record.allAmount);
	}
}

// Prints the timestamp (month, day, hour, minute, second) of every
// segment-final record that findWrong() classified as erroneous
// (cut != 0 and isWrong != 0), one line per record.
void printWrong()
{
	for (const auto &record : data)
	{
		if (record.cut == 0)
		{
			continue;
		}
		if (record.isWrong == 0)
		{
			continue;
		}

		std::cout << "Mounth: " << record.mouth
			<< " Day:" << record.day
			<< " Hour:" << record.hours
			<< " Min:" << record.minutes
			<< " Senc:" << record.seconds << std::endl;
	}
}

// Returns the mean errorValue over all segment-final records (cut != 0).
// Records classified as isWrong == 2 are included in the mean.
// Returns 0.0 when there is no such record, instead of computing 0/0 (NaN).
double errorPosibility()
{
	double sum = 0;
	int amount = 0;
	for (const auto &record : data)
	{
		if (record.cut != 0)
		{
			sum += record.errorValue;
			++amount;
		}
	}

	if (amount == 0)
	{
		return 0.0;
	}
	return sum / amount;
}

// Sanity check: for every segment-final record (cut != 0) whose errorValue is
// below 0 or above 1, prints the record's timestamp followed by the value.
void check()
{
	for (const auto &record : data)
	{
		if (record.cut == 0)
		{
			continue;
		}

		const bool outOfRange = record.errorValue < 0 || record.errorValue > 1;
		if (!outOfRange)
		{
			continue;
		}

		std::cout << "Mounth: " << record.mouth
			<< " Day:" << record.day
			<< " Hour:" << record.hours
			<< " Min:" << record.minutes
			<< " Senc:" << record.seconds << std::endl;
		std::cout << record.errorValue << std::endl;
	}
}

// Runs the analysis pipeline: load the records, split them into segments,
// score each segment end, then print the mean error rate.
int main()
{
	getData();
	cutData();
	findWrong();

	// Optional diagnostics, disabled:
	//   printWrong();	// timestamps of segment ends with a residual
	//   check();		// segment ends whose errorValue is outside [0, 1]
	const double meanError = errorPosibility();
	std::cout << meanError;

	// NOTE(review): presumably keeps the console window open on Windows.
	system("pause");
	return 0;
}

小雨姑娘

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
数据分析在实际项目上的应用

前几天开会拿到了某个项目的一部分数据在会上大家发现了这些数据中出现了很多问题并进行了讨论由于保密的原因数据不能贴出来但是大体的情况是这样的：某个人流量采集系统，当一个人进入系统时，计数+1；当一个人出去时计数-1.随着时间的发展系统会不停有人进进出出，但是当系统重置的时候所有人都应该从系统离开。采集的数据：1.当前时刻进入量2.当前时刻离开量3.
复制链接

扫一扫