一、方差和标准差的定义
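方差(本文指总体方差,即除以样本数量 N)是各样本值与均值之差的平方的平均数:$\sigma^2 = \frac{1}{N}\sum_{i=1}^{N}(x_i - \mu)^2$,其中 $\mu$ 为均值。标准差是方差的算术平方根:$\sigma = \sqrt{\sigma^2}$。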
标准差/方差反映一个数据集的离散程度。标准差/方差越大,表示数据的波动越大;标准差/方差越小,表示数据的波动越小。
二、关键代码
// Compute the mean
auto size = std::distance(first, last); // distance between the two iterators/pointers, i.e. the number of samples
double avg = std::accumulate(first, last, 0.0) / size;
// Compute the variance
double variance(0);
std::for_each(first, last, [avg, &variance](const ValueType &num) { variance += (num - avg) * (num - avg); });
variance /= size;
// Compute the standard deviation
auto standardDeviation = std::sqrt(variance);
三、代码示例
1)utils.h
/*
* Copyright (c) 2023 All rights reserved
* Filename: utils.h
* Brief: 工具函数
* Depend: C++14
*
* Version: V1.0.0
* Date: 2023/03/07
* Author: LucianY(https://blog.csdn.net/LucainY)
* Note: 初次版本。
*
*/
#include <algorithm>
#include <cmath>
#include <iterator>
#include <numeric>
#include <type_traits>
#ifndef LY_UTILS_H
#define LY_UTILS_H
namespace Ly {

//! Computes the population variance of the samples in [first, last).
//!
//! The sum of squared deviations from the mean is divided by the number of
//! samples N (not N - 1). The range is traversed more than once (to count,
//! to average, and to sum the squared deviations), so the iterators must
//! support multi-pass traversal.
//!
//! @param first Iterator/pointer to the first sample.
//! @param last  Iterator/pointer one past the last sample.
//! @return The variance as a double; 0 when the range holds at most one sample.
template<typename ForwardIt>
inline double ComputeVariance(ForwardIt first, ForwardIt last)
{
    using ValueType = typename std::iterator_traits<ForwardIt>::value_type;

    const auto count = std::distance(first, last);
    if (count <= 1) {
        // Zero or one sample: there is no spread to measure.
        return 0.0;
    }

    // First pass: arithmetic mean, accumulated in double.
    const double mean = std::accumulate(first, last, 0.0) / count;

    // Second pass: fold the squared deviations from the mean into a double.
    const double squaredDeviationSum = std::accumulate(
        first, last, 0.0,
        [mean](double runningSum, const ValueType &sample) -> double {
            return runningSum + (sample - mean) * (sample - mean);
        });

    return squaredDeviationSum / count;
}

//! Computes the population standard deviation of the samples in [first, last).
//!
//! @param first Iterator/pointer to the first sample.
//! @param last  Iterator/pointer one past the last sample.
//! @return The square root of ComputeVariance(first, last).
template<typename ForwardIt>
inline double ComputeStandardDeviation(ForwardIt first, ForwardIt last)
{
    const double variance = ComputeVariance(first, last);
    return std::sqrt(variance);
}

} // namespace Ly
#endif // LY_UTILS_H
2)main.cpp
/*
* Copyright (c) 2023 All rights reserved
* Filename: main.cpp
* Author: LucianY(https://blog.csdn.net/LucainY)
* Brief: 方差/标准差计算函数测试
*/
#include <cstdio>
#include <vector>
#include "utils.h"
using namespace Ly;
//! Prints the variance and standard deviation of three sample sets, each held
//! in a different kind of storage: a std::vector, a stack array and a heap array.
void Test()
{
    // Container: the range is passed as vector iterators.
    const std::vector<int> vectorSamples{ 1, 2, 3 };
    const double vectorVariance = ComputeVariance(vectorSamples.begin(), vectorSamples.end());
    printf("nums variance: %lf\n", vectorVariance);
    const double vectorDeviation = ComputeStandardDeviation(vectorSamples.begin(), vectorSamples.end());
    printf("nums standard deviation: %lf\n", vectorDeviation);

    // Stack array: the range is passed via std::begin / std::end.
    float stackSamples[5]{ 1, 2, 3, 5, 7 };
    const double stackVariance = ComputeVariance(std::begin(stackSamples), std::end(stackSamples));
    printf("nums2 variance: %lf\n", stackVariance);
    const double stackDeviation = ComputeStandardDeviation(std::begin(stackSamples), std::end(stackSamples));
    printf("nums2 standard deviation: %lf\n", stackDeviation);

    // Heap array: the range is passed as a raw pointer pair.
    constexpr int heapCount = 7;
    double *heapSamples = new double[heapCount]{ 1, 2, 3, 5, 7, 11, 13 };
    double *const heapEnd = heapSamples + heapCount;
    const double heapVariance = ComputeVariance(heapSamples, heapEnd);
    printf("nums3 variance: %lf\n", heapVariance);
    const double heapDeviation = ComputeStandardDeviation(heapSamples, heapEnd);
    printf("nums3 standard deviation: %lf\n", heapDeviation);
    delete[] heapSamples; // release the array allocated with new[] above
}
//! Program entry point: runs the demo, then waits for input before exiting.
int main()
{
    Test();

    // Read one character from stdin before returning
    // (presumably to keep a console window open; the result is not used).
    (void)getchar();

    return 0;
}
3)测试输出
nums variance: 0.666667
nums standard deviation: 0.816497
nums2 variance: 4.640000
nums2 standard deviation: 2.154066
nums3 variance: 18.000000
nums3 standard deviation: 4.242641