大数据-----------解决了超出内存表示范围的大数据无法计算的问题,即溢出问题
提出问题:在计算机中,每一种类型都有自己的表示范围,比如64位有符号整型long long的表示范围是【0x8000000000000000,0x7fffffffffffffff】(即-9223372036854775808到9223372036854775807),超出范围之后,将会是一个毫无意义的数字。
eg:999999999999999999999999999999999999999999999用内置类型无法保存,更不用说进行基本的四则运算,怎么办?
解决方法:字符串表示整型大数据,模拟整型计算,实现大数据的加、减、乘、除运算。
BigData:
1、普通类型:long long范围:【0x8000000000000000,0x7fffffffffffffff】;
2、大数据:直接string来保存。
实现新的数据类型BigData,在内置类型操作效率上基本没有影响,同时又要支持大数据的四则运算,操作效率应尽可能高。
具体实现细节在代码中的注释中,代码如下:
BigData.h
#ifndef __BigData__
#define __BigData__
#include<assert.h>
#include<stdlib.h>
#include<string.h>
#include<iostream>
#include<string>
using namespace std;
// Constants and the 64-bit integer alias used by BigData, a type for numbers
// that are too large for the built-in integer types.
#define UN_INIT 0xcccccccccccccccc // placeholder pattern used as the default ("uninitialised") constructor argument
#define MAX_INT64 0x7fffffffffffffff // largest value of the built-in 64-bit signed type
// NOTE(review): the literal below does not fit in long long, so the literal
// itself has an unsigned type; arithmetic written directly on MIN_INT64 is
// unsigned until the result is converted to INT64.
#define MIN_INT64 0x8000000000000000 // bit pattern of the smallest 64-bit signed value
typedef long long INT64;
// Integer type that keeps every value twice: as a built-in INT64 (m_llData)
// and as signed decimal text (m_sData). Operators use the built-in value while
// operands and result fit in 64 bits and fall back to digit-by-digit string
// arithmetic otherwise.
class BigData
{
public:
//===================================================
// 1. Constructors
BigData(INT64 data = UN_INIT); // construct from a built-in 64-bit integer
BigData(const char* pData); // construct from decimal text (optional sign, then digits)
//===================================================
// 2. The four arithmetic operators
BigData operator+(const BigData& data);
BigData operator-(const BigData& data);
BigData operator*(const BigData& data);
BigData operator/(const BigData& data);
protected:
//===================================================
// 3. Internal helpers
// Prints m_llData when the value fits in 64 bits, otherwise the digit string.
friend ostream& operator<<(ostream& os, const BigData& data);
// True when the text in m_sData lies outside the 64-bit signed range.
bool IsOverFlow()const;
// Fills m_sData with the signed decimal text of m_llData.
void INT64toString();
// True when the digit run `left` is numerically >= the digit run `right`
// (longer run wins, equal lengths are compared with strncmp).
bool IsLeftNumBig(const char* left, int LeftSize,const char* right,int RightSize);
// Repeatedly subtracts `right` from `left` in place and returns the count as a digit character.
char LoopSub(char* left, int LeftSize,char* right, int RightSize);
// String arithmetic on operands of the form sign character followed by digits.
string Add(string left, string right);
string Sub(string left, string right);
string Mul(string left, string right);
string Div(string left, string right);
private:
//===================================================
// 4. Data
INT64 m_llData; // built-in value, meaningful only while IsOverFlow() is false
string m_sData; // sign character followed by the decimal digits
};
#endif
BigData.cpp
#include"BigData.h"
//==========================================================
//内置类型构造
// Build a BigData from a built-in 64-bit integer.
// The number is stored in m_llData; INT64toString() then produces the
// matching signed decimal text in m_sData.
BigData::BigData(INT64 data)
    : m_llData(data)
    , m_sData()
{
    INT64toString();
}
//字符串构造
// Build a BigData from decimal text.
//
// Accepted shape: an optional leading '+' or '-', then decimal digits.
// Leading zeros are dropped and parsing stops at the first non-digit, e.g.
//   "1234567890" "+123456789" "00001234" "123456ahdks" "+" "-123456"
// Text whose first character is neither a sign nor a digit (e.g. " " or
// "adsd2314") leaves the object with an empty m_sData and m_llData == 0.
//
// m_sData receives the sign character followed by every parsed digit.
// m_llData receives the exact value while it fits in 64 bits; once the next
// digit would leave the INT64 range, accumulation stops (the original code
// kept multiplying, which is signed overflow, i.e. undefined behaviour).
// The stale prefix left in m_llData in that case is non-zero and must not be
// used as the value; IsOverFlow() reports such objects from m_sData.
BigData::BigData(const char* pData)
    : m_llData(0)
    , m_sData("")
{
    if (pData == NULL)
    {
        assert(pData);
        return;
    }

    // 1. Sign handling: an explicit sign is consumed, a leading digit implies '+'.
    char symbol = pData[0];
    const char* str = pData;
    if (symbol == '+' || symbol == '-')
    {
        ++str;
    }
    else if (symbol >= '0' && symbol <= '9')
    {
        symbol = '+';
    }
    else
    {
        // Invalid first character.
        return;
    }

    // 2. Skip leading zeros ("00001234").
    while (*str == '0')
        ++str;

    // 3. Copy the digits and accumulate the built-in value with range checks.
    m_sData.assign(1, symbol);
    const INT64 maxValue = MAX_INT64;
    const INT64 minValue = -maxValue - 1;
    bool fits = true;
    for (; *str >= '0' && *str <= '9'; ++str)
    {
        const int digit = *str - '0';
        m_sData.push_back(*str);
        if (!fits)
            continue;

        if (symbol == '-')
        {
            // Accumulate downwards so that the minimum value is reachable exactly.
            if (m_llData < (minValue + digit) / 10)
                fits = false;
            else
                m_llData = m_llData * 10 - digit;
        }
        else
        {
            if (m_llData > (maxValue - digit) / 10)
                fits = false;
            else
                m_llData = m_llData * 10 + digit;
        }
    }
}
//======================================================================
//4.处理后续字符的时候需要考虑m_llData的溢出问题
//为什么要用到const函数?
bool BigData::IsOverFlow()const
{
//max(0x7fffffffffffffff)9223372036854775807
//min(0x8000000000000000)-9223372036854775808
//1.处理符号问题
string tmp("+9223372036854775807");
if (m_sData[0] == '-')
tmp = "-9223372036854775808";
//2.字符串的比较
if (m_sData.size() < tmp.size())
{
return false;
}
else if (m_sData.size() == tmp.size() && tmp >= m_sData)
{
return false;
}
else
{
return true;
}
}
//为什么是友元函数,const&
//避免this指针问题
// Stream a BigData.
// Values that fit in 64 bits are written from m_llData; larger values are
// written from the digit string, with a leading '+' suppressed.
ostream& operator<<(ostream& os, const BigData& data)
{
    if (data.IsOverFlow())
    {
        const char* text = data.m_sData.c_str();
        os << (text[0] == '+' ? text + 1 : text);
    }
    else
    {
        os << data.m_llData;
    }
    return os;
}
//内置类型时构造函数中string为“”,则需要将数据INT64转换为String类型
void BigData::INT64toString()
{
INT64 tmp = m_llData;
char symbol = '+';
if (m_llData < 0)
{
symbol = '-';
}
m_sData.append(1, symbol);
while (tmp)
{
int ptmp = tmp % 10;
if (ptmp < 0)
{
ptmp = 0 - ptmp;
}
char ch = ptmp + '0';
m_sData.append(1, ch);
tmp /= 10;
}
char* leftStr = (char*)m_sData.c_str() + 1;
char* rightStr = leftStr + m_sData.size() - 2;
while (leftStr < rightStr)
{
char exchange = *leftStr;
*leftStr++ = *rightStr;
*rightStr-- = exchange;
}
}
// Compare two digit runs as numbers: returns true when `left` >= `right`.
// A longer run is treated as larger; runs of equal length are compared
// character by character over RightSize characters.
bool BigData::IsLeftNumBig(const char* left, int LeftSize, const char* right, int RightSize)
{
    if (LeftSize != RightSize)
        return LeftSize > RightSize;
    return strncmp(left, right, RightSize) >= 0;
}
// Division by repeated subtraction on digit runs.
//
// Subtracts `right` from `left` in place for as long as `left` >= `right`
// and returns the number of subtractions as a character ('0' + count).
//   left / LeftSize   - writable digit run holding the minuend; on return it
//                       holds the remainder, possibly with leading '0's
//   right / RightSize - digit run holding the subtrahend
// The caller is expected to pass runs without leading zeros whose quotient
// is a single digit.
//
// Changes from the original:
//   * a borrow is carried through all remaining digits of `left` instead of
//     only decrementing the next character, so a '0' above the subtrahend's
//     width no longer turns into a non-digit;
//   * the length is tested before `*left` is read when stripping zeros;
//   * an empty subtrahend returns '0' instead of looping forever.
char BigData::LoopSub(char* left, int LeftSize, char* right, int RightSize)
{
    assert(left != NULL && right != NULL);
    char ret = '0';
    if (RightSize <= 0)
        return ret;

    while (IsLeftNumBig(left, LeftSize, right, RightSize))
    {
        // One schoolbook subtraction, least significant digit first.
        int borrow = 0;
        int indexR = RightSize - 1;
        for (int indexL = LeftSize - 1; indexL >= 0; --indexL, --indexR)
        {
            if (indexR < 0 && borrow == 0)
                break;  // nothing left to subtract from the higher digits

            int digit = (left[indexL] - '0') - borrow;
            if (indexR >= 0)
                digit -= right[indexR] - '0';

            borrow = (digit < 0) ? 1 : 0;
            if (borrow)
                digit += 10;
            left[indexL] = static_cast<char>(digit + '0');
        }

        // Drop leading zeros so the next size comparison is meaningful,
        // e.g. 33333333000000000000000000 / 33333333.
        while (LeftSize > 0 && *left == '0')
        {
            ++left;
            --LeftSize;
        }
        ++ret;
    }
    return ret;
}
//===============================================================
// Sum of *this and data.
//
// Fast path: when both operands fit in 64 bits and the sum cannot overflow,
// the built-in addition is used. Operands of opposite sign can never
// overflow; operands of equal sign are checked against the matching limit.
// Slow path: digit-string arithmetic. Equal signs add the magnitudes with
// Add(). Opposite signs are rewritten as a - (-b): the right operand's sign
// character is replaced by the left one before calling Sub(), because Sub()
// derives the result sign and orders its operands assuming both carry the
// same sign. The original passed the operands unchanged, which produced a
// wrong sign or wrong digits.
//
// The limit checks are evaluated only for the sign they apply to; the
// original computed both bounds up front, which overflowed, and contained a
// garbled comment line that did not compile.
BigData BigData::operator+(const BigData& data)
{
    // 1. Both operands fit in 64 bits.
    if (!IsOverFlow() && !data.IsOverFlow())
    {
        if (m_sData[0] != data.m_sData[0])
        {
            return BigData(m_llData + data.m_llData);
        }

        const INT64 maxValue = MAX_INT64;
        const INT64 minValue = -maxValue - 1;
        if ((m_sData[0] == '+' && maxValue - m_llData >= data.m_llData) ||
            (m_sData[0] == '-' && minValue - m_llData <= data.m_llData))
        {
            return BigData(m_llData + data.m_llData);
        }
    }

    // 2. At least one operand, or the result, needs string arithmetic.
    if (m_sData[0] == data.m_sData[0])
        return BigData(Add(m_sData, data.m_sData).c_str());

    string negated = data.m_sData;
    if (!negated.empty())
        negated[0] = m_sData[0];
    return BigData(Sub(m_sData, negated).c_str());
}
// Difference *this - data.
//
// Fast path: when both operands fit in 64 bits and the difference cannot
// overflow, the built-in subtraction is used. Operands of equal sign can
// never overflow; operands of opposite sign are checked against the limit
// that matches the left operand's sign.
// Slow path: digit-string arithmetic. Equal signs subtract the magnitudes
// with Sub(). Opposite signs are rewritten as a + (-b): the right operand's
// sign character is replaced by the left one before calling Add(), because
// Add() copies the sign of its longer operand. The original passed the
// operands unchanged, so the sign was wrong whenever the right operand had
// more digits.
//
// The limit checks are evaluated only for the sign they apply to, so no
// bound is computed that would itself overflow.
BigData BigData::operator-(const BigData& data)
{
    // 1. Both operands fit in 64 bits.
    if (!IsOverFlow() && !data.IsOverFlow())
    {
        if (m_sData[0] == data.m_sData[0])
        {
            return BigData(m_llData - data.m_llData);
        }

        const INT64 maxValue = MAX_INT64;
        const INT64 minValue = -maxValue - 1;
        if ((m_sData[0] == '+' && maxValue + data.m_llData >= m_llData) ||
            (m_sData[0] == '-' && minValue + data.m_llData <= m_llData))
        {
            return BigData(m_llData - data.m_llData);
        }
    }

    // 2. At least one operand, or the result, needs string arithmetic.
    if (m_sData[0] == data.m_sData[0])
        return BigData(Sub(m_sData, data.m_sData).c_str());

    string negated = data.m_sData;
    if (!negated.empty())
        negated[0] = m_sData[0];
    return BigData(Add(m_sData, negated).c_str());
}
// Product of *this and data.
//
// Zero is detected from the digit text (a value of zero has no digits after
// the sign character) rather than from m_llData, which is not meaningful for
// operands that exceed 64 bits.
// Fast path: when both operands fit in 64 bits and the product stays in
// range, the built-in multiplication is used. The range tests divide the
// matching limit by one operand:
//   equal signs     -> product is positive, compare against the maximum;
//   opposite signs  -> product is negative, compare against the minimum,
//                      always dividing by the positive operand so that the
//                      minimum is never divided by -1.
// The limits are held in INT64 constants; the original divided the unsigned
// MIN_INT64 literal directly, which made the opposite-sign test unsigned and
// let overflowing products through.
// Slow path: Mul() on the digit strings.
BigData BigData::operator*(const BigData& data)
{
    // 0. Anything times zero.
    if (m_sData.size() <= 1 || data.m_sData.size() <= 1)
        return BigData(INT64(0));

    // 1. Both operands fit in 64 bits: multiply directly when the result fits too.
    if (!IsOverFlow() && !data.IsOverFlow())
    {
        const INT64 maxValue = MAX_INT64;
        const INT64 minValue = -maxValue - 1;
        bool fits = false;
        if (m_sData[0] == data.m_sData[0])
        {
            if (m_sData[0] == '+')
                fits = maxValue / m_llData >= data.m_llData;
            else
                fits = maxValue / m_llData <= data.m_llData;
        }
        else
        {
            const bool leftIsPositive = (m_sData[0] == '+');
            const INT64 positive = leftIsPositive ? m_llData : data.m_llData;
            const INT64 negative = leftIsPositive ? data.m_llData : m_llData;
            fits = minValue / positive <= negative;
        }
        if (fits)
            return BigData(m_llData * data.m_llData);
    }

    // 2. String multiplication.
    return BigData((Mul(m_sData, data.m_sData)).c_str());
}
// Quotient *this / data, truncated toward zero.
//
// Cases are handled in order:
//   0. divisor is zero      -> the message is printed and a default-constructed
//                              BigData (the UN_INIT placeholder) is returned.
//                              The original printed the message and carried on
//                              into an out-of-range comparison or an integer
//                              division by zero.
//   1. |dividend| < |divisor| or dividend is zero -> 0
//   2. |dividend| == |divisor| -> +1 or -1
//   3. divisor is +1 or -1  -> the dividend with the combined sign; done on the
//                              text so that MIN_INT64 / -1 is representable
//   4. both fit in 64 bits  -> built-in division
//   5. otherwise            -> Div() on the digit strings
// A value of zero is recognised by its text having no digits after the sign.
BigData BigData::operator/(const BigData& data)
{
    // 0. Division by zero.
    if (data.m_sData.size() <= 1)
    {
        cout << "除数 invalid!" << endl;
        return BigData();
    }

    // 1. Results that are zero.
    if (m_sData.size() <= 1)
        return BigData(INT64(0));
    if (m_sData.size() < data.m_sData.size() ||
        (m_sData.size() == data.m_sData.size() &&
         strcmp(m_sData.c_str() + 1, data.m_sData.c_str() + 1) < 0))
    {
        return BigData(INT64(0));
    }

    // 2. Equal magnitudes.
    if (strcmp(m_sData.c_str() + 1, data.m_sData.c_str() + 1) == 0)
    {
        if (m_sData[0] == data.m_sData[0])
            return BigData(INT64(1));
        else
            return BigData(INT64(-1));
    }

    // 3. Divisor is 1 or -1.
    if (data.m_sData == "-1" || data.m_sData == "+1")
    {
        string result = m_sData;
        if (m_sData[0] == data.m_sData[0])
            result[0] = '+';
        else
            result[0] = '-';
        return BigData(result.c_str());
    }

    // 4. Both operands fit in 64 bits.
    if (!IsOverFlow() && !data.IsOverFlow())
        return BigData(m_llData / data.m_llData);

    // 5. String division.
    return BigData((Div(m_sData, data.m_sData)).c_str());
}
//================================================================
// Add the magnitudes of two operands given as sign character + digits.
//
// The result is the sign character of the longer operand, one extra leading
// digit for the final carry (possibly '0'), and the summed digits.
//
// The carry starts at the number 0. The original started it at the character
// '0' (48), which was added into the lowest digit and corrupted both that
// digit and the carry chain. Two operands without any characters yield "+0"
// instead of writing past the end of the result.
string BigData::Add(string left, string right)
{
    int leftSize = left.size();
    int rightSize = right.size();

    // 1. Make `left` the longer operand.
    if (leftSize < rightSize)
    {
        swap(left, right);
        swap(leftSize, rightSize);
    }
    if (leftSize == 0)
        return string("+0");

    // 2. Result: sign, carry slot, then one slot per digit of `left`.
    string result(leftSize + 1, '0');
    result[0] = left[0];

    // 3. Add from the least significant digit upwards.
    int carry = 0;
    for (int index = 1; index < leftSize; ++index)
    {
        int sum = (left[leftSize - index] - '0') + carry;
        if (index < rightSize)
        {
            sum += right[rightSize - index] - '0';
        }
        result[leftSize - index + 1] = static_cast<char>(sum % 10 + '0');
        carry = sum / 10;
    }
    result[1] = static_cast<char>(carry + '0');
    return result;
}
// Subtract the magnitude of `right` from that of `left`; both are given as
// sign character + digits.
//
// The operand that is shorter, or that compares smaller as a whole string
// when the lengths match, becomes the subtrahend. Without a swap the result
// keeps the sign of `left`; after a swap it gets the opposite of the sign
// now in front. The string comparison includes the sign character, so the
// ordering and the sign rule describe left - right only when both operands
// carry the same sign character.
// The result has as many characters as the larger operand and may contain
// leading zeros.
string BigData::Sub(string left, string right)
{
    char sign = left[0];
    int bigSize = left.size();
    int smallSize = right.size();

    // 1. Put the larger operand on the left and settle the result sign.
    const bool rightIsLarger =
        bigSize < smallSize || (bigSize == smallSize && left < right);
    if (rightIsLarger)
    {
        swap(left, right);
        swap(bigSize, smallSize);
        sign = (left[0] == '+') ? '-' : '+';
    }

    // 2. Result buffer with the sign in front.
    string result;
    result.resize(bigSize);
    result[0] = sign;

    // 3. Subtract from the least significant digit upwards; a borrow is
    //    taken by decrementing the next higher character of `left`.
    for (int pos = 1; pos < bigSize; ++pos)
    {
        const int at = bigSize - pos;
        char diff = left[at] - '0';
        if (pos < smallSize)
        {
            diff -= right[smallSize - pos] - '0';
        }
        if (diff < 0)
        {
            left[at - 1] -= 1;
            diff += 10;
        }
        result[at] = diff + '0';
    }
    return result;
}
// Multiply two operands given as sign character + digits (schoolbook method).
//
// The result is '+' for equal signs and '-' otherwise, followed by
// (digits of left + digits of right) result digits, possibly with leading
// zeros. The shorter operand drives the outer loop.
//
// Fixes over the original:
//   * the per-row carry starts at the number 0, not the character '0' (48),
//     which was added into every row's lowest digit;
//   * a zero digit in the multiplier is recognised by comparing the numeric
//     digit with 0 (the original compared it with '0', so rows of zeros were
//     never skipped).
// Operands without any characters yield "+0".
string BigData::Mul(string left, string right)
{
    // 1. Sign of the product.
    const char symbol = (left[0] != right[0]) ? '-' : '+';

    // 2. Make `left` the shorter operand.
    int leftSize = left.size();
    int rightSize = right.size();
    if (leftSize > rightSize)
    {
        swap(left, right);
        swap(leftSize, rightSize);
    }
    if (leftSize < 1)
        return string("+0");

    // 3. Result buffer: sign + one slot per possible product digit.
    string result(rightSize + leftSize - 1, '0');
    result[0] = symbol;
    const int resultSize = result.size();

    // 4. One row per digit of `left`; `offset` shifts each row one place left.
    int offset = 0;
    for (int indexL = 1; indexL < leftSize; ++indexL, ++offset)
    {
        const int digitL = left[leftSize - indexL] - '0';
        if (digitL == 0)
            continue;  // a zero row contributes nothing

        int carry = 0;
        int indexR = 1;
        for (; indexR < rightSize; ++indexR)
        {
            const int pos = resultSize - indexR - offset;
            const int value = digitL * (right[rightSize - indexR] - '0')
                            + carry + (result[pos] - '0');
            result[pos] = static_cast<char>(value % 10 + '0');
            carry = value / 10;
        }
        // This slot has not been touched by earlier rows, so assigning is enough.
        result[resultSize - indexR - offset] = static_cast<char>(carry + '0');
    }
    return result;
}
// Long division of two operands given as sign character + digits.
//
// The result is '+' for equal signs and '-' otherwise, followed by one
// quotient digit per digit of the dividend (so it may start with zeros).
// The quotient is truncated toward zero; the remainder is discarded.
//
// Each dividend digit is brought down into `remainder`, LoopSub() subtracts
// the divisor from it as often as it fits and returns that count as the next
// quotient digit, and leading zeros left in the remainder are removed.
//
// The original skipped '0' digits of the dividend without emitting a
// quotient digit, so exact divisions lost digits (6600 / 33 produced 20).
// It also modified the dividend through a cast of c_str(). Both are avoided
// by working on a separate, writable remainder string.
// An operand without digits yields a zero quotient.
string BigData::Div(string left, string right)
{
    const char symbol = (left[0] != right[0]) ? '-' : '+';
    string result(1, symbol);

    if (left.size() < 2 || right.size() < 2)
    {
        result.append(1, '0');
        return result;
    }

    char* divisor = &right[1];
    const int divisorSize = static_cast<int>(right.size()) - 1;

    string remainder;  // current partial dividend, kept without leading zeros
    for (string::size_type index = 1; index < left.size(); ++index)
    {
        // Bring down the next digit; a zero in front of an empty remainder adds nothing.
        if (!(remainder.empty() && left[index] == '0'))
            remainder.push_back(left[index]);

        char digit = '0';
        if (!remainder.empty())
        {
            digit = LoopSub(&remainder[0], static_cast<int>(remainder.size()),
                            divisor, divisorSize);
        }
        result.append(1, digit);

        // LoopSub leaves the difference in place, possibly with leading zeros
        // (222 - 6*33 leaves "024"); drop them, erasing everything if all are zero.
        remainder.erase(0, remainder.find_first_not_of('0'));
    }
    return result;
}
测试用例如下:
#include"BigData.h"
void funAdd()
{
//=============================================
//内置类型INT64检测
BigData left(1234);
BigData right(789);
cout << left + right << endl;
BigData left1(MAX_INT64);
BigData right1(3);
cout << left1 + right1 << endl;
/error C4146: 一元负运算符应用于无符号类型,结果仍为无符号类型/
BigData left2(MIN_INT64);
BigData right2(-2);
cout << left2 + right2 << endl;
BigData left3(MAX_INT64);
BigData right3(-999);
cout << left3 + right3 << endl;
//===============================================
//字符串的形式检测
BigData left4("99999999999999999999999999999999999999999999");
BigData right4("11111111111111111111111111111111111111111111");
cout << left4 + right4 << endl;
BigData left5("11111111111111111111111111111111");
BigData right5("999999999999999999999999999999999");
cout << left5 + right5 << endl;
}
void funSub()
{
//==================================================
//内置类型INT64检测
BigData left(1234);
BigData right(34);
cout << left - right << endl;
BigData left1(MAX_INT64);
BigData right1(-999);
cout << left1 - right1 << endl;
BigData left2(MIN_INT64);
BigData right2(-999);
cout << left2 - right2 << endl;
//===============================================
//字符串的形式检测
BigData left3("111111111111111111111111111111111111111111111");
BigData right3("9999");
cout << left3 - right3 << endl;
BigData left4("1111");
BigData right4("999999999999999999999999999999999");
cout << left4 - right4 << endl;
BigData left5("111111111111111111111111111111111");
BigData right5("999999999999999999999999999999999");
cout << left5 - right5 << endl;
}
void funMul()
{
//==================================================
//内置类型INT64检测
BigData left(88);
BigData right(999);
cout << left * right << endl;
BigData left1(MAX_INT64);
BigData right1(22);
cout << left1 * right1 << endl;
BigData left2(MIN_INT64 + 1);
BigData right2(-22);
cout << left2 * right2 << endl;
BigData left3(999);
BigData right3("9999999999999999999");
cout << left3 * right3 << endl;
//===============================================
//字符串的形式检测
BigData left4("999");
BigData right4("99999999999999999999999999999999999999999");
cout << left4 * right4 << endl;
BigData left5("111111111111111111111111111111111111111111");
BigData right5(-999);
cout << left5 * right5 << endl;
BigData left6(110011);
BigData right6("22222222222222222222222222222222222222222222");
cout << left6 * right6 << endl;
BigData left7(INT64(0));
BigData right7("22222222222222222222222222222222222222222222");
cout << left7 * right7 << endl;
}
void funDiv()
{
//==================================================
//内置类型INT64检测
BigData left(222);
BigData right(33);
cout << left / right << endl;
BigData left1(33);
BigData right1(34);
cout << left1 / right1 << endl;
BigData left2(33);
BigData right2(-33);
cout << left2 / right2 << endl;
BigData left3(MIN_INT64); //?no
BigData right3(-1);
cout << left3 / right3 << endl;
//===============================================
//字符串的形式检测
BigData left4("22222222222222222222222222");
BigData right4("33");
cout << left4 / right4 << endl;
BigData left5("22222220000000000000000000");
BigData right5("33");
cout << left5 / right5 << endl;
BigData left6("22222000000000222222222222");
BigData right6("33");
cout << left6 / right6 << endl;
}
int main()
{
cout << "===========================库函数测试============================" << endl;
//========================================================
//库函数测试
cout << atoi("1234567890") << endl;
cout << atoi("+123456789") << endl;
cout << atoi("00001234") << endl;
cout << atoi("123456ahdks") << endl;
cout << atoi("+") << endl;
cout << atoi(" ") << endl;
cout << atoi("-123456") << endl;
cout << atoi("0000001212324243fdjvkfdjv") << endl;
cout << "==========================实现构造函数测试============================" << endl;
cout << "测试结果与库函数测试的结果做出比较是否相同" << endl;
//========================================================
//实现构造函数测试,测试结果与库函数测试的结果做出比较是否相同
BigData b1("1234567890");
BigData b2("+123456789");
BigData b3("00001234");
BigData b4("123456ahdks");
BigData b5("+");
BigData b6(" ");
BigData b7("-123456");
BigData b8("0000001212324243fdjvkfdjv");
cout << b1 << endl;
cout << b2 << endl;
cout << b3 << endl;
cout << b4 << endl;
cout << b5 << endl;
cout << b6 << endl;
cout << b7 << endl;
cout << b8 << endl;
//=======================================================
//+ - * / 测试用例
cout << "======================+ - * /测试用例=================================" << endl;
cout << "======================Add测试用例=================================" << endl;
funAdd();
cout << "======================Sub测试用例=================================" << endl;
funSub();
cout << "======================Mul测试用例=================================" << endl;
funMul();
cout << "======================Div测试用例=================================" << endl;
funDiv();
return 0;
}
测试结果如下图:
项目中遇到的问题:
1.#ifndef 宏 与 #pragma once 都是防止头文件重复包含,区别是什么呢;
#define 宏
#endif
2.构造函数传入的是内置类型,需要将整型转换为string,以便于在处理数据时方便处理,即函数INT64toString;
3.数据为负数时,函数INT64toString中tmp%10=负数,导致String成为乱码。所以往进放结果时需要处理为正数;
4.程序中在语句min = MIN_INT64 - m_llData;的处理中,必须要使用临时变量min去当做限制条件,否则会出现错误:error C4146: 一元负运算符应用于无符号类型,结果仍为无符号类型(原因是0x8000000000000000超出了long long的表示范围,该字面量本身是无符号类型);
5.operator+ 同号:Add 异号:Sub
operator- 同号:Sub 异号:Add
6.函数Mul中,某注释处‘0’加了多次,所以处理时注意;
7.string的resize(size)函数与函数assign(size,'0')的应用场景,及函数append(size,char);
8.注意Div中的函数调用IsLeftNumBig(参数处理细节)与函数调用LoopSub(函数处理)。