大数据

2 篇文章 0 订阅
2 篇文章 0 订阅

大数据-----------解决了超出内存表示范围的大数据无法计算的问题,即溢出问题


提出问题:在计算机中,每一种内置类型都有自己的表示范围,比如long long的表示范围为【0x8000000000000000,0x7fffffffffffffff】(即-9223372036854775808 ~ 9223372036854775807),超出范围之后,得到的将会是一个毫无意义的数字。
eg:999999999999999999999999999999999999999999999用内置类型无法保存,更不用说进行基本的四则运算,怎么办?

解决方法:字符串表示整型大数据,模拟整型计算,实现大数据的加、减、乘、除运算。

BigData:
1、普通类型:long long范围:【0x8000000000000000,0x7fffffffffffffff】;
2、大数据:直接string来保存。

实现新的数据类型BigData,在内置类型操作效率上基本没有影响,同时又要支持大数据的四则运算,操作效率应尽可能高。

具体实现细节在代码中的注释中,代码如下:

BigData.h

#ifndef __BigData__
#define __BigData__
#include<iostream>
#include<string>
#include<cstring>
#include<cstdlib>
#include<assert.h>
using namespace std;

//Constants and the integer alias used by BigData, a type that works around
//values too large for the built-in 64-bit integer.

#define UN_INIT 0xcccccccccccccccc				//placeholder used as the default argument of BigData(INT64), i.e. "not initialised"
#define MAX_INT64 0x7fffffffffffffff			//largest value of the built-in type
#define MIN_INT64 0x8000000000000000			//bit pattern of the smallest value of the built-in type; NOTE: this literal does not fit in long long, so it has an unsigned type inside expressions
typedef long long INT64;

//Integer type that stores a number twice: as a built-in INT64 (m_llData) and
//as text (m_sData, a sign character followed by decimal digits). The four
//arithmetic operators return a new BigData.
class BigData
{
public:
	//===================================================
	//1. Constructors and output
	BigData(INT64 data = UN_INIT);	//construct from a built-in integer
	BigData(const char* pData);		//construct from a decimal string

	//===================================================
	//2. The four arithmetic operations
	BigData operator+(const BigData& data);
	BigData operator-(const BigData& data);
	BigData operator*(const BigData& data);
	BigData operator/(const BigData& data);

protected:
	//===================================================
	//3. Internal helpers
	//Stream output; declared as a friend so it can read the private members.
	friend ostream& operator<<(ostream& os, const BigData& data);
	//True when the text form lies outside the INT64 range.
	bool IsOverFlow()const;
	//Fills m_sData from m_llData.
	void INT64toString();
	//Compares two unsigned digit runs: true when left >= right.
	bool IsLeftNumBig(const char* left, int LeftSize,const char* right,int RightSize);
	//Subtracts right from left repeatedly, in place; returns the count as a digit character.
	char LoopSub(char* left, int LeftSize,char* right, int RightSize);
	//String arithmetic on operands of the form sign + digits.
	string Add(string left, string right);
	string Sub(string left, string right);
	string Mul(string left, string right);
	string Div(string left, string right);

private:
	//===================================================
	//4. Data
	INT64 m_llData;			//value as a built-in integer
	string m_sData;			//value as text: sign character followed by digits
};
#endif

BigData.cpp

#include"BigData.h"

//==========================================================
//Construct from a built-in integer: keep the value as given and derive the
//text form (sign + digits) from it.
BigData::BigData(INT64 data)
	: m_llData(data)
	, m_sData()
{
	INT64toString();
}

//Construct from a decimal string.
//Accepted form: an optional '+' or '-', then digits. Leading zeros are dropped
//("00001234" -> 1234) and parsing stops at the first non-digit character
//("123456ahdks" -> 123456). Input that starts with neither a sign nor a digit
//("    ", "adsd2314") produces zero. A null pointer trips the assert and, in
//builds without asserts, leaves the object with an empty text form.
//m_sData receives the sign character followed by the significant digits.
//m_llData receives the exact value when it fits in INT64; otherwise it is
//clamped to the nearest INT64 limit, so no signed overflow occurs and a
//non-zero number can never leave m_llData equal to 0.
BigData::BigData(const char* pData)
: m_llData(0)
, m_sData("")
{
	if (pData == NULL)
	{
		assert(pData);
		return;
	}

	//1. First character: explicit sign, digit (implicit '+'), or illegal
	char symbol = pData[0];
	const char* str = pData;
	if (symbol == '+' || symbol == '-')
	{
		++str;
	}
	else if (symbol >= '0' && symbol <= '9')
	{
		symbol = '+';
	}
	else
	{
		//Illegal first character: use the same text form a parsed zero has
		m_sData.assign(1, '+');
		return;
	}

	//Drop leading zeros   "00001234"
	while (*str == '0')
		++str;

	//The first character of the text form is always the sign
	m_sData.assign(1, symbol);

	//2. Copy the digits and accumulate the magnitude in unsigned arithmetic.
	//The largest magnitude differs by sign: 2^63 for '-', 2^63 - 1 for '+'.
	const unsigned long long limit =
		(symbol == '-') ? 9223372036854775808ULL : 9223372036854775807ULL;
	unsigned long long magnitude = 0;
	bool clamped = false;
	for (; *str >= '0' && *str <= '9'; ++str)
	{
		m_sData.push_back(*str);
		if (clamped)
			continue;

		const unsigned long long digit = static_cast<unsigned long long>(*str - '0');
		if (magnitude > (limit - digit) / 10)
		{
			//magnitude * 10 + digit would exceed the limit: clamp
			magnitude = limit;
			clamped = true;
		}
		else
		{
			magnitude = magnitude * 10 + digit;
		}
	}

	//3. Apply the sign; the "- 1 ... - 1" form also yields the minimum
	//value without negating a number that has no positive counterpart
	if (symbol == '-' && magnitude != 0)
		m_llData = -static_cast<INT64>(magnitude - 1) - 1;
	else
		m_llData = static_cast<INT64>(magnitude);
}

//======================================================================
//4.处理后续字符的时候需要考虑m_llData的溢出问题
//为什么要用到const函数?
bool BigData::IsOverFlow()const
{
	//max(0x7fffffffffffffff)9223372036854775807
	//min(0x8000000000000000)-9223372036854775808

	//1.处理符号问题
	string tmp("+9223372036854775807");
	if (m_sData[0] == '-')
		tmp = "-9223372036854775808";

	//2.字符串的比较
	if (m_sData.size() < tmp.size())
	{
		return false;
	}
	else if (m_sData.size() == tmp.size() && tmp >= m_sData)
	{
		return false;
	}
	else
	{
		return true;
	}
}

//Stream output. Implemented as a friend free function so the stream can be the
//left operand while the private members stay readable.
//Values inside the INT64 range are printed from m_llData; larger values are
//printed from the text form, without a leading '+'.
ostream& operator<<(ostream& os, const BigData& data)
{
	if (data.IsOverFlow())
	{
		const char* text = data.m_sData.c_str();
		os << (text[0] == '+' ? text + 1 : text);
		return os;
	}

	os << data.m_llData;
	return os;
}

//内置类型时构造函数中string为“”,则需要将数据INT64转换为String类型
void BigData::INT64toString()
{
	INT64 tmp = m_llData;
	char symbol = '+';
	if (m_llData < 0)
	{
		symbol = '-';
	}

	m_sData.append(1, symbol);
	while (tmp)
	{
		int ptmp = tmp % 10;
		if (ptmp < 0)
		{
			ptmp = 0 - ptmp;
		}
		char ch = ptmp + '0';
		m_sData.append(1, ch);
		tmp /= 10;
	}

	char* leftStr = (char*)m_sData.c_str() + 1;
	char* rightStr = leftStr + m_sData.size() - 2;

	while (leftStr < rightStr)
	{
		char exchange = *leftStr;
		*leftStr++ = *rightStr;
		*rightStr-- = exchange;
	}
}

//Compares two unsigned digit runs given as pointer + length.
//Returns true when left >= right: the longer run wins, and runs of equal
//length are compared with strncmp over RightSize characters.
bool BigData::IsLeftNumBig(const char* left, int LeftSize, const char* right, int RightSize)
{
	if (LeftSize != RightSize)
		return LeftSize > RightSize;

	return strncmp(left, right, RightSize) >= 0;
}

//Subtracts the digit run `right` from the digit run `left` repeatedly, in
//place, for as long as left >= right, and returns the number of subtractions
//as a character ('0' + count). On return the caller's buffer holds the
//remainder, possibly with leading '0' characters, because only the local
//pointer is advanced past them.
//left / LeftSize   : writable dividend window and its length
//right / RightSize : divisor digits and their length
char BigData::LoopSub(char* left, int LeftSize, char* right, int RightSize)
{
	assert(left != NULL && right !=NULL);
	char ret = '0';
	while (IsLeftNumBig(left, LeftSize, right, RightSize))
	{
		//One subtraction, least-significant digit first. The borrow is carried
		//in a variable and keeps propagating after the divisor's digits are
		//used up, so no digit is ever left holding a character below '0'.
		int borrow = 0;
		int indexR = RightSize - 1;
		for (int indexL = LeftSize - 1; indexL >= 0; --indexL, --indexR)
		{
			if (indexR < 0 && borrow == 0)
				break;

			int digit = (left[indexL] - '0') - borrow;
			if (indexR >= 0)
				digit -= right[indexR] - '0';

			borrow = (digit < 0) ? 1 : 0;
			if (borrow)
				digit += 10;
			left[indexL] = static_cast<char>(digit + '0');
		}

		//Skip the zeros the subtraction produced at the front, e.g.
		//33333333000000000000000000 / 33333333
		while (LeftSize > 0 && *left == '0')
		{
			++left;
			--LeftSize;
		}
		++ret;
	}
	return ret;
}

//===============================================================
//Addition.
//1. When both operands fit in INT64 and the sum fits too, add natively.
//2. Otherwise work on the text forms: operands of equal sign go to Add;
//   operands of different sign are rewritten as a - (-b), so Sub receives two
//   operands carrying the same sign.
BigData BigData::operator+(const BigData& data)
{
	if (!IsOverFlow() && !data.IsOverFlow())
	{
		//Overflow test that never overflows itself: only the bound on the
		//side the sum can cross is computed.
		const INT64 minValue = -MAX_INT64 - 1;
		const INT64 lhs = m_llData;
		const INT64 rhs = data.m_llData;
		const bool fits = (rhs >= 0) ? (lhs <= MAX_INT64 - rhs)
		                             : (lhs >= minValue - rhs);
		if (fits)
			return BigData(lhs + rhs);
	}

	if (m_sData[0] == data.m_sData[0])
		return BigData(Add(m_sData, data.m_sData).c_str());

	//Different signs: a + b == a - (-b). Give the right operand the left
	//operand's sign before handing both to Sub.
	string negated(1, m_sData[0]);
	if (data.m_sData.size() > 1)
		negated.append(data.m_sData, 1, string::npos);
	return BigData(Sub(m_sData, negated).c_str());
}

//Subtraction.
//1. When both operands fit in INT64 and the difference fits too, subtract
//   natively.
//2. Otherwise work on the text forms: operands of equal sign go to Sub;
//   operands of different sign are rewritten as a + (-b), so Add receives two
//   operands carrying the left operand's sign.
BigData BigData::operator-(const BigData& data)
{
	if (!IsOverFlow() && !data.IsOverFlow())
	{
		//Overflow test that never overflows itself: only the bound on the
		//side the difference can cross is computed.
		const INT64 minValue = -MAX_INT64 - 1;
		const INT64 lhs = m_llData;
		const INT64 rhs = data.m_llData;
		const bool fits = (rhs <= 0) ? (lhs <= MAX_INT64 + rhs)
		                             : (lhs >= minValue + rhs);
		if (fits)
			return BigData(lhs - rhs);
	}

	if (m_sData[0] == data.m_sData[0])
		return BigData(Sub(m_sData, data.m_sData).c_str());

	//Different signs: a - b == a + (-b). Give the right operand the left
	//operand's sign so the sum carries the correct sign.
	string negated(1, m_sData[0]);
	if (data.m_sData.size() > 1)
		negated.append(data.m_sData, 1, string::npos);
	return BigData(Add(m_sData, negated).c_str());
}

//Multiplication.
//0. A zero operand gives zero. m_llData is consulted only for operands that
//   fit in INT64, because it does not hold the value of larger ones.
//1. When both operands fit and the product fits too, multiply natively.
//2. Otherwise multiply the text forms with Mul.
BigData BigData::operator*(const BigData& data)
{
	const bool lhsFits = !IsOverFlow();
	const bool rhsFits = !data.IsOverFlow();

	if ((lhsFits && m_llData == 0) || (rhsFits && data.m_llData == 0))
		return BigData(INT64(0));

	if (lhsFits && rhsFits)
	{
		//All bounds use signed arithmetic. Each division is chosen so that
		//the smallest value is never divided by -1.
		const INT64 minValue = -MAX_INT64 - 1;
		const INT64 lhs = m_llData;
		const INT64 rhs = data.m_llData;
		bool fits;
		if (lhs > 0)
			fits = (rhs > 0) ? (rhs <= MAX_INT64 / lhs) : (rhs >= minValue / lhs);
		else
			fits = (rhs < 0) ? (rhs >= MAX_INT64 / lhs) : (lhs >= minValue / rhs);

		if (fits)
			return BigData(lhs * rhs);
	}

	return BigData((Mul(m_sData, data.m_sData)).c_str());
}

//Integer division (the quotient only).
//Special cases are handled first, then native division when both operands fit
//in INT64, and finally string long division through Div.
//A zero divisor prints the message, trips the assert and, in builds without
//asserts, returns zero instead of going on to divide.
BigData BigData::operator/(const BigData& data)
{
	//0. Invalid divisor. A text form of at most sign + one digit means
	//m_llData holds the exact value.
	if (data.m_sData.size() <= 2 && data.m_llData == 0)
	{
		cout << "除数 invalid!" << endl;
		assert(false && "BigData: division by zero");
		return BigData(INT64(0));
	}

	//1. Zero dividend
	if (m_sData.size() <= 2 && m_llData == 0)
		return BigData(INT64(0));

	//Both text forms now hold a sign and at least one digit, so the digits
	//can be compared from index 1 on.
	const int order = m_sData.compare(1, string::npos, data.m_sData, 1, string::npos);

	//|dividend| < |divisor| gives zero
	if (m_sData.size() < data.m_sData.size() ||
		(m_sData.size() == data.m_sData.size() && order < 0))
	{
		return BigData(INT64(0));
	}

	//2. Equal magnitudes give +1 or -1
	if (order == 0)
		return BigData(INT64(m_sData[0] == data.m_sData[0] ? 1 : -1));

	//3. Divisor is +1 or -1: the dividend with the sign adjusted
	if (data.m_sData == "-1" || data.m_sData == "+1")
	{
		string result = m_sData;
		result[0] = (m_sData[0] == data.m_sData[0]) ? '+' : '-';
		return BigData(result.c_str());
	}

	//4. Both operands fit in INT64. The one overflowing case, the smallest
	//value divided by -1, was already handled by step 3.
	if (!IsOverFlow() && !data.IsOverFlow())
		return BigData(m_llData / data.m_llData);

	//5. At least one operand is out of range: string long division
	return BigData((Div(m_sData, data.m_sData)).c_str());
}

//================================================================
//Adds the magnitudes of two operands given as sign + digits and returns
//sign + digits. The result takes the sign of the longer operand, so callers
//pass operands that carry the same sign. The result may start with a '0'
//digit when there is no final carry.
string BigData::Add(string left, string right)
{
	int leftSize = left.size();
	int rightSize = right.size();
	//1. Make `left` the longer operand
	if (leftSize < rightSize)
	{
		swap(left, right);
		swap(leftSize, rightSize);
	}

	//Both operands empty: nothing to add, return the text form of zero
	if (leftSize == 0)
		return string(1, '+');

	//2. Result: sign, one digit for a final carry, then the digits of `left`
	string result(leftSize + 1, '0');
	result[0] = left[0];

	//3. Add digit by digit from the right. The carry is a number and starts
	//at 0, not at the character '0'.
	int carry = 0;
	for (int index = 1; index < leftSize; ++index)
	{
		int sum = (left[leftSize - index] - '0') + carry;
		if (index < rightSize)
		{
			sum += right[rightSize - index] - '0';
		}
		result[leftSize - index + 1] = static_cast<char>(sum % 10 + '0');
		carry = sum / 10;
	}
	result[1] = static_cast<char>(carry + '0');
	return result;
}

//Subtracts the smaller magnitude from the larger one. Operands and result
//have the form sign + digits. The result keeps the left operand's sign when
//|left| >= |right| and takes the opposite sign otherwise. Only the digits are
//compared, never the sign characters, so the rule holds for a - b with equal
//signs as well as for a + b with different signs. The result may contain
//leading '0' digits.
string BigData::Sub(string left, string right)
{
	//1. Operand order and result sign
	char symbol = left[0];
	int leftSize = left.size();
	int rightSize = right.size();
	const bool leftIsSmaller =
		leftSize < rightSize ||
		(leftSize == rightSize && leftSize > 1 &&
		 left.compare(1, string::npos, right, 1, string::npos) < 0);
	if (leftIsSmaller)
	{
		swap(left, right);
		swap(leftSize, rightSize);
		symbol = (symbol == '+') ? '-' : '+';
	}

	//Both operands empty: return the text form of zero
	if (leftSize == 0)
		return string(1, '+');

	//2. Result: sign plus as many digits as the larger operand
	string result(leftSize, '0');
	result[0] = symbol;

	//3. Subtract digit by digit from the right, carrying the borrow along
	int borrow = 0;
	for (int index = 1; index < leftSize; ++index)
	{
		int digit = (left[leftSize - index] - '0') - borrow;
		if (index < rightSize)
		{
			digit -= right[rightSize - index] - '0';
		}
		borrow = (digit < 0) ? 1 : 0;
		if (borrow)
		{
			digit += 10;
		}
		result[leftSize - index] = static_cast<char>(digit + '0');
	}
	return result;
}

//Schoolbook multiplication of two operands given as sign + digits; returns
//sign + digits. The sign is '+' for equal operand signs and '-' otherwise.
//The result may contain leading '0' digits.
string BigData::Mul(string left, string right)
{
	//1. Sign of the product
	const char symbol = (left[0] == right[0]) ? '+' : '-';

	//2. Make `left` the shorter operand so the outer loop is the short one
	int leftSize = left.size();
	int rightSize = right.size();
	if (leftSize > rightSize)
	{
		swap(left, right);
		swap(leftSize, rightSize);
	}

	//An operand without digits is zero
	if (leftSize <= 1)
		return string(1, '+');

	//3. Sign plus (leftSize - 1) + (rightSize - 1) digits, all '0' to start
	string result(rightSize + leftSize - 1, '0');
	result[0] = symbol;
	const int resultSize = result.size();

	for (int indexL = 1; indexL < leftSize; ++indexL)
	{
		//Each digit of `left` writes its row one position further left
		const int offset = indexL - 1;
		const int digitL = left[leftSize - indexL] - '0';

		//A zero digit adds nothing to the result
		if (digitL == 0)
			continue;

		//The carry is a number and starts at 0, not at the character '0'
		int carry = 0;
		int indexR = 1;
		for (; indexR < rightSize; ++indexR)
		{
			const int pos = resultSize - indexR - offset;
			//digit product + incoming carry + what earlier rows left here
			const int value = digitL * (right[rightSize - indexR] - '0')
			                + carry + (result[pos] - '0');
			result[pos] = static_cast<char>(value % 10 + '0');
			carry = value / 10;
		}
		//No earlier row has written to this position, so assign directly
		result[resultSize - indexR - offset] = static_cast<char>(carry + '0');
	}
	return result;
}

//Long division of two operands given as sign + digits; returns the quotient
//as sign + digits. The sign is '+' for equal operand signs and '-' otherwise.
//Exactly one quotient digit is produced per dividend digit, so the result may
//start with '0' digits. A divisor without digits trips the assert and, in
//builds without asserts, yields "<sign>0".
string BigData::Div(string left, string right)
{
	string result(1, (left[0] == right[0]) ? '+' : '-');

	if (right.size() <= 1)
	{
		assert(false && "BigData::Div: divisor has no digits");
		result.append(1, '0');
		return result;
	}

	char* divisor = &right[1];
	const int divisorSize = static_cast<int>(right.size()) - 1;

	//Running remainder, kept without leading zeros. It stays below the
	//divisor between steps, so after taking in one digit it is at most one
	//digit longer than the divisor.
	string remainder;
	for (string::size_type index = 1; index < left.size(); ++index)
	{
		//Bring down the next dividend digit; a zero remainder followed by a
		//'0' digit stays empty
		if (!(remainder.empty() && left[index] == '0'))
			remainder.append(1, left[index]);

		char digit = '0';
		if (!remainder.empty() &&
			IsLeftNumBig(remainder.c_str(), static_cast<int>(remainder.size()),
			             divisor, divisorSize))
		{
			//Repeated subtraction gives this quotient digit and leaves the
			//new remainder in place, possibly with leading zeros
			digit = LoopSub(&remainder[0], static_cast<int>(remainder.size()),
			                divisor, divisorSize);
			remainder.erase(0, remainder.find_first_not_of('0'));
		}
		result.append(1, digit);
	}
	return result;
}

测试用例如下:

#include"BigData.h"

void funAdd()
{
	//=============================================
	//内置类型INT64检测
	BigData left(1234);
	BigData right(789);
	cout << left + right << endl;

	BigData left1(MAX_INT64);
	BigData right1(3);
	cout << left1 + right1 << endl;

	/error C4146: 一元负运算符应用于无符号类型,结果仍为无符号类型/
	BigData left2(MIN_INT64);
	BigData right2(-2);
	cout << left2 + right2 << endl;

	BigData left3(MAX_INT64);
	BigData right3(-999);
	cout << left3 + right3 << endl;

	//===============================================
	//字符串的形式检测
	BigData left4("99999999999999999999999999999999999999999999");
	BigData right4("11111111111111111111111111111111111111111111");
	cout << left4 + right4 << endl;

	BigData left5("11111111111111111111111111111111");
	BigData right5("999999999999999999999999999999999");
	cout << left5 + right5 << endl;
}

void funSub()
{
	//==================================================
	//内置类型INT64检测
	BigData left(1234);
	BigData right(34);
	cout << left - right << endl;

	BigData left1(MAX_INT64);
	BigData right1(-999);
	cout << left1 - right1 << endl;

	BigData left2(MIN_INT64);
	BigData right2(-999);
	cout << left2 - right2 << endl;
	
	//===============================================
	//字符串的形式检测
	BigData left3("111111111111111111111111111111111111111111111");
	BigData right3("9999");
	cout << left3 - right3 << endl;

	BigData left4("1111");
	BigData right4("999999999999999999999999999999999");
	cout << left4 - right4 << endl;

	BigData left5("111111111111111111111111111111111");
	BigData right5("999999999999999999999999999999999");
	cout << left5 - right5 << endl;
}

void funMul()
{
	//==================================================
	//内置类型INT64检测
	BigData left(88);
	BigData right(999);
	cout << left * right << endl;

	BigData left1(MAX_INT64);
	BigData right1(22);
	cout << left1 * right1 << endl;

	BigData left2(MIN_INT64 + 1);
	BigData right2(-22);
	cout << left2 * right2 << endl;

	BigData left3(999);
	BigData right3("9999999999999999999");
	cout << left3 * right3 << endl;

	//===============================================
	//字符串的形式检测
	BigData left4("999");
	BigData right4("99999999999999999999999999999999999999999");
	cout << left4 * right4 << endl;

	BigData left5("111111111111111111111111111111111111111111");
	BigData right5(-999);
	cout << left5 * right5 << endl;

	BigData left6(110011);
	BigData right6("22222222222222222222222222222222222222222222");
	cout << left6 * right6 << endl;

	BigData left7(INT64(0));
	BigData right7("22222222222222222222222222222222222222222222");
	cout << left7 * right7 << endl;
}

void funDiv()
{
	//==================================================
	//内置类型INT64检测
	BigData left(222);
	BigData right(33);
	cout << left / right << endl;

	BigData left1(33);
	BigData right1(34);
	cout << left1 / right1 << endl;

	BigData left2(33);
	BigData right2(-33);
	cout << left2 / right2 << endl;

	BigData left3(MIN_INT64);		//?no
	BigData right3(-1);
	cout << left3 / right3 << endl;

	//===============================================
	//字符串的形式检测
	BigData left4("22222222222222222222222222");
	BigData right4("33");
	cout << left4 / right4 << endl;

	BigData left5("22222220000000000000000000");
	BigData right5("33");
	cout << left5 / right5 << endl;

	BigData left6("22222000000000222222222222");
	BigData right6("33");
	cout << left6 / right6 << endl;
}

//Test driver. It prints what atoi makes of a set of sample strings, then what
//the BigData string constructor makes of the same strings so the two lists can
//be compared, and finally runs the four arithmetic test functions.
int main()
{
	const char* samples[] =
	{
		"1234567890",
		"+123456789",
		"00001234",
		"123456ahdks",
		"+",
		"    ",
		"-123456",
		"0000001212324243fdjvkfdjv"
	};
	const int sampleCount = sizeof(samples) / sizeof(samples[0]);

	//Library reference: atoi
	cout << "===========================库函数测试============================" << endl;
	for (int i = 0; i < sampleCount; ++i)
		cout << atoi(samples[i]) << endl;

	//BigData string constructor on the same inputs
	cout << "==========================实现构造函数测试============================" << endl;
	cout << "测试结果与库函数测试的结果做出比较是否相同" << endl;
	for (int i = 0; i < sampleCount; ++i)
		cout << BigData(samples[i]) << endl;

	//+ - * / test cases
	cout << "======================+ - * /测试用例=================================" << endl;
	cout << "======================Add测试用例=================================" << endl;
	funAdd();
	cout << "======================Sub测试用例=================================" << endl;
	funSub();
	cout << "======================Mul测试用例=================================" << endl;
	funMul();
	cout << "======================Div测试用例=================================" << endl;
	funDiv();
	return 0;
}

测试结果如下图:




项目中遇到的问题:

1.#ifndef 宏  与 #pragma once 都是防止头文件重复包含,区别是什么呢;
  #define 宏
  #endif

2.构造函数传入的是内置类型,需要将整型转换为string,以便于在处理数据时方便处理,即函数INT64toString;
3.数据为负数时,函数INT64toString中tmp%10=负数,导致String成为乱码。所以往进放结果时需要处理为正数;
4.程序中在语句min = MIN_INT64 - m_llData;的处理中,必须要使用临时变量min去当做限制条件,否则会出现错误:error C4146: 一元负运算符应用于无符号类型,结果仍为无符号类型;
5.operator+ 同号:Add  异号:Sub
  operator- 同号:Sub  异号:Add
6.函数Mul中,某注释处‘0’加了多次,所以处理时注意;
7.string的resize(size)函数与函数assign(size,'0')的应用场景,及函数append(size,char);
8.注意Div中的函数调用IsLeftNumBig(参数处理细节)与函数调用LoopSub(函数处理)。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值