csv解析器-CSDN博客

头文件：

#ifndef __ClientModule_CSVFile_H__

#define __ClientModule_CSVFile_H__

#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

/// Project-wide alias for the narrow standard string type.
typedef std::string String;

/**
 * CSV file parser.
 *
 * CSV (Comma Separated Values) rules this class is written against:
 *  1. The file does not start with blank space; data is organised by line.
 *  2. Column names are optional; when present they occupy the first line.
 *  3. A record never spans several lines, and there are no empty lines.
 *  4. The half-width comma is the separator; an empty column must still be
 *     represented.
 *  5. A value containing a half-width comma is enclosed in half-width double
 *     quotes.
 *  6. A half-width double quote inside a value is escaped by doubling it ("").
 *  7. Quote/comma handling on write is the inverse of the handling on read.
 *  8. The character encoding is not restricted (ASCII, Unicode, ...).
 *
 * Row and column numbers accepted by Read(), readString() and Write() are
 * 1-based.
 */
class CSVFile
{
private:
    typedef std::vector<std::string> Field; ///< One parsed row (its cells).
    typedef std::vector<Field>       Table; ///< All parsed rows.

public:
    CSVFile();
    ~CSVFile();

public:
    /// Opens a CSV file (creating it first when it does not exist) and parses it.
    /// @param[in] strFilePath path of the file
    /// @return false when the path is null or the file cannot be opened
    bool open(const char* strFilePath);

    /// Writes the current table to another CSV file ("save as").
    /// @param[in] strFilePath path of the file to write
    bool save(const char* strFilePath);

    /// Writes the current table back to the path last given to open().
    bool save();

    /// Tells whether the internal file stream is currently open.
    bool isOpen();

    /// Looks up a column by its name in the header row.
    /// @return the 0-based position inside the header row, or -1 when the name
    ///         is null or not found. Note this is NOT the 1-based column
    ///         number expected by Read()/Write().
    int getFieldIndex(const char* fieldName);

    /// Number of columns recorded for the table.
    int getColumnSize() const { return mColumnCount; }

    /// Number of rows currently held in the table.
    int getRowSize() const { return static_cast<int>(mTable.size()); }

    /// Selects whether lines are converted from/to UTF-8 when reading/writing.
    void setUTF8(bool bUtf8);

public:
    /// Reads one cell and converts it to T with a stream extraction (operator>>).
    /// Because operator>> is used, Read<std::string> stops at the first
    /// whitespace; use readString() to get the whole cell.
    /// @param[in] row    1-based row number
    /// @param[in] column 1-based column number
    /// @return the converted value; T() when the row/column does not exist or
    ///         the cell cannot be converted
    template<class T>
    T Read(const int row, const int column) const
    {
        if (row < 1 || row > static_cast<int>(mTable.size()))
        {
            printf("CSVFile::Read() CANNOT GET ROW[%d]!\n", row);
            return T();
        }
        const Field& cells = mTable[row - 1];

        if (column < 1 || column > static_cast<int>(cells.size()))
        {
            printf("CSVFile::Read() CANNOT GET COLUMN[%d]!\n", column);
            return T();
        }

        // Value-initialise so a failed extraction (e.g. an empty cell) yields
        // T() instead of an indeterminate value.
        T data = T();
        std::stringstream ss;
        ss << cells[column - 1];
        ss >> data;
        return data;
    }

    /// Returns the raw text of one cell, whitespace included.
    /// @param[in] row    1-based row number
    /// @param[in] column 1-based column number
    /// @return the cell text; an empty string when the row/column does not exist
    std::string readString(const int row, const int column) const
    {
        if (row < 1 || row > static_cast<int>(mTable.size()))
        {
            printf("CSVFile::readString() CANNOT GET ROW[%d]!\n", row);
            return std::string();
        }
        const Field& cells = mTable[row - 1];

        if (column < 1 || column > static_cast<int>(cells.size()))
        {
            printf("CSVFile::readString() CANNOT GET COLUMN[%d]!\n", column);
            return std::string();
        }
        return cells[column - 1];
    }

    /// Writes one cell, formatting the value with a stream insertion (operator<<).
    /// The table grows automatically when the row or column does not exist yet.
    /// @param[in] row    1-based row number; a value below 1 appends a new row
    /// @param[in] column 1-based column number
    /// @param[in] data   value to store
    /// @return false (table left untouched) when column is below 1
    template<class T>
    bool Write(const int row, const int column, T data)
    {
        // Validate the column first so a rejected call does not leave
        // freshly appended empty rows behind.
        if (column < 1)
        {
            printf("CSVFile::Write() column number is error!\n");
            return false;
        }

        int rowIndex = row - 1;
        if (rowIndex < 0)
        {
            // Non-positive row number: append one new row and write into it.
            mTable.push_back(Field());
            rowIndex = static_cast<int>(mTable.size()) - 1;
        }
        else if (static_cast<int>(mTable.size()) < row)
        {
            // Not enough rows yet: grow up to the requested row.
            mTable.resize(row);
        }

        Field& cells = mTable[rowIndex];
        if (static_cast<int>(cells.size()) < column)
        {
            // Not enough columns yet: pad with empty cells.
            cells.resize(column);
        }

        std::stringstream ss;
        ss << data;
        cells[column - 1] = ss.str();

        // Keep the cached dimensions in step with the table.
        mRowCount = static_cast<int>(mTable.size());
        if (static_cast<int>(cells.size()) > mColumnCount)
        {
            mColumnCount = static_cast<int>(cells.size());
        }
        return true;
    }

    /// Placeholder: currently stores nothing and always returns true.
    /// @param[in] cell column number (unused)
    /// @param[in] data value (unused)
    template<class T>
    bool Push(const int cell, const T& data)
    {
        (void)cell;
        (void)data;
        return true;
    }

    /// Removes one row.
    bool deleteRow(const int row);

    /// Removes every row.
    bool deleteAllRow();

private:
    /// Reads and parses one line from the internal file stream.
    void readLine(Field& field);

    /// Writes one row to the internal file stream.
    void writeLine(const Field& field);

    /// Parses the whole opened file into the table.
    bool parse();

    /// Splits one line of CSV text into cells.
    void parseLine(const char* strLine, int nSize, Field& result);
    void parseLine(const std::string& strLine, Field& result);

    /// Writes one row to the given stream.
    void writeLine(std::fstream& file, const Field& field);

    /// Reads and parses one line from the given stream.
    void readLine(std::fstream& file, Field& field);

private:
    std::fstream mFile;        ///< File stream
    std::string  mFilePath;    ///< Path of the opened file
    Table        mTable;       ///< Parsed rows
    Field        mHead;        ///< Header row (first parsed row)
    std::string  mLine;        ///< Most recently read, not yet parsed line
    int          mRowCount;    ///< Number of rows
    int          mColumnCount; ///< Number of columns
    bool         m_bUtf8;      ///< Whether the file is UTF-8 encoded
};

#endif // __CSVFile_H__

源文件：

#include "stdafx.h"

#include "CSVFile.h"

#include <sstream>

#include <assert.h>

#include <iostream>

#include <stdio.h>

#include "FileUtility.h"

#include "StringUtility.h"

#pragma warning(disable: 4996)

/* Characters that have a special meaning for the CSV reader/writer. */
const char CSV_TAB          = '\t'; ///< Horizontal tab (0x09)
const char CSV_SPACE        = ' ';  ///< Blank (0x20)
const char CSV_CR           = '\r'; ///< Carriage return (0x0d)
const char CSV_LF           = '\n'; ///< Line feed (0x0a)
const char CSV_COMMA        = ',';  ///< Comma
const char CSV_SINGLE_QUOTE = '\''; ///< Single quote
const char CSV_DOUBLE_QUOTE = '"';  ///< Double quote

namespace detail
{
    /// Replaces every non-overlapping occurrence of old_str inside src_str
    /// with new_str, scanning from left to right. Text inserted by a
    /// replacement is never scanned again.
    /// @param[in,out] src_str string modified in place
    /// @param[in]     old_str text to look for; when empty nothing is replaced
    /// @param[in]     new_str replacement text
    /// @return number of replacements performed
    int replace(std::string& src_str, const std::string& old_str, const std::string& new_str)
    {
        // An empty pattern matches at every position and would never let the
        // loop below terminate.
        if (old_str.empty())
        {
            return 0;
        }

        int count = 0;
        // Keep the position as size_type so the comparison with npos is exact.
        std::string::size_type pos = 0;
        while ((pos = src_str.find(old_str, pos)) != std::string::npos)
        {
            src_str.replace(pos, old_str.length(), new_str);
            pos += new_str.length(); // continue after the inserted text
            ++count;
        }
        return count;
    }
}

/// Creates an empty parser: no rows, no columns, UTF-8 conversion disabled.
CSVFile::CSVFile()
    : mRowCount(0), mColumnCount(0), m_bUtf8(false)
{
    // Nothing else to set up; the remaining members default-construct.
}

/// Closes the internal file stream when the parser is destroyed.
CSVFile::~CSVFile()
{
    mFile.close();
}

bool CSVFile::open( const char* strFilePath)

{

// 检查传入的参数

if (strFilePath == 0) return false;

printf("begin open csv file: [%s]\n", strFilePath);

// 检查文件是否存在

if ( !FileUtility::fileExist(strFilePath) )

{

// 创建可读可写文件

FILE* f = fopen(strFilePath, "w+");

fclose(f);

}

// 关闭已经打开的

if (mFile.is_open())

{

mFile.close();//关闭文件

mFile.clear();//清除状态

}

// 打开文件

mFile.open(strFilePath, std::ios_base::in | std::ios_base::out);

if (!mFile.is_open())

{

goto FAILED;

}

printf("Open csv file OK: [%s]\n", strFilePath);

mFilePath = strFilePath;

// 清空数据

mTable.clear();

// 解析文件

return parse();

FAILED:

printf("open csv file FAILED!: [%s]\n", strFilePath);

return false;

}

bool CSVFile::save( const char* strFilePath )

{

std::fstream file(strFilePath, std::ios_base::out);

if (!file.is_open()) return false;

Table::iterator iter = mTable.begin();

Table::iterator iEnd = mTable.end();

for (;iter!=iEnd; ++iter)

{

Field& field = *iter;

writeLine(file, field);

}

file.close();

printf("save as cvs file: [%s] SUCCESS!\n", strFilePath );

return true;

}

bool CSVFile::save()

{

return save(mFilePath.c_str());

}

/// Reads the opened file line by line and appends every non-empty row to the
/// table. The first row read becomes the header; the row count and the widest
/// column count seen are recorded.
/// @return false when the stream reports an unrecoverable read error (badbit),
///         true otherwise
bool CSVFile::parse()
{
    int nRowCount = 0;
    int nCellCount = 0;

    // Forget any header left over from a previously parsed file.
    mHead.clear();

    // Loop on good() rather than !eof(): a stream that fails without reaching
    // end-of-file would otherwise keep this loop running forever.
    while (mFile.good())
    {
        Field field;
        readLine(field);
        if (field.empty()) continue; // blank line or nothing read

        const int nCells = static_cast<int>(field.size());
        if (nCells > nCellCount) nCellCount = nCells;

        ++nRowCount;
        if (nRowCount == 1) mHead = field;

        // Move the row into the table without copying its cells.
        mTable.push_back(Field());
        mTable.back().swap(field);
    }

    mRowCount = nRowCount;
    mColumnCount = nCellCount;
    return !mFile.bad();
}

void CSVFile::parseLine( const String& strLine, Field& result )

{

return parseLine(strLine.c_str(), (int)strLine.size(), result);

}

void CSVFile::parseLine( const char* strLine, int nSize, Field& result)

{

if (strLine==0) return;

result.clear();

bool bIsInWord = false;

bool bIsHaveSpace = false;

String strCurWorld;

for (int i=0; i<nSize; i++)

{

const char& ch = strLine[i];

if (ch == '\0')

{

if (i >= 1 && strLine[i-1] == CSV_COMMA)

{

strCurWorld = CSV_SPACE;

}

break;

}

bool bIsAdd = true;

switch (ch)

{

// 逗号

case CSV_COMMA:

{

if (!bIsInWord)

{

// 一项结束

result.push_back(strCurWorld);

bIsInWord = false;

bIsHaveSpace = false;

strCurWorld = "";

bIsAdd = false;

}

break;

// 双引号

case CSV_DOUBLE_QUOTE:

{

if (!bIsInWord)

{

bIsInWord = true;

bIsHaveSpace= true;

bIsAdd = false;

}

else

{

if (CSV_DOUBLE_QUOTE == strLine[i+1])

{

i++;

}

else if (bIsHaveSpace)

{

bIsInWord = false;

bIsAdd = false;

}

else

{

assert(0);

}

break;

default:

//bIsInWord = true;

break;

};

if (bIsAdd)

{

strCurWorld += ch;

}

if (!strCurWorld.empty())

{

result.push_back(strCurWorld);

}

/// Reads and parses the next line of the internal file stream.
/// @param[out] field receives the cells of that line
void CSVFile::readLine(Field& field)
{
    std::fstream& source = mFile;
    readLine(source, field);
}

/// Reads the next line (up to '\n') from a stream into mLine and parses it.
/// When the UTF-8 flag is set the line is first passed through
/// StringUtility::UTF8ToAnsi.
/// @param[in,out] file  stream to read from
/// @param[out]    field receives the cells; left untouched when nothing could
///                      be read or the line is empty
void CSVFile::readLine(std::fstream& file, Field& field)
{
    // Read one line. If the stream is already in a failed state getline does
    // not touch mLine, so clear it to avoid parsing the previous line again.
    if (!std::getline(file, mLine, '\n'))
    {
        mLine.clear();
        return;
    }

    if (mLine.empty()) return;

    // Convert the encoding.
    if (m_bUtf8)
    {
        StringUtility::UTF8ToAnsi(mLine, mLine);
    }

    // Split the line into cells.
    parseLine(mLine.c_str(), (int)mLine.size(), field);
}

void CSVFile::writeLine( const Field& field )

{

writeLine(mFile, field);

}

void CSVFile::writeLine( std::fstream& file, const Field& field )

{

Field::const_iterator iter =field.begin();

Field::const_iterator iEnd =field.end();

for (; iter!=iEnd; ++iter)

{

String str = *iter;

detail::replace(str, "\"", "\"\"");

// 转码

if (m_bUtf8)

{

StringUtility::AnsiToUTF8(str, str);

}

// 写入内容

if (str.find_first_of(CSV_COMMA) != String::npos)

{

// 塞两个双引号

file << CSV_DOUBLE_QUOTE << str << CSV_DOUBLE_QUOTE;

}

else

{

file << str;

}

if ( (iter + 1) != iEnd) file << CSV_COMMA; // 写入逗号

}

file << std::endl;

}

/// Finds a column by name in the header row (the first row parsed by open()).
/// @param[in] fieldName column name to look for (exact match)
/// @return 0-based position of the first matching header cell, or -1 when
///         fieldName is null or no header cell matches. Read()/Write() take
///         1-based column numbers, so add 1 before passing the result on.
int CSVFile::getFieldIndex(const char* fieldName)
{
    if (fieldName == 0) return -1;

    for (Field::const_iterator iter = mHead.begin(); iter != mHead.end(); ++iter)
    {
        if (*iter == fieldName)
        {
            return int(iter - mHead.begin());
        }
    }

    return -1;
}

bool CSVFile::deleteRow( const int row )

{

int index = row - 1;

if (index<0 || index>=(int)mTable.size()) return false;

mTable.erase(mTable.begin()+index);

return true;

}

/// Empties the table and resets the recorded row and column counts to zero.
/// @return always true
bool CSVFile::deleteAllRow()
{
    mTable.clear();
    mColumnCount = 0;
    mRowCount = 0;
    return true;
}

/// Reports whether the internal file stream is currently open.
bool CSVFile::isOpen()
{
    const bool bOpened = mFile.is_open();
    return bOpened;
}

void CSVFile::setUTF8( bool bUtf8 )

{

m_bUtf8 = bUtf8;

}

读取示例：

// Reading example: open "hello.csv" (open() also parses it), then fetch two
// cells of the first row. Row and column numbers are 1-based.
CSVFile csv;

csv.open("hello.csv");

// Row 1, column 1 converted to int.
int n = csv.Read< int>(1, 1);

// Row 1, column 2 as a string. Read<T> converts with operator>>, so this
// stops at the first whitespace; readString(1, 2) returns the whole cell.
std:: string str = csv.Read<std:: string>(1, 2);

写入示例：

// Writing example: build a one-row table in memory and save it to "hello.csv".
CSVFile csv;

// Start from an empty table.
csv.deleteAllRow();

// Write() grows the table as needed; row and column numbers are 1-based.
csv.Write< int>(1, 1, 1);

csv.Write<std:: string>(1, 2, "hello");

// Write all rows to the given file.
csv.save("hello.csv");

csv.save("hello.csv");