#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

#include <cstddef>
#include <fstream>
#include <iostream>
#include <string>

#include "rapidjson/document.h"
#include "rapidjson/stringbuffer.h"
#include "rapidjson/writer.h"
// 1024 * 1024 (presumably a buffer size in bytes -- TODO confirm; it is not
// referenced in the visible part of this file).
// The expansion is parenthesized so the macro behaves as a single value inside
// larger expressions: without the parentheses `x / BUFFER_SIZE` would expand to
// `x / 1024 * 1024`.
#define BUFFER_SIZE (1024*1024)
// NOTE(review): also unused in the visible part of this file; meaning unknown.
#define N 102400
// The functions below refer to cout/endl/string/ofstream without the std:: prefix.
using namespace std;
/**
 * Sample JSON text used by parse(). Pretty-printed, the document is:
 *
 *   {
 *     "Int": 1,
 *     "Double": 12.0000001,
 *     "String": "This is a string",
 *     "Object": {"name": "qq849635649", "age": 25},
 *     "IntArray": [10, 20, 30],
 *     "DoubleArray": [1.0, 2.0, 3.0],
 *     "StringArray": ["one", "two", "three"],
 *     "MixedArray": ["one", 50, false, 12.005],
 *     "People": [{"name": "qq849635649", "age": 0,  "sex": true},
 *                {"name": "qq849635649", "age": 10, "sex": false},
 *                {"name": "qq849635649", "age": 20, "sex": true}]
 *   }
 *
 * The literal itself contains no whitespace between tokens. Adjacent string
 * literals are concatenated by the compiler, one top-level member per line.
 */
std::string data(
    "{"
    "\"Int\":1,"
    "\"Double\":12.0000001,"
    "\"String\":\"This is a string\","
    "\"Object\":{\"name\":\"qq849635649\",\"age\":25},"
    "\"IntArray\":[10,20,30],"
    "\"DoubleArray\":[1.0,2.0,3.0],"
    "\"StringArray\":[\"one\",\"two\",\"three\"],"
    "\"MixedArray\":[\"one\",50,false,12.005],"
    "\"People\":["
    "{\"name\":\"qq849635649\",\"age\":0,\"sex\":true},"
    "{\"name\":\"qq849635649\",\"age\":10,\"sex\":false},"
    "{\"name\":\"qq849635649\",\"age\":20,\"sex\":true}"
    "]"
    "}");
void parse() {
/*//如果读文件
std::ifstream fin;
fin.open("target_column_config");
std::string str = "";
std::string str_in="";
while(getline(fin,str)) //一行一行地读到字符串str_in中
{
str_in=str_in+str+'\n';
}
rapidjson::Document doc;
//首先进行解析,没有解析错误再进行具体字段解析
if(!doc.Parse((str_in.c_str())).HasParseError()) {
if(doc.HasMember("target_column")&&doc["target_column"].IsString()) {
std::cout<<doc["target_column"].GetString()<<std::endl;
}
}*/
//创建解析对象
rapidjson::Document doc;
//首先进行解析,没有解析错误才能进行具体字段的解析
if(!doc.Parse(data.data()).HasParseError())
{
//1. 解析整数
if(doc.HasMember("Int") && doc["Int"].IsInt())
{
cout << "Int = " << doc["Int"].GetInt() << endl;
}
//2. 解析浮点型
if(doc.HasMember("Double") && doc["Double"].IsDouble())
{
cout << "Double = " << doc["Double"].GetDouble() << endl;
}
//3. 解析字符串
if(doc.HasMember("String") && doc["String"].IsString())
{
cout << "String = " << doc["String"].GetString() << endl;
}
//4. 解析结构体
if(doc.HasMember("Object") && doc["Object"].IsObject())
{
const rapidjson::Value& object = doc["Object"];
if(object.HasMember("name") && object["name"].IsString())
{
cout << "Object.name = " << object["name"].GetString() << endl;
}
if(object.HasMember("age") && object["age"].IsInt())
{
cout << "Object.age = " << object["age"].GetInt() << endl;
}
}
//5. 解析数组类型
//5.1 整型数组类型
if(doc.HasMember("IntArray") && doc["IntArray"].IsArray())
{
//5.1.1 将字段转换成为rapidjson::Value类型
const rapidjson::Value& array = doc["IntArray"];
//5.1.2 获取数组长度
size_t len = array.Size();
//5.1.3 根据下标遍历,注意将元素转换为相应类型,即需要调用GetInt()
for(size_t i = 0; i < len; i++)
{
cout << "IntArray[" << i << "] = " << array[i].GetInt() << endl;
}
}
//5.2 浮点型数组类型
if(doc.HasMember("DoubleArray") && doc["DoubleArray"].IsArray())
{
const rapidjson::Value& array = doc["DoubleArray"];
size_t len = array.Size();
for(size_t i = 0; i < len; i++)
{
//为防止类型不匹配,一般会添加类型校验
if(array[i].IsDouble())
{
cout << "DoubleArray[" << i << "] = " << array[i].GetDouble() << endl;
}
}
}
//5.3 字符串数组类型
if(doc.HasMember("StringArray") && doc["StringArray"].IsArray())
{
const rapidjson::Value& array = doc["StringArray"];
size_t len = array.Size();
for(size_t i = 0; i < len; i++)
{
//为防止类型不匹配,一般会添加类型校验
if(array[i].IsString())
{
cout << "StringArray[" << i << "] = " << array[i].GetString() << endl;
}
}
}
//5.4 混合型
if(doc.HasMember("MixedArray") && doc["MixedArray"].IsArray())
{
const rapidjson::Value& array = doc["MixedArray"];
size_t len = array.Size();
for(size_t i = 0; i < len; i++)
{
//为防止类型不匹配,一般会添加类型校验
if(array[i].IsString())
{
cout << "MixedArray[" << i << "] = " << array[i].GetString() << endl;
}
else if(array[i].IsBool())
{
cout << "MixedArray[" << i << "] = " << array[i].GetBool() << endl;
}
else if(array[i].IsInt())
{
cout << "MixedArray[" << i << "] = " << array[i].GetInt() << endl;
}
else if(array[i].IsDouble())
{
cout << "MixedArray[" << i << "] = " << array[i].GetDouble() << endl;
}
}
}
//5.5 结构体数组类型
if(doc.HasMember("People") && doc["People"].IsArray())
{
const rapidjson::Value& array = doc["People"];
size_t len = array.Size();
for(size_t i = 0; i < len; i++)
{
const rapidjson::Value& object = array[i];
//为防止类型不匹配,一般会添加类型校验
if(object.IsObject())
{
cout << "ObjectArray[" << i << "]: ";
if(object.HasMember("name") && object["name"].IsString())
{
cout << "name=" << object["name"].GetString();
}
if(object.HasMember("age") && object["age"].IsInt())
{
cout << ", age=" << object["age"].GetInt();
}
if(object.HasMember("sex") && object["sex"].IsBool())
{
cout << ", sex=" << (object["sex"].GetBool() ? "男" : "女") << endl;
}
}
}
}
}
/**
* 最后注意:因为rapidjson不会做安全校验,所以要自己做安全校验,以int整型为例
* “if(object.HasMember("age") && object["age"].IsInt()) {}”
* 这句校验很重要,既要校验有该子段,也要校验类型正确,否则会引发程序崩溃
*/
}
//遍历解析
void parse_1()
{
// 这个是用于遍历json数组,用于不知道name的前提下
string data = "{\"name\":\"qq849635649\",\"age\":20,\"sex\":true}";
rapidjson::Document dom;
if (! dom.Parse(data.data()).HasParseError())
{
for (rapidjson::Value::ConstMemberIterator iter = dom.MemberBegin(); iter != dom.MemberEnd(); ++iter)
{
string name = (iter->name).GetString();
const rapidjson::Value& value = iter->value;
if(value.IsString()) {
cout << name << " : " << value.GetString() << endl;
}
else if(value.IsInt()){
cout << name << " : " << value.GetInt() << endl;
}
else if(value.IsBool()){
cout << name << " : " << value.GetBool() << endl;
}
}
}
}
/* Sketch of the "raw_data_meta" document that Serialize_2() / parse_2() work with:
"raw_data_meta": {
    ...
    "target_column": 0,
    "job_type": "binary",   // regression, binary, multy
    "label_encode": [{"raw": "a", "code": 0}, {"raw": "b", "code": 1}]
    ...
} */
void Serialize_1()
{
rapidjson::StringBuffer strBuf;
rapidjson::Writer<rapidjson::StringBuffer> writer(strBuf);
writer.StartObject();
//1. 整数类型
writer.Key("Int");
writer.Int(0);
//2. 浮点类型
writer.Key("Double");
writer.Double(12.0000001);
//3. 字符串类型
writer.Key("String");
writer.String("This is a string");
//4. 结构体类型
writer.Key("Object");
writer.StartObject();
writer.Key("name");
writer.String("qq849635649");
writer.Key("age");
writer.Int(25);
writer.EndObject();
//5. 数组类型
//5.1 整型数组
writer.Key("IntArray");
writer.StartArray();
//顺序写入即可
writer.Int(10);
writer.Int(20);
writer.Int(30);
writer.EndArray();
//5.2 浮点型数组
writer.Key("DoubleArray");
writer.StartArray();
for(int i = 1; i < 4; i++)
{
writer.Double(i * 1.0);
}
writer.EndArray();
//5.3 字符串数组
writer.Key("StringArray");
writer.StartArray();
writer.String("one");
writer.String("two");
writer.String("three");
writer.EndArray();
//5.4 混合型数组
//这说明了,一个json数组内容是不限制类型的
writer.Key("MixedArray");
writer.StartArray();
writer.String("one");
writer.Int(50);
writer.Bool(false);
writer.Double(12.005);
writer.EndArray();
//5.5 结构体数组
writer.Key("People");
writer.StartArray();
for(int i = 0; i < 3; i++)
{
writer.StartObject();
writer.Key("name");
writer.String("qq849635649");
writer.Key("age");
writer.Int(i * 10);
writer.Key("sex");
writer.Bool((i % 2) == 0);
writer.EndObject();
}
writer.EndArray();
writer.EndObject();
string data = strBuf.GetString();
cout << data << endl;
}
void parse_2(std::string& data) {
//创建解析对象
rapidjson::Document doc;
//首先进行解析,没有解析错误才能进行具体字段的解析
if(!doc.Parse(data.data()).HasParseError())
{
cout << "parse doc OK !!!" << endl;
//1. 解析整数
if(doc.HasMember("target_column") && doc["target_column"].IsInt())
{
cout << "Int = " << doc["target_column"].GetInt() << endl;
}
//3. 解析字符串
if(doc.HasMember("job_type") && doc["job_type"].IsString())
{
cout << "String = " << doc["job_type"].GetString() << endl;
}
//5.5 结构体数组类型
if(doc.HasMember("label_encode") && doc["label_encode"].IsArray())
{
const rapidjson::Value& array = doc["label_encode"];
size_t len = array.Size();
for(size_t i = 0; i < len; i++)
{
const rapidjson::Value& object = array[i];
//为防止类型不匹配,一般会添加类型校验
if(object.IsObject())
{
//cout << "ObjectArray[" << i << "]: ";
if(object.HasMember("raw") && object["raw"].IsString())
{
cout << "raw=" << object["raw"].GetString();
}
if(object.HasMember("code") && object["code"].IsInt())
{
cout << ", code=" << object["code"].GetInt();
}
}
}
}
}
/**
* 最后注意:因为rapidjson不会做安全校验,所以要自己做安全校验,以int整型为例
* “if(object.HasMember("age") && object["age"].IsInt()) {}”
* 这句校验很重要,既要校验有该子段,也要校验类型正确,否则会引发程序崩溃
*/
}
void Serialize_2(int code)
{
rapidjson::StringBuffer strBuf;
rapidjson::Writer<rapidjson::StringBuffer> writer(strBuf);
//writer.Key("raw_data_meta");
//writer.StartArray();
writer.StartObject();
writer.Key("raw_data_meta");
writer.StartObject();
writer.Key("target_column");
writer.Int(code);
writer.Key("job_type");
writer.String("binary");
writer.EndObject();
writer.EndObject();
//writer.EndArray();
/* writer.StartArray();
writer.Key("label_encode");
writer.StartObject();
writer.Key("raw");
writer.String("a");
writer.Key("code");
writer.Int(0);
writer.EndObject();
writer.StartObject();
writer.Key("raw");
writer.String("b");
writer.Key("code");
writer.Int(1);
writer.EndObject();
writer.EndArray();
writer.EndObject(); */
// writer.StartObject();
// writer.Key("job_type");
//std::string str = "binary";
// writer.String("binary");
// writer.EndObject();
// writer.EndArray();
//5.5 结构体数组
/* writer.Key("label_encode");
writer.StartArray();
writer.StartObject();
writer.Key("raw");
writer.String("a");
writer.Key("code");
writer.Int(0);
writer.EndObject();
writer.StartObject();
writer.Key("raw");
writer.String("b");
writer.Key("code");
writer.Int(1);
writer.EndObject();
writer.EndArray(); */
//writer.EndArray();
//1. 整数类型
/* writer.Key("target_column");
writer.Int(code);
//2. 浮点类型
writer.Key("job_type");
//std::string str = "binary";
writer.String("binary");
//5.5 结构体数组
writer.Key("label_encode");
writer.StartArray();
writer.StartObject();
writer.Key("raw");
writer.String("a");
writer.Key("code");
writer.Int(0);
writer.EndObject();
writer.StartObject();
writer.Key("raw");
writer.String("b");
writer.Key("code");
writer.Int(1);
writer.EndObject();
writer.EndArray();
writer.EndObject();
writer.EndObject(); */
string data = strBuf.GetString();
cout << data << endl;
//将数据保存起来
/* ofstream ofs_raw_data_meta_file("meta.txt");
if (!ofs_raw_data_meta_file) {
std::cout << "read file failed!!!" << std::endl;
return;
}
ofs_raw_data_meta_file << data;
ofs_raw_data_meta_file.close();
string tmp_str = "";
ifstream ifs_raw_data_meta_file("meta.txt");
if (!ifs_raw_data_meta_file) {
std::cout << "read file failed!!!" << std::endl;
return;
}
std::string line;
while(getline(ifs_raw_data_meta_file, line))
{
tmp_str.append(Trim(line));
}
ifs_raw_data_meta_file.close();
//加载数据
rapidjson::Document doc1;
rapidjson::Document doc2;
//首先进行解析,没有解析错误才能进行具体字段的解析
if(!doc1.Parse(tmp_str.data()).HasParseError())
{
cout << "parse error!!!" << endl;
}
//首先进行解析,没有解析错误才能进行具体字段的解析
if(!doc2.Parse(tmp_str.data()).HasParseError())
{
cout << "parse error!!!" << endl;
} */
/* parse_2(data);
data.clear(); */
}
void Serialize_3()
{
rapidjson::StringBuffer strBuf;
rapidjson::Writer<rapidjson::StringBuffer> writer(strBuf);
writer.StartObject();
/* "separator" : 44 , //ASCII码
"column_cout":15,
"valid_data_line" : 10000,
"total_data_line" : 11000,
"valid_feature_cout": 10000000, */
//1. 整数类型
writer.Key("separator");
writer.Int(44);
writer.Key("column_cout");
writer.Int(15);
writer.Key("valid_data_line");
writer.Int(10000);
writer.Key("total_data_line");
writer.Int(11000);
writer.Key("valid_feature_cout");
writer.Int(10000000);
//5.5 结构体数组
writer.Key("feature_and_label");
//writer.StartObject();
writer.StartArray();
//特征1
writer.StartObject();
writer.Key("feature_name");
writer.String("column_1");
writer.Key("column_index");
writer.Int(1);
writer.Key("type");
writer.Int(0);
writer.Key("count");
writer.Int(1);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("1");
writer.Key("frequence");
writer.String("1000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("2");
writer.Key("frequence");
writer.String("2000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("3");
writer.Key("frequence");
writer.String("3000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("4");
writer.Key("frequence");
writer.String("4000");
writer.EndObject();
writer.EndArray();
writer.EndObject();
//特征2
writer.StartObject();
writer.Key("feature_name");
writer.String("column_2");
writer.Key("column_index");
writer.Int(2);
writer.Key("type");
writer.Int(1);
writer.Key("count");
writer.Int(2);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("0");
writer.Key("frequence");
writer.String("5000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("1");
writer.Key("frequence");
writer.String("6000");
writer.EndObject();
/* writer.StartObject();
writer.Key("value");
writer.String("7");
writer.Key("frequence");
writer.String("7000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("8");
writer.Key("frequence");
writer.String("8000");
writer.EndObject(); */
writer.EndArray();
writer.EndObject();
//特征3
writer.StartObject();
writer.Key("feature_name");
writer.String("column_3");
writer.Key("column_index");
writer.Int(3);
writer.Key("type");
writer.Int(0);
writer.Key("count");
writer.Int(3);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("9");
writer.Key("frequence");
writer.String("9000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("10");
writer.Key("frequence");
writer.String("10000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("11");
writer.Key("frequence");
writer.String("11000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("12");
writer.Key("frequence");
writer.String("12000");
writer.EndObject();
writer.EndArray();
writer.EndObject();
//特征4
writer.StartObject();
writer.Key("feature_name");
writer.String("column_4");
writer.Key("column_index");
writer.Int(4);
writer.Key("type");
writer.Int(0);
writer.Key("count");
writer.Int(4);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("13");
writer.Key("frequence");
writer.String("13000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("14");
writer.Key("frequence");
writer.String("14000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("15");
writer.Key("frequence");
writer.String("15000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("16");
writer.Key("frequence");
writer.String("16000");
writer.EndObject();
writer.EndArray();
writer.EndObject();
//特征2
/* writer.Key("feature_name");
writer.String("column_2");
writer.Key("column_index");
writer.Int(2);
writer.Key("type");
writer.Int(1);
writer.Key("count");
writer.Int(2000);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("5");
writer.Key("frequence");
writer.String("5000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("6");
writer.Key("frequence");
writer.String("6000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("7");
writer.Key("frequence");
writer.String("7000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("8");
writer.Key("frequence");
writer.String("8000");
writer.EndObject();
writer.EndArray();
//特征3
writer.Key("feature_name");
writer.String("column_3");
writer.Key("column_index");
writer.Int(3);
writer.Key("type");
writer.Int(0);
writer.Key("count");
writer.Int(3000);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("9");
writer.Key("frequence");
writer.String("9000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("10");
writer.Key("frequence");
writer.String("10000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("11");
writer.Key("frequence");
writer.String("11000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("12");
writer.Key("frequence");
writer.String("12000");
writer.EndObject();
writer.EndArray();
//特征4
writer.Key("feature_name");
writer.String("column_4");
writer.Key("column_index");
writer.Int(4);
writer.Key("type");
writer.Int(0);
writer.Key("count");
writer.Int(4000);
writer.Key("value_top_frequence");
writer.StartArray();
writer.StartObject();
writer.Key("value");
writer.String("13");
writer.Key("frequence");
writer.String("13000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("14");
writer.Key("frequence");
writer.String("14000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("15");
writer.Key("frequence");
writer.String("15000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("16");
writer.Key("frequence");
writer.String("16000");
writer.EndObject();
writer.EndArray(); */
writer.EndArray();
writer.EndObject();
/* writer.StartObject();
writer.Key("feature_name");
writer.String("column_1");
writer.Key("column_index");
writer.Int(0);
writer.Key("type");
writer.Int(1);
writer.Key("count");
writer.Int(30);
writer.StartArray();
writer.Key("value");
writer.String("1");
writer.Key("frequence");
writer.String("1000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("2");
writer.Key("frequence");
writer.String("2000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("3");
writer.Key("frequence");
writer.String("3000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("4");
writer.Key("frequence");
writer.String("4000");
writer.EndArray();
writer.EndOject();
writer.StartObject();
writer.Key("feature_name");
writer.String("column_1");
writer.Key("column_index");
writer.Int(0);
writer.Key("type");
writer.Int(0);
writer.Key("count");
writer.Int(2);
writer.StartArray();
writer.Key("value");
writer.String("1");
writer.Key("frequence");
writer.String("1000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("2");
writer.Key("frequence");
writer.String("2000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("3");
writer.Key("frequence");
writer.String("3000");
writer.EndObject();
writer.StartObject();
writer.Key("value");
writer.String("4");
writer.Key("frequence");
writer.String("4000");
writer.EndArray();
writer.EndOject();
writer.EndObject(); */
string data = strBuf.GetString();
cout << data << endl;
string file = "test.txt";
ofstream ofs_file(file.c_str());
if (!ofs_file) {
std::cout << "read file failed!!!" << std::endl;
return;
}
ofs_file << data;
ofs_file.close();
}
// Entry point: runs the Serialize_3() demo and exits with status 0.
int main()
{
    // The other demos in this file can be enabled here instead:
    //   Serialize_1();
    //   Serialize_2(0);
    //   parse();
    //   parse_1();
    //
    // Scratch experiments kept for reference:
    //   Document ptr_doc;
    //   ptr_doc.Parse("{}");
    //
    //   std::string ss = "44";
    //   char c = atoi(ss.c_str());
    //   cout << c;

    Serialize_3();

    return 0;
}