高效地读取&解析文件(fread)

这是我自己写的一段读取资源列表的源码。思路来自 Lua 的单词分割(词法解析)模块,但没有像 Lua 那样通过 buf 来储存每个 WORD,而是采用 0x00 字符串结束符来分割各个 WORD。这种做法有点投机取巧,并不是绝对安全的方法,所以并不推荐照搬这种写法,只是希望大家能从中汲取解析文件的思路 ^_^

/*****************************   
*        ResManager.h        *   
*****************************/
#pragma once
  
// One static image entry parsed from an ":img" section of the resource list.
// ResManagerClass::LoadResArray fills one of these per three-field record.
struct img_member
{
    wchar_t* name;   // first field of the record, converted to wide characters
    wchar_t* path;   // second field of the record, converted to wide characters
    bool     mask;   // true only when the third field is exactly "yes"
};
  
// One entry parsed from an ":aimg" section of the resource list.
// ResManagerClass::LoadResArray fills one of these per four-field record.
struct aimg_member
{
    wchar_t* name;   // first field of the record, converted to wide characters
    wchar_t* path;   // second field of the record, converted to wide characters
    int      hnum;   // third field, parsed with atoi (presumably a horizontal count -- confirm)
    int      vnum;   // fourth field, parsed with atoi (presumably a vertical count -- confirm)
};
  
// One entry parsed from a ":music" section of the resource list.
// Unlike the image structs, the strings stay as narrow char strings;
// LoadResArray stores the new[]-allocated tokens here directly.
struct music_member
{
    char* name;   // first field of the record
    char* path;   // second field of the record
};
  
// Loads a resource list file and keeps the parsed entries grouped by type.
//
// The list elements are allocated with `new` by LoadResArray; this class has
// no destructor, so it never releases them itself.
class ResManagerClass
{
public:
    // Start with null file/buffer state so that instances created on the
    // stack or heap never carry indeterminate pointers.
    ResManagerClass() : f(0), buf(0), filelen(0) {}

    std::list<img_member*> imglist;      // entries from ":img" sections
    std::list<aimg_member*> aimglist;    // entries from ":aimg" sections
    std::list<music_member*> musiclist;  // entries from ":music" sections

    // Parses the resource list at `resfile` and appends its entries to the
    // lists above. Returns false when the file cannot be opened.
    bool LoadResArray(const char *resfile);

private:
    FILE *f;                 // file handle used while loading

    char *buf;               // raw contents of the file being parsed
    unsigned long filelen;   // number of bytes in buf
};
  
// Declaration of the global ResManagerClass instance; it is defined in ResManager.cpp.
extern ResManagerClass ResManager;
                                                                                                  
                                                  
/*****************************   
*       ResManager.cpp       *   
*****************************/
#include "StdAfx.h"
#include "resmanager.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
using namespace std;
  
// Number of whitespace-separated fields that make up one record of each type.
#define MEMBER_NUM_IMG   3   // name, path, mask flag
#define MEMBER_NUM_AIMG  4   // name, path, hnum, vnum
#define MEMBER_NUM_MUSIC 2   // name, path

// Section of the resource list currently being parsed.
// `none` means no recognised ":type" marker is active.
enum ResStyle
{
    img,
    aimg,
    music,
    none
};
  
// Definition of the global resource manager that ResManager.h declares as `extern`.
ResManagerClass ResManager;
  
  
bool ResManagerClass::LoadResArray(const char *resfile)
{
    if(fopen_s(&f,resfile,"rb"))
        return false;
  
    fseek(f,0,2); filelen = ftell(f); fseek(f,0,0);
    buf = new char[filelen];
    fread(buf,filelen,1,f);
  
    //初始化各种mark
    bool in_note = false, in_string = false;
    int string_strat_pos = -1, mn_img = 0, mn_aimg = 0, mn_music = 0;
    char *string = 0;
    ResStyle style_entry = none;
    char *imgmember_str[MEMBER_NUM_IMG], *aimgmember_str[MEMBER_NUM_AIMG], *musicmember_str[MEMBER_NUM_MUSIC];
  
    for(unsigned long p=0;p<filelen;p++)
    {
        switch(buf[p])
        {
        case ' ': case '\f': case '\t': case '\v':      //跳过空格制表符
        case '\n': case '\r':                           //跳过换行
            if(in_note) break;
            if(in_string)
            {
                buf[p] = 0;
  
                int slen = p-string_strat_pos + 1;
                string = new char[slen];
                strcpy_s(string,slen,buf+string_strat_pos);
  
                string_strat_pos = -1;
                in_string = false;
            }
            break;
  
        //注释
        case '/':
            if(buf[p+1]=='*') {p++; in_note = true;}
            break;
        case '*':
            if(buf[p+1]=='/')
                if(in_note) {p++; in_note = false;};
            break;
  
        //类型入口
        case ':':
            if(buf[p+1]=='i'&&buf[p+2]=='m'&&buf[p+3]=='g') {style_entry=img;p+=3;}
            else if(buf[p+1]=='a'&&buf[p+2]=='i'&&buf[p+3]=='m'&&buf[p+4]=='g') {
                style_entry=aimg;p+=4;}
            else if(buf[p+1]=='m'&&buf[p+2]=='u'&&buf[p+3]=='s'&&buf[p+4]=='i'&&buf[p+5]=='c') {
                style_entry=music;p+=5;}
            else style_entry=none;
            break;
  
  
        //String
        default:
            if(in_note) break;
  
            if(!in_string)
            {
                string_strat_pos = p;
                in_string = true;
            }
            else
                //到达文件尾
                if(p==filelen-1)
                {
                    int slen = p-string_strat_pos + 2;
                    string = new char[slen];
                    for(int i=0;i<slen+1;i++)
                        string[i] = buf[string_strat_pos++];
                    string[slen-1]=0;
                }
  
            break;
        }
  
        //接收到string的话
        if(string)
        {
            //确定资源类型
            switch(style_entry)
            {
            case img:
                //将获得的string放入resarray_member
                if(mn_img < MEMBER_NUM_IMG)
                {
                    imgmember_str[mn_img] = string;
                    string = 0;
                }
                //string达到数量时,放入一个pic_member中,并push list
                if(++mn_img == MEMBER_NUM_IMG)
                {
                    img_member *im = new img_member;
                    im->name = ctow(imgmember_str[0]);
                    im->path = ctow(imgmember_str[1]);
                    im->mask = !strcmp("yes",imgmember_str[2]);
  
                    imglist.push_back(im);
  
                    delete[] imgmember_str[0];
                    delete[] imgmember_str[1];
                    delete[] imgmember_str[2];
  
                    mn_img=0;
                }
                break;
            case aimg:
                if(mn_aimg < MEMBER_NUM_AIMG)
                {
                    aimgmember_str[mn_aimg] = string;
                    string = 0;
                }
                if(++mn_aimg == MEMBER_NUM_AIMG)
                {
                    aimg_member *aim = new aimg_member;
                    aim->name = ctow(aimgmember_str[0]);
                    aim->path = ctow(aimgmember_str[1]);
                    aim->hnum= atoi(aimgmember_str[2]);
                    aim->vnum= atoi(aimgmember_str[3]);
  
                    aimglist.push_back(aim);
  
                    delete[] aimgmember_str[0];
                    delete[] aimgmember_str[1];
                    delete[] aimgmember_str[2];
                    delete[] aimgmember_str[3];
  
                    mn_aimg=0;
                }
                break;
            case music:
                if(mn_music < MEMBER_NUM_MUSIC)
                {
                    musicmember_str[mn_music] = string;
                    string = 0;
                }
                if(++mn_music == MEMBER_NUM_IMG)
                {
                    music_member *micm = new music_member;
                    micm->name = musicmember_str[0];
                    micm->path = musicmember_str[1];
  
                    musiclist.push_back(micm);
  
                    mn_music=0;
                }
                break;
            }
        }
    }
  
    return true;
}
这里主要想提一个建议:在需要高效解析文件时,应该使用 C 语言风格的 fread,而不应该使用 iostream 或 fstream 的格式化输入,因为后者会多消耗将近几倍的时间。不过在数据量比较小的情况下,fstream 仍是不错的选择,它省去了自己实现 WORD 分割解析的工作。所以在需要从文件读取信息时,应当谨慎考虑选用哪种方法。


各种输入函数在不同平台下读取一千万个随机数所用的时间

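| 方法 \ 平台(时间:秒) | Linux gcc | Windows mingw | Windows VC2008 |
| --- | --- | --- | --- |
| scanf | 2.010 | 3.704 | 3.425 |
| cin | 6.380 | 64.003 | 19.208 |
| cin 取消同步 | 2.050 | 6.004 | 19.616 |
| fread | 0.290 | 0.241 | 0.304 |
| read | 0.290 | 0.398 | 不支持 |
| mmap | 0.250 | 不支持 | 不支持 |
| Pascal read | 2.160 | 4.668 | |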

来自:http://www.byvoid.com/blog/fast-readfile/

转载于:https://my.oschina.net/gal/blog/200210

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值