C++性能优化之时间效率优化

全文以SSD6的Exercise4为例。

任务目的,优化程序的时间效率。

/* substitute -- substitute strings in a list of files

  This program operates on a set of files listed on
  the command line. The first file specifies a list of
  string substitutions to be performed on the remaining
  files. The list of string substitutions has the form:

  "string 1" "replacement 1"
  "string 2" "replacement 2"
  ...

  If a string contains a double quote character or
  a backslash character, escape the character with
  backslash: "\"" denotes the string with one double
  quote character. "\\" contains one backslash.
  Each file is searched for instances of "string 1".
  Any occurences are replaced with "replacement 1".
  In a similar manner, all "string 2"s are replaced
  with "replacement 2"s, and so on.

  The results are written to the input file. Be sure
  to keep a backup of files if you do not want to lose
  the originals when you run this program.
*/

#include "afx.h"
#include "iostream"
using namespace std;
// parse a quoted string from buffer
// return final index in string
int parse1(CString* buffer, int start, CString* str) {
    // look for initial quote:
    int i = buffer->Find('\"', start);
    if (i != -1) {
        // copy to result string
        str->Empty();
        int j = 0; // index into str
        i++;       // skip over the opening double-quote
        // scan and copy up to the closing double-quote:
        while ((*buffer)[i] != 0) {
            if ((*buffer)[i] == '\\') {
                // read next char to see what to do
                i++;
                if ((*buffer)[i] != 0) {
                    str->Insert(j++, CString((*buffer)[i]));
                }
            } else if ((*buffer)[i] == '\"') {
                return i + 1;
            }
            str->Insert(j++, CString((*buffer)[i]));
            i++;
        }
    }
    return -1;
}

// parse two quoted strings from buffer; return false on failure
//
bool parse(CString* buffer, CString* pattern, CString* replacement) {
    int start = parse1(buffer, 0, pattern);
    if (start < 0) {
        return false;
    }
    start = parse1(buffer, start, replacement);
    return (start >= 0);
}

void substitute(CString* data, CString* pattern, CString* replacement) {
    int loc;
    // find every occurrence of pattern:
    for (loc = data->Find(*pattern, 0); loc >= 0; loc = data->Find(*pattern, 0)) {
        // delete the pattern string from loc:
        data->Delete(loc, pattern->GetLength());
        // insert each character of the replacement string:
        for (int i = 0; i < replacement->GetLength(); i++) {
            data->Insert(loc + i, (*replacement)[i]);
        }
    }
}

void do_substitutions(CString* data, CString* subs_filename) {
    TRY {
        CStdioFile file(*subs_filename, CFile::modeRead);
        while (true) {
            CString buffer; // holds line from file
            CString pattern;
            CString replacement;
            file.ReadString(buffer);
            // handle end of file
            if (buffer.GetLength() == 0)
                break;
            if (parse(&buffer, &pattern, &replacement)) {
                substitute(data, &pattern, &replacement);
            } else {
                cout << "Bad pattern/replacement line: " << buffer << endl;
                return;
            }
        }
    }
    CATCH(CFileException, e) { cout << "File could not be opened or read " << e->m_cause << endl; }
    END_CATCH
}

void process_file(CString* filename, CString* subs_filename) {
    // read in filename to a CString
    TRY {
        CFile file(*filename, CFile::modeRead);
        int   size = file.GetLength();
        // read the data, allocate more than we need
        char* data = new char[size + 16];
        file.Read(data, size);
        // files are not zero-terminated but string should be:
        data[size] = 0;
        // now we can make a CString from the data:
        CString content(data);
        delete data; // data is no longer needed
        do_substitutions(&content, subs_filename);
        // write the data
        file.Close();
        file.Open(*filename, CFile::modeWrite);
        file.Write(content, content.GetLength());
        file.SetLength(content.GetLength());
        file.Close();
    }
    CATCH(CFileException, e) { cout << "File could not be opened or read " << e->m_cause << " " << *filename << endl; }
    END_CATCH
}

int main(int argc, char* argv[]) {
    if (argc < 3) {
        cout << "Not enough input arguments" << endl;
        cout << "Usage: substitute subs-file src1 src2 ..." << endl;
    } else {
        CString subs_filename(argv[1]);
        for (int i = 2; i < argc; i++) {
            CString filename(argv[i]);
            process_file(&filename, &subs_filename);
        }
    }
    return 0;
}

程序做的事简单来说就是从replace.txt里获得模式串和替换串,然后遍历其他文件进行字符串替换。

性能优化任务,首先我们要进行性能检测。

性能检测的方式有很多,我选择VS自带的性能探测器,

 

分别用检测选项和CPU使用率选项,对程序检测的结果如下图。

 可以看到,最占时间的是substirute里的for循环。

这里做的事是遍历文件找匹配串,我们看for循环的条件,每一轮的loc的值,是调用find函数拿到的,而find函数的第二个参数是起始位置,所以显而易见我们不需要每一轮都从文件开头开始find,所以这里需要这样改。

void substitute(CString* data, CString* pattern, CString* replacement) {
    int loc;
    // find every occurrence of pattern:
    for (loc = data->Find(*pattern, 0); loc >= 0; loc = data->Find(*pattern, loc)) {
        // delete the pattern string from loc:
        data->Delete(loc, pattern->GetLength());
        // insert each character of the replacement string:
        data->Insert(loc, *replacement);
    }
}

同时insert这里不需要一个字符一个字符插入,因为插入一个字符和一个字符串的效率是差不多的

这里改完之后,程序执行时间由0.28降到0.25。

继续通过CPU占用率查看,发现IO操作占了很大一块时间,在这里也就是读取文件占时间很多,是一个可以优化的方向。

首先我们看程序的整体逻辑是遍历五个文件,然后每次遍历都要去从replace.txt里读字符串,这要执行5次不必要的IO操作,所以我们可以把这段重新写一下,改成先读replace.txt,然后把模式串和替换串都存在数组里,需要的时候直接操作数组就好,减少IO的时间。

void myIO(CStdioFile* fileSub, CString patterns[], CString replacements[]) {
    CString buffer;

    int i = 0;
    while (true) {
        fileSub->ReadString(buffer);
        // handle end of file
        if (buffer.GetLength() == 0)
            break;
        if(parse(&buffer, &patterns[i], &replacements[i])){

    }else {
            cout << "Bad pattern/replacement line: " << buffer << endl;
            return;
    }
        i++;
    }
}


int main(int argc, char* argv[]) {
    if (argc < 3) {
        cout << "Not enough input arguments" << endl;
        cout << "Usage: substitute subs-file src1 src2 ..." << endl;
    } else {
        CString    subs_filename(argv[1]);
        CStdioFile fileSub(subs_filename, CFile::modeRead);
        CString    patterns[20];
        CString    replacements[20];
        myIO(&fileSub, patterns, replacements);
        for (int i = 2; i < argc; i++) {
            CString filename(argv[i]);
            process_file(&filename, &fileSub, patterns, replacements);
        }
    }

    return 0;
}

这里优化完之后执行时间由0.25减少到0.22.

接着查看CPU占用率,发现delete占比还是很高,于是我试着用replace方法代替,结果发现效率真的变高了。

void do_substitutions(CString* data, CStdioFile* fileSub, CString patterns[], CString replacements[]) {
    TRY {
        int i = 0;  
        while (true) {
            // fileSub->ReadString(buffer);
            // handle end of file
            if (patterns[i].GetLength() == 0)
                break;
            //substitute(data, &patterns[i], &replacements[i]);
            data->Replace(patterns[i], replacements[i]);
            i++;
        }
    }
    CATCH(CFileException, e) { cout << "File could not be opened or read " << e->m_cause << endl; }
    END_CATCH
}

可能是replace的实现比较优秀吧,并且减少了一次函数调用。

然后接着改了几处getLength重复调用的情况。

最后一处优化点在读文件的地方。

5个文件每次都是读一遍关闭再打开再写入,我们可以让他只打开一次。

        CFile file(*filename, CFile::modeReadWrite);
        int   size = file.GetLength();
        // read the data, allocate more than we need
        char* data = new char[size + 16];
        file.Read(data, size);
        // files are not zero-terminated but string should be:
        data[size] = 0;
        // now we can make a CString from the data:
        CString content(data);
        delete data; // data is no longer needed
        do_substitutions(&content, fileSub, patterns, replacements);
        // write the data
        file.SeekToBegin();
        int32_t len = content.GetLength();
        file.Write(content, len);
        file.SetLength(len);
        file.Close();

将CFile的model改成readandwrite,然后在读完文件之后将文件指针放到文件开头,接着写入就可以了。


总结

优化的点:

  • 遍历字符串尽量保证不重复。
  • 尽量处理整个字符串,避免一个字符一个字符读取或写入。
  • 减少I/O操作,能一次处理完尽量一次处理完。
  • 读写文件只打开一次即可。
  • 2
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值