Question: 一个全英文文本,统计每个单词出现的次数,按次数从大到小排列,输出到文本文件中。
Data:MIT的Python教程---Python Programming : An Introduction to Computer Science,大约800K。
C++:
WinXp,VS2005开发平台,用时260ms
Linux平台,g++ 4.1.2 用时200ms
Python:
winxp平台, python 2.5 用时 402ms
效率分析:
这两段代码写得都不够优化,但大体反映了C++与Python在执行效率上的差异.
但看到python不到40 行的代码数,优雅的语法,为什么fans喜爱它就不言自明了.
[注]转载请注明出处
本文作为python代码3,题目是一个月前(6.9)想好的,直到今天才真正的写起来,可能很多时候确实没有时间,但是一拖再拖的原因有二: 1.目标没有侧重点,究竟为什么要写这样一个系列,我本意是为google编程竞赛作些准备,但是却被眼前的事情所累,准备接项目,准备开源竞赛,准备购买基金,准备买数码相机...小事很多,我又在小事中间读些BBS,查看Email,玩玩卡丁车; 2.最近心态不好,又找不到有效的舒缓渠道,很多事情做不下去...
Data:MIT的Python教程---Python Programming : An Introduction to Computer Science,大约800K。
C++:
#include <stdafx.h>
#include <windows.h>
#include <fstream>
#include <map>
#include <algorithm>
#include <vector>
#include <string>
#include <iostream>
// CThreadTime measures the CPU time (kernel + user) that the calling thread
// consumes between two calls. The result is reported in milliseconds.
//
// Usage:
//
//   CThreadTime ElapsedTime;
//   ElapsedTime.BeginGetElapsedTime();
//   // ... code being measured ...
//   __int64 nThreadTime = ElapsedTime.EndGetElapsedTime();
//
class CThreadTime
{
public:
    // Records the thread's current kernel/user times as the start mark.
    void BeginGetElapsedTime();

    // Returns the kernel + user time used since the start mark, in ms.
    __int64 EndGetElapsedTime();

private:
    // Packs the two 32-bit halves of a FILETIME into one 64-bit integer.
    __int64 FileTimeToQuadWord(PFILETIME pft);

    FILETIME ftKernelTimeStart;  // kernel time at BeginGetElapsedTime()
    FILETIME ftKernelTimeEnd;    // kernel time at EndGetElapsedTime()
    FILETIME ftUserTimeStart;    // user time at BeginGetElapsedTime()
    FILETIME ftUserTimeEnd;      // user time at EndGetElapsedTime()
    FILETIME ftDummy;            // scratch target for the unused creation/exit times
};
// Get the time elapsed since the thread start
inline void CThreadTime::BeginGetElapsedTime()
{
GetThreadTimes(GetCurrentThread(), &ftDummy, &ftDummy, &ftKernelTimeStart, &ftUserTimeStart);
}
// Snapshot the thread times again and return the kernel + user CPU time
// consumed since BeginGetElapsedTime(), in milliseconds.
inline __int64 CThreadTime::EndGetElapsedTime()
{
    GetThreadTimes(GetCurrentThread(), &ftDummy, &ftDummy, &ftKernelTimeEnd, &ftUserTimeEnd);

    const __int64 qwKernelStart = FileTimeToQuadWord(&ftKernelTimeStart);
    const __int64 qwKernelEnd   = FileTimeToQuadWord(&ftKernelTimeEnd);
    const __int64 qwUserStart   = FileTimeToQuadWord(&ftUserTimeStart);
    const __int64 qwUserEnd     = FileTimeToQuadWord(&ftUserTimeEnd);

    // FILETIME values count 100 ns ticks; 10000 ticks make one millisecond.
    const __int64 qwTotalTicks = (qwKernelEnd - qwKernelStart) + (qwUserEnd - qwUserStart);
    return qwTotalTicks / 10000;
}
// Packs a FILETIME (two 32-bit halves) into a single 64-bit tick count.
//
// pft: FILETIME to convert; must not be null.
// Returns (dwHighDateTime << 32) | dwLowDateTime as a signed 64-bit value.
inline __int64 CThreadTime::FileTimeToQuadWord(PFILETIME pft)
{
    // Int64ShllMod32 is only specified for shift counts 0..31, so shifting by
    // 32 with it is outside its contract. Assemble the value through
    // ULARGE_INTEGER instead, which needs no shift at all.
    ULARGE_INTEGER uli;
    uli.LowPart  = pft->dwLowDateTime;
    uli.HighPart = pft->dwHighDateTime;
    return static_cast<__int64>(uli.QuadPart);
}
// Everything above is the run-time measurement helper (taken from the web);
// the word-count program itself starts here.

// Sort predicate: true when wa's count (.second) is strictly greater than
// wb's, so that sorting with it puts the largest counts first.
bool countcmp(const wordp &wa,const wordp &wb)
{
    return wb.second < wa.second;
}
void wordscount()
{
string ifile("d://words.txt"),ofile("d://wordscount.txt");
ifstream infile(ifile.c_str());
if(infile){
wordc wordsmap;
string str;
while(infile >> str){
++wordsmap[str];
}
ofstream outfile(ofile.c_str());
wordc::iterator iter=wordsmap.begin();
vector<wordp> vv(wordsmap.begin(),wordsmap.end());
stable_sort(vv.begin(),vv.end(),countcmp);
vector<wordp> ::iterator viter=vv.begin();
while(viter != vv.end()){
outfile<<viter->first<<" "<<viter->second<<"/n";
viter++;
}
outfile.close();
}//if(infile)
infile.close();
}
int main()
{
CThreadTime ElapsedTime;
ElapsedTime.BeginGetElapsedTime();
for(int i=0;i<100;i++)
wordscount();
int nThreadTime = ElapsedTime.EndGetElapsedTime();
cout<<"have used time(millsecond): "<<nThreadTime/100<<endl;
return 0;
}
#include <windows.h>
#include <fstream>
#include <map>
#include <algorithm>
#include <vector>
#include <string>
#include <iostream>
// CThreadTime measures the CPU time (kernel + user) that the calling thread
// consumes between two calls. The result is reported in milliseconds.
//
// Usage:
//
//   CThreadTime ElapsedTime;
//   ElapsedTime.BeginGetElapsedTime();
//   // ... code being measured ...
//   __int64 nThreadTime = ElapsedTime.EndGetElapsedTime();
//
class CThreadTime
{
public:
    // Records the thread's current kernel/user times as the start mark.
    void BeginGetElapsedTime();

    // Returns the kernel + user time used since the start mark, in ms.
    __int64 EndGetElapsedTime();

private:
    // Packs the two 32-bit halves of a FILETIME into one 64-bit integer.
    __int64 FileTimeToQuadWord(PFILETIME pft);

    FILETIME ftKernelTimeStart;  // kernel time at BeginGetElapsedTime()
    FILETIME ftKernelTimeEnd;    // kernel time at EndGetElapsedTime()
    FILETIME ftUserTimeStart;    // user time at BeginGetElapsedTime()
    FILETIME ftUserTimeEnd;      // user time at EndGetElapsedTime()
    FILETIME ftDummy;            // scratch target for the unused creation/exit times
};
// Get the time elapsed since the thread start
inline void CThreadTime::BeginGetElapsedTime()
{
GetThreadTimes(GetCurrentThread(), &ftDummy, &ftDummy, &ftKernelTimeStart, &ftUserTimeStart);
}
// Snapshot the thread times again and return the kernel + user CPU time
// consumed since BeginGetElapsedTime(), in milliseconds.
inline __int64 CThreadTime::EndGetElapsedTime()
{
    GetThreadTimes(GetCurrentThread(), &ftDummy, &ftDummy, &ftKernelTimeEnd, &ftUserTimeEnd);

    const __int64 qwKernelStart = FileTimeToQuadWord(&ftKernelTimeStart);
    const __int64 qwKernelEnd   = FileTimeToQuadWord(&ftKernelTimeEnd);
    const __int64 qwUserStart   = FileTimeToQuadWord(&ftUserTimeStart);
    const __int64 qwUserEnd     = FileTimeToQuadWord(&ftUserTimeEnd);

    // FILETIME values count 100 ns ticks; 10000 ticks make one millisecond.
    const __int64 qwTotalTicks = (qwKernelEnd - qwKernelStart) + (qwUserEnd - qwUserStart);
    return qwTotalTicks / 10000;
}
// Packs a FILETIME (two 32-bit halves) into a single 64-bit tick count.
//
// pft: FILETIME to convert; must not be null.
// Returns (dwHighDateTime << 32) | dwLowDateTime as a signed 64-bit value.
inline __int64 CThreadTime::FileTimeToQuadWord(PFILETIME pft)
{
    // Int64ShllMod32 is only specified for shift counts 0..31, so shifting by
    // 32 with it is outside its contract. Assemble the value through
    // ULARGE_INTEGER instead, which needs no shift at all.
    ULARGE_INTEGER uli;
    uli.LowPart  = pft->dwLowDateTime;
    uli.HighPart = pft->dwHighDateTime;
    return static_cast<__int64>(uli.QuadPart);
}
// Everything above is the run-time measurement helper (taken from the web);
// the word-count program itself starts here.

// Sort predicate: true when wa's count (.second) is strictly greater than
// wb's, so that sorting with it puts the largest counts first.
bool countcmp(const wordp &wa,const wordp &wb)
{
    return wb.second < wa.second;
}
void wordscount()
{
string ifile("d://words.txt"),ofile("d://wordscount.txt");
ifstream infile(ifile.c_str());
if(infile){
wordc wordsmap;
string str;
while(infile >> str){
++wordsmap[str];
}
ofstream outfile(ofile.c_str());
wordc::iterator iter=wordsmap.begin();
vector<wordp> vv(wordsmap.begin(),wordsmap.end());
stable_sort(vv.begin(),vv.end(),countcmp);
vector<wordp> ::iterator viter=vv.begin();
while(viter != vv.end()){
outfile<<viter->first<<" "<<viter->second<<"/n";
viter++;
}
outfile.close();
}//if(infile)
infile.close();
}
int main()
{
CThreadTime ElapsedTime;
ElapsedTime.BeginGetElapsedTime();
for(int i=0;i<100;i++)
wordscount();
int nThreadTime = ElapsedTime.EndGetElapsedTime();
cout<<"have used time(millsecond): "<<nThreadTime/100<<endl;
return 0;
}
#include <time.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
// The rest of the listing uses unqualified standard-library names.
using namespace std;

// wordc maps a word to its occurrence count.
typedef std::map<std::string, int> wordc;
// wordp is a copyable (word, count) entry, used when sorting by count.
typedef std::pair<std::string, int> wordp;
// Sort predicate: true when wa's count (.second) is strictly greater than
// wb's, so that sorting with it puts the largest counts first.
bool countcmp(const wordp &wa, const wordp &wb)
{
    return wb.second < wa.second;
}
void wordscount()
{
string ifile("./words"),ofile("./wordscount");
ifstream infile(ifile.c_str());
if(infile){
wordc wordsmap;
string str;
while(infile >> str){
++wordsmap[str];
}
ofstream outfile(ofile.c_str());
wordc::iterator iter=wordsmap.begin();
vector<wordp> vv(wordsmap.begin(),wordsmap.end());
stable_sort(vv.begin(),vv.end(),countcmp);
vector<wordp> ::iterator viter=vv.begin();
while(viter != vv.end()){
outfile<<viter->first<<" "<<viter->second<<" ";
viter++;
}
outfile.close();
}//if(infile)
infile.close();
}
// Runs wordscount() repeatedly and prints the average processor time per
// run, in milliseconds.
int main()
{
    // Number of repetitions; also the divisor for the per-run average.
    const int kRuns = 100;

    const clock_t start = clock();
    for (int i = 0; i < kRuns; ++i)
        wordscount();
    const clock_t finish = clock();

    // clock() returns ticks, not time_t seconds, so difftime() does not apply.
    // Convert with CLOCKS_PER_SEC rather than assuming 1,000,000 ticks per
    // second: ticks * 1000 / CLOCKS_PER_SEC gives milliseconds.
    const double msPerRun =
        static_cast<double>(finish - start) * 1000.0 / CLOCKS_PER_SEC / kRuns;
    std::cout << "the time is : " << msPerRun << std::endl;
    return 0;
}
#include < map >
#include < algorithm >
#include < vector >
#include < string >
#include < iostream >
#include < time.h >
// The rest of the listing uses unqualified standard-library names.
using namespace std;

// wordc maps a word to its occurrence count.
typedef std::map<std::string, int> wordc;
// wordp is a copyable (word, count) entry, used when sorting by count.
typedef std::pair<std::string, int> wordp;
// Sort predicate: true when wa's count (.second) is strictly greater than
// wb's, so that sorting with it puts the largest counts first.
bool countcmp(const wordp &wa, const wordp &wb)
{
    return wb.second < wa.second;
}
void wordscount()
{
string ifile("./words"),ofile("./wordscount");
ifstream infile(ifile.c_str());
if(infile){
wordc wordsmap;
string str;
while(infile >> str){
++wordsmap[str];
}
ofstream outfile(ofile.c_str());
wordc::iterator iter=wordsmap.begin();
vector<wordp> vv(wordsmap.begin(),wordsmap.end());
stable_sort(vv.begin(),vv.end(),countcmp);
vector<wordp> ::iterator viter=vv.begin();
while(viter != vv.end()){
outfile<<viter->first<<" "<<viter->second<<" ";
viter++;
}
outfile.close();
}//if(infile)
infile.close();
}
// Runs wordscount() repeatedly and prints the average processor time per
// run, in milliseconds.
int main()
{
    // Number of repetitions; also the divisor for the per-run average.
    const int kRuns = 100;

    const clock_t start = clock();
    for (int i = 0; i < kRuns; ++i)
        wordscount();
    const clock_t finish = clock();

    // clock() returns ticks, not time_t seconds, so difftime() does not apply.
    // Convert with CLOCKS_PER_SEC rather than assuming 1,000,000 ticks per
    // second: ticks * 1000 / CLOCKS_PER_SEC gives milliseconds.
    const double msPerRun =
        static_cast<double>(finish - start) * 1000.0 / CLOCKS_PER_SEC / kRuns;
    std::cout << "the time is : " << msPerRun << std::endl;
    return 0;
}
Python:
import
timeit
import string
def compareitems(item1, item2):
    """Compare two (word, count) pairs for sorting.

    Pairs with a higher count sort first; pairs with equal counts are
    ordered by word, ascending.

    Args:
        item1: a (word, count) pair.
        item2: a (word, count) pair.

    Returns:
        -1 if item1 sorts before item2, 1 if it sorts after, 0 if equal.
    """
    # Unpack explicitly instead of using tuple parameters in the signature.
    w1, c1 = item1
    w2, c2 = item2
    if c1 > c2:
        return -1
    elif c1 == c2:
        # Three-way comparison of the words, spelled out so it does not
        # depend on the cmp() built-in.
        return (w1 > w2) - (w1 < w2)
    else:
        return 1
def main():
    """Count word frequencies in d:/words.txt and write them to d:/wordscount.txt.

    Words are the whitespace-separated tokens of the input file. The output
    holds one "word count" line per distinct word, in the order defined by
    compareitems (highest count first, ties broken by word).
    """
    # The whole file is read at once; an earlier line-by-line variant was
    # left commented out in the original and is not reproduced here.
    wordf = open("d:/words.txt", "r")
    try:
        words = wordf.read().split()
    finally:
        wordf.close()

    worddict = {}
    for word in words:
        worddict[word] = worddict.get(word, 0) + 1

    wlst = list(worddict.items())
    # compareitems is a cmp-style function, passed positionally as in the
    # original (Python 2 list.sort).
    wlst.sort(compareitems)

    wordc = open("d:/wordscount.txt", "w")
    try:
        # Build the output in one join instead of repeated string +=.
        wordc.write("".join(["%s %d\n" % item for item in wlst]))
    finally:
        wordc.close()
if __name__ == "__main__":
    # Imported here so this entry point does not depend on the import block.
    import timeit

    # Time main() through a fresh import of this module, which therefore has
    # to be importable as ``wordcount``: 3 repeats of 100 calls each.
    t = timeit.Timer("wordcount.main()", "import wordcount")
    print(t.repeat(3, 100))
    # t.timeit()
import string
def compareitems(item1, item2):
    """Compare two (word, count) pairs for sorting.

    Pairs with a higher count sort first; pairs with equal counts are
    ordered by word, ascending.

    Args:
        item1: a (word, count) pair.
        item2: a (word, count) pair.

    Returns:
        -1 if item1 sorts before item2, 1 if it sorts after, 0 if equal.
    """
    # Unpack explicitly instead of using tuple parameters in the signature.
    w1, c1 = item1
    w2, c2 = item2
    if c1 > c2:
        return -1
    elif c1 == c2:
        # Three-way comparison of the words, spelled out so it does not
        # depend on the cmp() built-in.
        return (w1 > w2) - (w1 < w2)
    else:
        return 1
def main():
    """Count word frequencies in d:/words.txt and write them to d:/wordscount.txt.

    Words are the whitespace-separated tokens of the input file. The output
    holds one "word count" line per distinct word, in the order defined by
    compareitems (highest count first, ties broken by word).
    """
    # The whole file is read at once; an earlier line-by-line variant was
    # left commented out in the original and is not reproduced here.
    wordf = open("d:/words.txt", "r")
    try:
        words = wordf.read().split()
    finally:
        wordf.close()

    worddict = {}
    for word in words:
        worddict[word] = worddict.get(word, 0) + 1

    wlst = list(worddict.items())
    # compareitems is a cmp-style function, passed positionally as in the
    # original (Python 2 list.sort).
    wlst.sort(compareitems)

    wordc = open("d:/wordscount.txt", "w")
    try:
        # Build the output in one join instead of repeated string +=.
        wordc.write("".join(["%s %d\n" % item for item in wlst]))
    finally:
        wordc.close()
if __name__ == "__main__":
    # Imported here so this entry point does not depend on the import block.
    import timeit

    # Time main() through a fresh import of this module, which therefore has
    # to be importable as ``wordcount``: 3 repeats of 100 calls each.
    t = timeit.Timer("wordcount.main()", "import wordcount")
    print(t.repeat(3, 100))
    # t.timeit()
效率分析:
这两段代码写得都不够优化,但大体反映了C++与Python在执行效率上的差异.
但看到python不到40 行的代码数,优雅的语法,为什么fans喜爱它就不言自明了.
[注]转载请注明出处
本文作为python代码3,题目是一个月前(6.9)想好的,直到今天才真正的写起来,可能很多时候确实没有时间,但是一拖再拖的原因有二: 1.目标没有侧重点,究竟为什么要写这样一个系列,我本意是为google编程竞赛作些准备,但是却被眼前的事情所累,准备接项目,准备开源竞赛,准备购买基金,准备买数码相机...小事很多,我又在小事中间读些BBS,查看Email,玩玩卡丁车; 2.最近心态不好,又找不到有效的舒缓渠道,很多事情做不下去...