前面布置了求取素数的期中作业,各位同学都提交了答案。有筛法的,有直接两层for循环判断的。对于梅森素数,有快速算法,不过我们这次的要求是求取并输出连续素数表,能在规定时间(24个小时)内求取的越多越好。
对筛法的连续求取程序,主要的问题是存储问题。筛法需要一个存储空间,用来存储标记。即使采用1bit这样的压缩存储,也是有上限的。尤其是学校的计算机内存只有2G。
对简单的循环判断法,主要问题是时间开销,要注意对于N,只要验证sqrt(N)之内的素数能否整除N即可,而不是采用暴力的两层for ++.
我把曹景焕同学的程序略加修改、注释发布一下。他这个程序做的还是比较完备的,利用磁盘存储,内存开销很小,且性能不错,利用了OpenMP技术,i7 6700K CPU 8核心下,计算到1G(1,000,000,000) 以内只需要190 秒,耗费内存小于20MB。
10亿以内的最大素数是999999937,是从2开始的第50847534个素数。下课前,计算几十分钟后,已突破6G,但文本文件过大,已经打不开了。
改进建议:
按照区间存储txt, 而不是放在一个大文件里面。因为这个文本文件很快会超过4GB,一般的编辑器打不开。
计算机专业有余力的同学,可考虑使用gpu+mpi进行分布式异构计算。在这种情况下,如何存储,发布和共享计算结果也同样重要了。
C++ 实现
代码:
/*学号=YJD201702032716 曹景焕 JHC2017, G++ Raspberry-PI */
#include <vector>
#include <cstdio>
#include <memory>
#include <memory.h>
#include <omp.h>
#include <mutex>
#include <set>
#include <cmath>
int main()
{
//开辟一个二进制的文件作为缓存,一个文本文件作为结果
FILE * fp = fopen("primer.dat","ab+");
FILE * fpTxt = fopen("primer.txt","a+");
if (!fp || !fpTxt)
return -1;
//要接着上次计算,读取最新的结果
fseeko64(fp,0,SEEK_END);
fseeko64(fpTxt,0,SEEK_END);
unsigned long long fsz = ftello64(fp);
unsigned long long test = 2;
//首次计算,把2写入,作为第一个素数
if (fsz<sizeof(unsigned long long))
{
fseeko64(fp,0,SEEK_SET);
fwrite(&test,sizeof(unsigned long long),1,fp);
fprintf(fpTxt,"%lld\n",test);
}
else
//否则,读取上次的进度
{
fseeko64(fp,fsz - sizeof(unsigned long long),SEEK_SET);
fread(&test,sizeof(unsigned long long),1,fp);
}
printf ("Calc primers from %lld\n",test);
//设置下一个测试的值
if (test==2)
test = 3;
else
test += 2;//只测试奇数
//OMP并行计算的初始规模。规模会随着测试数test的增大,而对数增大
const unsigned int batch = 65536*16;
//最大的内存读入历史素数个数,8MB。
const int max_buf_size = 1024*1024*8;
//获取本机的CPU计算核心个数
const int numthreads = omp_get_num_procs();
printf ("\nOMP Num proces = %d\n",numthreads);
//为并行计算分配缓存,这里是C++17标准的,请注意用GCC的版本。
std::shared_ptr<unsigned long long[][max_buf_size]> pbuffer( new unsigned long long [numthreads][max_buf_size]);
//为并行计算分配建议缓存大小
std::shared_ptr<int> suggest_buf_size_arr (new int [numthreads]);
int * suggest_buf_size_ptr = suggest_buf_size_arr.get();
memset (suggest_buf_size_ptr,0,sizeof(int)*numthreads);
//为并行计算分配当前测试位置偏移
std::shared_ptr<unsigned long long> pos_arr (new unsigned long long [numthreads]);
unsigned long long * file_pos = pos_arr.get();
for (int i=0;i<numthreads;++i)
file_pos[i] = 1;
//文件互斥
std::mutex m_f;
//初始的0~65536,采用单线程,后续的进行并行化。
int sthr_fpos = 1;
int rprimers_sthd = 0;
while(1)
{
//单线程
if (test*16<batch)
{
int & suggest_buf_size = suggest_buf_size_ptr[0];
unsigned long long * buffer = pbuffer.get()[0];
if (suggest_buf_size<1)
suggest_buf_size = 1;
if (suggest_buf_size>=max_buf_size)
suggest_buf_size = max_buf_size;
if (sthr_fpos!=0)
{
fseeko64(fp,0,SEEK_SET);
rprimers_sthd = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
sthr_fpos = 0;
}
bool ex_test = false;
bool test_curr_ok = true;
while (test_curr_ok && rprimers_sthd)
{
for (int i=0;i<rprimers_sthd && test_curr_ok;++i)
{
if (buffer[i] * buffer[i]>test)
{
ex_test = true;
break;
}
if (test % buffer[i]==0)
test_curr_ok = false;
}
if (ex_test)
break;
if (test_curr_ok)
{
fseeko64(fp,sthr_fpos,SEEK_SET);
rprimers_sthd = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
sthr_fpos = ftello64(fp);
suggest_buf_size += 32;
}
}
if (!test_curr_ok)
{
test+=2;
continue;
}
printf ("Single Thread, Primer: %lld \r", test);
fseeko64(fp,0,SEEK_END);
fwrite(&test,sizeof(unsigned long long),1,fp);
fprintf(fpTxt,"%lld\n",test);
fflush(fpTxt);
fflush(fp);
test+=2;
}
//多线程
else
{
//用于存储新增素数的集合,顺便可以按照大小顺序排序。
std::set<unsigned long long> set_results;
//计算并行规模
unsigned long long rbat = (unsigned long long)(log(test)/log(2) * (batch*1.0/(log(batch)/log(2))))/2*2;
//控制并行规模
if (rbat>=1024*1024*16)
rbat = 1024*1024*16;
if (rbat<batch)
rbat = batch;
printf ("Batch %lld ", rbat);
#pragma omp parallel for
for (unsigned long long t = test; t<test+rbat;t+=2)
{
const int omp_n = omp_get_thread_num();
int & suggest_buf_size = suggest_buf_size_ptr[omp_n];
unsigned long long * buffer = pbuffer.get()[omp_n];
if (suggest_buf_size<16)
suggest_buf_size = 16;
if (suggest_buf_size>=max_buf_size)
suggest_buf_size = max_buf_size;
int rprimers = 0;
//需要重新读取缓存。由于绝大部分合数都在第一波缓存中被淘汰,所以这个读取行为发生的不频繁。
if (file_pos[omp_n]!=0)
{
m_f.lock();
fseeko64(fp,0,SEEK_SET);
rprimers = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
file_pos[omp_n] = 0;
m_f.unlock();
}
else
rprimers = suggest_buf_size;
bool ex_test = false;
bool test_curr_ok = true;
while (test_curr_ok && rprimers)
{
for (int i=0;i<rprimers;++i)
{
if (!test_curr_ok)
break;
if (buffer[i] * buffer[i]>t)
{
ex_test = true;
break;
}
if (t % buffer[i]==0)
test_curr_ok = false;
}
if (ex_test)
break;
if (test_curr_ok)
{
m_f.lock();
fseeko64(fp,file_pos[omp_n],SEEK_SET);
rprimers = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
file_pos[omp_n] = ftello64(fp);
suggest_buf_size += 32;
m_f.unlock();
}
}
if (!test_curr_ok)
continue;
#pragma omp critical (ccres)
{
set_results.insert(t);
}
}
for (int i=0;i<numthreads;++i)
file_pos[i] = 1;
test += rbat;
if (set_results.size())
{
printf (",%d primes, last Primer: %lld \r", set_results.size(),*set_results.rbegin());
fseeko64(fp,0,SEEK_END);
for(auto p = set_results.begin();p!=set_results.end();++p)
{
fwrite(&(*p),sizeof(unsigned long long),1,fp);
fprintf(fpTxt,"%lld\n",(*p));
}
fflush(fpTxt);
fflush(fp);
}
else {
printf (", empty\n");
}
}
}
fclose(fpTxt);
fclose(fp);
return 0;
}
Qt5文本文件分片
经过改进,使用Qt5的附带功能,进行文件夹创建和文件名分片。
QT -= gui
CONFIG += c++11 console
CONFIG -= app_bundle
# The following define makes your compiler emit warnings if you use
# any Qt feature that has been marked deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the
# deprecated API in order to know how to port your code away from it.
DEFINES += QT_DEPRECATED_WARNINGS
# You can also make your code fail to compile if it uses deprecated APIs.
# In order to do so, uncomment the following line.
# You can also select to disable deprecated APIs only up to a certain version of Qt.
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
SOURCES += \
main.cpp
# Default rules for deployment.
qnx: target.path = /tmp/$${TARGET}/bin
else: unix:!android: target.path = /opt/$${TARGET}/bin
!isEmpty(target.path): INSTALLS += target
QMAKE_CXXFLAGS += -fopenmp
LIBS += -lgomp
/*学号=YJD201702032716 JH-C, G++ Raspberry-PI, JB-ding */
#include <vector>
#include <cstdio>
#include <memory>
#include <memory.h>
#include <omp.h>
#include <string.h>
#include <mutex>
#include <set>
#include <cmath>
#include <QCoreApplication>
#include <QDir>
FILE * fpTxt = nullptr;
unsigned long long old_nn = 0;
inline void output_txt(unsigned long long count,unsigned long long n)
{
char buf_txtfm[256] = {0};
if ((old_nn/100000000 != n/100000000) || !fpTxt)
{
QDir dir(".");
QString strDir = QString().sprintf("./prime_res/%016lld",n/100000000000);
dir.mkpath(strDir);
old_nn = n;
sprintf (buf_txtfm,"./prime_res/%016lld/primer_%016lld.txt",n/100000000000,n/100000000);
if (fpTxt)
fclose(fpTxt);
fpTxt = fopen64(buf_txtfm,"a+");
if (!fpTxt)
printf("Fopen %s failed!\n",buf_txtfm);
else {
printf("Fopen %s succeed!\n",buf_txtfm);
}
fseeko64(fpTxt,0,SEEK_END);
}
if (fpTxt)
fprintf(fpTxt,"%lld\t%lld\n",count,n);
}
int main(int argc, char * argv[])
{
QCoreApplication app(argc,argv);
QDir dir(".");
dir.mkpath("./prime_res");
//开辟一个二进制的文件作为缓存,一个文本文件作为结果
FILE * fp = fopen64("./prime_res/primer.dat","ab+");
if (!fp)
return -1;
//要接着上次计算,读取最新的结果
fseeko64(fp,0,SEEK_END);
unsigned long long fsz = ftello64(fp);
unsigned long long test = 2;
//首次计算,把2写入,作为第一个素数
if (fsz<sizeof(unsigned long long))
{
fseeko64(fp,0,SEEK_SET);
fwrite(&test,sizeof(unsigned long long),1,fp);
output_txt(1,2);
}
else
//否则,读取上次的进度
{
fseeko64(fp,fsz - sizeof(unsigned long long),SEEK_SET);
fread(&test,sizeof(unsigned long long),1,fp);
}
printf ("Calc primers from %lld\n",test);
//设置下一个测试的值
if (test==2)
test = 3;
else
test += 2;//只测试奇数
//OMP并行计算的初始规模。规模会随着测试数test的增大,而对数增大
const unsigned int batch = 65536*16;
//最大的内存读入历史素数个数,8MB。
const int max_buf_size = 1024*1024*8;
//获取本机的CPU计算核心个数
const int numthreads = omp_get_num_procs();
printf ("\nOMP Num proces = %d\n",numthreads);
//为并行计算分配缓存,这里是C++17标准的,请注意用GCC的版本。
std::shared_ptr<unsigned long long[][max_buf_size]> pbuffer( new unsigned long long [numthreads][max_buf_size]);
//为并行计算分配建议缓存大小
std::shared_ptr<int> suggest_buf_size_arr (new int [numthreads]);
int * suggest_buf_size_ptr = suggest_buf_size_arr.get();
memset (suggest_buf_size_ptr,0,sizeof(int)*numthreads);
//为并行计算分配当前测试位置偏移
std::shared_ptr<unsigned long long> pos_arr (new unsigned long long [numthreads]);
unsigned long long * file_pos = pos_arr.get();
for (int i=0;i<numthreads;++i)
file_pos[i] = 1;
//文件互斥
std::mutex m_f;
//初始的0~65536,采用单线程,后续的进行并行化。
int sthr_fpos = 1;
int rprimers_sthd = 0;
while(1)
{
//单线程
if (test*16<batch)
{
int & suggest_buf_size = suggest_buf_size_ptr[0];
unsigned long long * buffer = pbuffer.get()[0];
if (suggest_buf_size<2)
suggest_buf_size = 2;
if (suggest_buf_size>=max_buf_size)
suggest_buf_size = max_buf_size;
if (sthr_fpos!=0)
{
fseeko64(fp,0,SEEK_SET);
rprimers_sthd = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
sthr_fpos = 0;
}
bool ex_test = false;
bool test_curr_ok = true;
while (test_curr_ok && rprimers_sthd)
{
for (int i=0;i<rprimers_sthd && test_curr_ok;++i)
{
if (buffer[i] * buffer[i]>test)
{
ex_test = true;
break;
}
if (test % buffer[i]==0)
test_curr_ok = false;
}
if (ex_test)
break;
if (test_curr_ok)
{
fseeko64(fp,sthr_fpos,SEEK_SET);
rprimers_sthd = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
sthr_fpos = ftello64(fp);
suggest_buf_size += 32;
}
}
if (!test_curr_ok)
{
test+=2;
continue;
}
printf ("Single Thread, Primer: %lld \r", test);
fseeko64(fp,0,SEEK_END);
fwrite(&test,sizeof(unsigned long long),1,fp);
unsigned long long ttl = ftello64(fp)/8;
output_txt(ttl,test);
fflush(fp);
test+=2;
}
//多线程
else
{
//用于存储新增素数的集合,顺便可以按照大小顺序排序。
std::set<unsigned long long> set_results;
//计算并行规模
unsigned long long rbat = (unsigned long long)(log(test)/log(2) * (batch*1.0/(log(batch)/log(2))))/2*2;
//控制并行规模
if (rbat>=1024*1024*16)
rbat = 1024*1024*16;
if (rbat<batch)
rbat = batch;
printf ("Batch %lld ", rbat);
#pragma omp parallel for
for (unsigned long long t = test; t<test+rbat;t+=2)
{
const int omp_n = omp_get_thread_num();
int & suggest_buf_size = suggest_buf_size_ptr[omp_n];
unsigned long long * buffer = pbuffer.get()[omp_n];
if (suggest_buf_size<16)
suggest_buf_size = 16;
if (suggest_buf_size>=max_buf_size)
suggest_buf_size = max_buf_size;
int rprimers = 0;
//需要重新读取缓存。由于绝大部分合数都在第一波缓存中被淘汰,所以这个读取行为发生的不频繁。
if (file_pos[omp_n]!=0)
{
m_f.lock();
fseeko64(fp,0,SEEK_SET);
rprimers = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
file_pos[omp_n] = 0;
m_f.unlock();
}
else
rprimers = suggest_buf_size;
bool ex_test = false;
bool test_curr_ok = true;
while (test_curr_ok && rprimers)
{
for (int i=0;i<rprimers;++i)
{
if (!test_curr_ok)
break;
if (buffer[i] * buffer[i]>t)
{
ex_test = true;
break;
}
if (t % buffer[i]==0)
test_curr_ok = false;
}
if (ex_test)
break;
if (test_curr_ok)
{
m_f.lock();
fseeko64(fp,file_pos[omp_n],SEEK_SET);
rprimers = fread(buffer,sizeof(unsigned long long),suggest_buf_size,fp);
file_pos[omp_n] = ftello64(fp);
suggest_buf_size += 32;
m_f.unlock();
}
}
if (!test_curr_ok)
continue;
#pragma omp critical (ccres)
{
set_results.insert(t);
}
}
for (int i=0;i<numthreads;++i)
file_pos[i] = 1;
test += rbat;
//输出
if (set_results.size())
{
printf (",%d primes, last Primer: %lld \n", set_results.size(),*set_results.rbegin());
fseeko64(fp,0,SEEK_END);
unsigned long long ttl = ftello64(fp)/8;
for(auto p = set_results.begin();p!=set_results.end();++p)
{
fwrite(&(*p),sizeof(unsigned long long),1,fp);
output_txt(++ttl,*p);
}
fflush(fpTxt);
fflush(fp);
}
else {
printf (", empty\n");
}
}
}
fclose(fpTxt);
fclose(fp);
return 0;
}
在树莓派上的运行(挂一个extFat 文件系统的2TB移动硬盘)