写在前面
1.实现直方图统计必定会有大量的IO操作,当数据规模很大时,IO会耗费大量时间并且占据整个终端屏幕,影响我们观察数据。因此,在加大数据规模评估性能的时候,把Print_histo函数注释掉即可
2.自身电脑实验环境为12核处理机,2.70GHz,机带RAM为16.0GB
3.实现直方图统计的数据规模为1000,评估性能数据规模为10000000。均为将【0,100】分为10个区间
以下运行示例的参数均为:将【0,100】分为10个区间,1000个数据,2个线程
windows cmd 运行示例(vscode的Mingw自带pthread库)
cd C:\Users\17926\Desktop\test1
gcc -g -o test01 test01.c -lpthread
test01 10 0 100 1000 2
Linux终端运行示例
cd /*文件夹所在位置*/
gcc -g -o test01 test01.c -lpthread
./test01 10 0 100 1000 2
计时头文件
/* File: timer.h
*
* Purpose: Define a macro that returns the number of seconds that
* have elapsed since some point in the past. The timer
* should return times with microsecond accuracy.
*
* Note: The argument passed to the GET_TIME macro should be
* a double, *not* a pointer to a double.
*
* Example:
* #include "timer.h"
* . . .
* double start, finish, elapsed;
* . . .
* GET_TIME(start);
* . . .
* Code to be timed
* . . .
* GET_TIME(finish);
* elapsed = finish - start;
* printf("The code to be timed took %e seconds\n", elapsed);
*
* IPP: Section 3.6.1 (pp. 121 and ff.) and Section 6.1.2 (pp. 273 and ff.)
*/
#ifndef TIMER_H
#define TIMER_H
#include <stddef.h>
#include <sys/time.h>
/*
 * GET_TIME(now) - store the current time of day, in seconds, in `now`.
 *
 * `now` must be a modifiable lvalue of type double (not a pointer to a
 * double). The stored value is gettimeofday()'s tv_sec plus tv_usec
 * converted to seconds.
 *
 * The expansion is wrapped in do { ... } while (0) so that `GET_TIME(x);`
 * is exactly one statement and can be used as the unbraced branch of an
 * if/else. The temporary has a macro-private name so that a caller
 * variable called `t` is not captured by the expansion.
 */
#define GET_TIME(now) do { \
    struct timeval get_time_tv_; \
    gettimeofday(&get_time_tv_, NULL); \
    (now) = get_time_tv_.tv_sec + get_time_tv_.tv_usec/1000000.0; \
} while (0)
#endif
忙等待
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include "time.h"
/**************** Shared state ****************/
int thread_count;   // number of worker threads (shared by all threads)
int data_count;     // number of data values
float min_meas;     // left end of the whole measurement range
float max_meas;     // right end of the whole measurement range
float *bin_maxes;   // upper edge of every bin
int *bin_counts;    // number of values counted in every bin
int bin_count;      // number of bins
float *data;        // the whole data set
int flag;           // rank of the thread whose turn it is to merge its counts
/**********************************************/
/*
 * Usage - print the expected command line to stderr and terminate.
 *
 * prog_name: program name shown in the message (main passes argv[0]).
 *
 * Does not return. The process exits with EXIT_FAILURE so that a shell or
 * script can tell a rejected command line from a successful run.
 */
void Usage(char prog_name[]) {
    fprintf(stderr, "usage: %s ", prog_name);
    fprintf(stderr, "<bin_count> <min_meas> <max_meas> <data_count> <thread_count>\n");
    exit(EXIT_FAILURE);
}
/*
 * Gen_data - fill data[0 .. data_count-1] with pseudo-random values
 * scaled into the range from min_meas to max_meas.
 *
 * The generator is re-seeded with 0 on every call, so each call produces
 * the same sequence.
 */
void Gen_data(
        float min_meas    /* in  */,
        float max_meas    /* in  */,
        float data[]      /* out */,
        int   data_count  /* in  */) {
    int idx = 0;

    srand(0);
    while (idx < data_count) {
        data[idx] = min_meas + (max_meas - min_meas)*rand()/((double) RAND_MAX);
        idx++;
    }
}
/*
 * Gen_bins - split [min_meas, max_meas] into bin_count equally wide bins.
 *
 * On return bin_maxes[i] holds the upper edge of bin i and every entry of
 * bin_counts is 0.
 */
void Gen_bins(float min_meas,float max_meas,float bin_maxes[],int bin_counts[], int bin_count)
{
    float bin_width = (max_meas - min_meas)/bin_count;
    int k;

    /* Clear the tallies first, then lay out the upper edges. */
    for (k = 0; k < bin_count; k++)
        bin_counts[k] = 0;
    for (k = 0; k < bin_count; k++)
        bin_maxes[k] = min_meas + (k+1)*bin_width;
}
/*
 * Which_bin - find the histogram bin that holds a value (binary search).
 *
 * data:      value to classify
 * bin_maxes: upper edges of the bins in ascending order; bin i covers
 *            [bin_maxes[i-1], bin_maxes[i]) and bin 0 starts at min_meas
 * bin_count: number of entries in bin_maxes
 * min_meas:  lower edge of bin 0
 *
 * Returns the bin index in [0, bin_count-1], or -1 when bin_count <= 0.
 *
 * A value the search cannot place is assigned to the nearest edge bin:
 * below min_meas goes to bin 0, at or above the last upper edge goes to
 * the last bin. This makes the top edge inclusive, so a value equal to
 * the overall maximum is counted instead of falling off the end of the
 * function without a return value.
 */
int Which_bin(float data,float bin_maxes[],int bin_count,float min_meas)
{
    int bottom, top;

    if (bin_count <= 0)
        return -1;
    bottom = 0;
    top = bin_count-1;
    while (bottom <= top) {
        int mid = bottom + (top - bottom)/2;
        float bin_max = bin_maxes[mid];
        float bin_min = (mid == 0) ? min_meas : bin_maxes[mid-1];
        if (data >= bin_max)
            bottom = mid+1;
        else if (data < bin_min)
            top = mid-1;
        else
            return mid;
    }
    /* Out of range: clamp to the closest edge bin. */
    return (data < min_meas) ? 0 : bin_count-1;
}
/*
 * Print_histo - draw the histogram on stdout.
 *
 * One line per bin: the bin's lower and upper edge followed by one '*'
 * for every value counted in that bin.
 */
void Print_histo(float bin_maxes[], int bin_counts[], int bin_count, float min_meas)
{
    float lower = min_meas;   /* lower edge of the bin being printed */

    for (int b = 0; b < bin_count; b++) {
        float upper = bin_maxes[b];

        printf("%.3f-%.3f:\t", lower, upper);
        for (int stars = bin_counts[b]; stars > 0; stars--)
            printf("*");   /* one '*' stands for one value */
        printf("\n");
        lower = upper;     /* the next bin starts where this one ends */
    }
}
/*
 * Thread_sum - thread entry point for the busy-wait variant.
 *
 * rank: the thread's index, carried in the pointer value (main passes
 *       0 .. thread_count-1).
 *
 * Counts one contiguous slice of the global data array into a private
 * histogram, then adds it to the shared bin_counts when it is this
 * thread's turn. Turns are taken in rank order through the global `flag`.
 * Always returns NULL; terminates the process if the private histogram
 * cannot be allocated (a thread that silently gave up would leave the
 * other threads spinning forever).
 */
void *Thread_sum(void* rank)
{
    //2021040058 xyy
    long my_rank = (long)rank;
    long long i;
    int b;
    long long my_n = data_count/thread_count;
    long long my_first_i = my_rank * my_n;
    /* The last thread also takes the data_count % thread_count leftover
       elements, which would otherwise never be counted. */
    long long my_last_i = (my_rank == thread_count-1) ? data_count : my_first_i + my_n;
    int *my_bin_counts = calloc(bin_count, sizeof(*my_bin_counts));

    if (my_bin_counts == NULL) {
        fprintf(stderr, "Thread %ld: cannot allocate local bin counts\n", my_rank);
        exit(EXIT_FAILURE);
    }
    for (i = my_first_i; i < my_last_i; i++) {
        int bin = Which_bin(data[i], bin_maxes, bin_count, min_meas);
        if (bin >= 0 && bin < bin_count)   /* never index outside the array */
            my_bin_counts[bin]++;
    }
    /* Wait for our turn. The load is atomic (GCC/Clang builtin) so the
       compiler cannot hoist it out of the loop and spin forever. */
    while (__atomic_load_n(&flag, __ATOMIC_ACQUIRE) != my_rank)
        ;
    /* One turn per thread is enough: merge every bin, then pass the turn. */
    for (b = 0; b < bin_count; b++)
        bin_counts[b] += my_bin_counts[b];
    __atomic_store_n(&flag, (int)((my_rank+1)%thread_count), __ATOMIC_RELEASE);
    free(my_bin_counts);
    return NULL;
}
int main(int argc, char* argv[])
{
pthread_t* thread_handles;
double start,finish;//计算程序运行时间
double record;
//2021040058 xyy
if (argc != 6) Usage(argv[0]);
bin_count = strtol(argv[1], NULL, 10);
min_meas = strtof(argv[2], NULL);
max_meas = strtof(argv[3], NULL);
data_count = strtol(argv[4], NULL, 10);
thread_count = strtol(argv[5],NULL,10);
thread_handles = malloc(thread_count*sizeof(pthread_t));
bin_maxes = malloc(bin_count*sizeof(float));
bin_counts = malloc(bin_count*sizeof(int));
data = malloc(data_count*sizeof(float));
Gen_data(min_meas, max_meas, data, data_count);
Gen_bins(min_meas, max_meas, bin_maxes, bin_counts, bin_count);
for(int i=0;i<bin_count;i++){
bin_counts[i] = 0;
}
GET_TIME(start);
for(int i=0;i<thread_count;i++){
pthread_create(&thread_handles[i],NULL,Thread_sum,(void*)i);
}
for(int i=0;i<thread_count;i++){
pthread_join(thread_handles[i],NULL);
}
GET_TIME(finish);
record = finish - start;
//Print_histo(bin_maxes, bin_counts, bin_count, min_meas);
GET_TIME(start);
for (int i = 0; i < data_count; i++) {
int bin = Which_bin(data[i], bin_maxes, bin_count, min_meas);
bin_counts[bin]++;
}
GET_TIME(finish);
printf("thread number:%d\t run time:%.6lfseconds\n",thread_count,record);
printf("single thread:\t run time:%.6lfseconds\n",finish-start);
free(data);
free(bin_maxes);
free(bin_counts);
return 0;
}
互斥量
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include "time.h"
/**************** Shared state ****************/
int thread_count;       // number of worker threads (shared by all threads)
int data_count;         // number of data values
float min_meas;         // left end of the whole measurement range
float max_meas;         // right end of the whole measurement range
float *bin_maxes;       // upper edge of every bin
int *bin_counts;        // number of values counted in every bin
int bin_count;          // number of bins
float *data;            // the whole data set
pthread_mutex_t mutex;  // held while a thread adds its counts to bin_counts
/**********************************************/
/*
 * Usage - report the expected command line on stderr and quit.
 *
 * prog_name: program name shown in the message (main passes argv[0]).
 *
 * Never returns. The exit status is EXIT_FAILURE, so a rejected command
 * line is distinguishable from a successful run.
 */
void Usage(char prog_name[]) {
    fprintf(stderr, "usage: %s ", prog_name);
    fprintf(stderr, "<bin_count> <min_meas> <max_meas> <data_count> <thread_count>\n");
    exit(EXIT_FAILURE);
}
/*
 * Gen_data - write data_count pseudo-random values, scaled into the range
 * from min_meas to max_meas, into data[].
 *
 * srand(0) is called first, so the sequence is the same on every call.
 */
void Gen_data(
        float min_meas    /* in  */,
        float max_meas    /* in  */,
        float data[]      /* out */,
        int   data_count  /* in  */) {
    float *out = data;   /* next slot to fill */
    int remaining;

    srand(0);
    for (remaining = data_count; remaining > 0; remaining--) {
        *out = min_meas + (max_meas - min_meas)*rand()/((double) RAND_MAX);
        out++;
    }
}
/*
 * Gen_bins - compute the upper edge of each of bin_count equally wide
 * bins covering [min_meas, max_meas] and reset every bin's tally to 0.
 */
void Gen_bins(float min_meas,float max_meas,float bin_maxes[],int bin_counts[], int bin_count)
{
    float bin_width;
    int slot = 0;

    bin_width = (max_meas - min_meas)/bin_count;
    while (slot < bin_count) {
        bin_maxes[slot] = min_meas + (slot+1)*bin_width;
        bin_counts[slot] = 0;
        slot++;
    }
}
/*
 * Which_bin - binary-search the bin that contains a value.
 *
 * data:      value to classify
 * bin_maxes: upper edges of the bins in ascending order; bin i covers
 *            [bin_maxes[i-1], bin_maxes[i]) and bin 0 starts at min_meas
 * bin_count: number of entries in bin_maxes
 * min_meas:  lower edge of bin 0
 *
 * Returns the bin index in [0, bin_count-1], or -1 when bin_count <= 0.
 *
 * Values that lie outside every bin are clamped to the nearest edge bin
 * (below min_meas -> bin 0, at or above the last upper edge -> last bin).
 * Without this the function ended without returning a value for such
 * input, including a value exactly equal to the overall maximum.
 */
int Which_bin(float data,float bin_maxes[],int bin_count,float min_meas)
{
    int bottom, top;

    if (bin_count <= 0)
        return -1;
    bottom = 0;
    top = bin_count-1;
    while (bottom <= top) {
        int mid = bottom + (top - bottom)/2;
        float bin_max = bin_maxes[mid];
        float bin_min = (mid == 0) ? min_meas : bin_maxes[mid-1];
        if (data >= bin_max)
            bottom = mid+1;
        else if (data < bin_min)
            top = mid-1;
        else
            return mid;
    }
    /* Out of range: clamp to the closest edge bin. */
    return (data < min_meas) ? 0 : bin_count-1;
}
/*
 * Print_histo - print the histogram to stdout, one line per bin.
 *
 * Each line shows the bin's lower and upper edge and then one '*' per
 * value counted in that bin.
 */
void Print_histo(float bin_maxes[], int bin_counts[], int bin_count, float min_meas)
{
    int row = 0;

    while (row < bin_count) {
        float low  = (row == 0) ? min_meas : bin_maxes[row-1];
        float high = bin_maxes[row];
        int printed = 0;

        printf("%.3f-%.3f:\t", low, high);
        while (printed < bin_counts[row]) {
            printf("*");   /* one '*' stands for one value */
            printed++;
        }
        printf("\n");
        row++;
    }
}
/*
 * Thread_sum - thread entry point for the mutex variant.
 *
 * rank: the thread's index, carried in the pointer value (main passes
 *       0 .. thread_count-1).
 *
 * Counts one contiguous slice of the global data array into a private
 * histogram, then adds it to the shared bin_counts while holding the
 * global mutex. Always returns NULL; terminates the process if the
 * private histogram cannot be allocated.
 */
void *Thread_sum(void* rank)
{
    //2021040058 xyy
    long my_rank = (long)rank;
    long long i;
    int b;
    long long my_n = data_count/thread_count;
    long long my_first_i = my_rank * my_n;
    /* The last thread also takes the data_count % thread_count leftover
       elements, which would otherwise never be counted. */
    long long my_last_i = (my_rank == thread_count-1) ? data_count : my_first_i + my_n;
    int *my_bin_counts = calloc(bin_count, sizeof(*my_bin_counts));

    if (my_bin_counts == NULL) {
        fprintf(stderr, "Thread %ld: cannot allocate local bin counts\n", my_rank);
        exit(EXIT_FAILURE);
    }
    for (i = my_first_i; i < my_last_i; i++) {
        int bin = Which_bin(data[i], bin_maxes, bin_count, min_meas);
        if (bin >= 0 && bin < bin_count)   /* never index outside the array */
            my_bin_counts[bin]++;
    }
    /* One critical section for the whole merge instead of a lock/unlock
       pair per bin. */
    pthread_mutex_lock(&mutex);
    for (b = 0; b < bin_count; b++)
        bin_counts[b] += my_bin_counts[b];
    pthread_mutex_unlock(&mutex);
    free(my_bin_counts);
    return NULL;
}
/*
 * main - mutex variant driver.
 *
 * Command line: <bin_count> <min_meas> <max_meas> <data_count> <thread_count>
 *
 * Generates the data set and the bins, histograms the data with
 * thread_count threads, histograms it again serially, and prints the
 * elapsed time of both passes. Returns 0 on success; an invalid command
 * line goes through Usage(), and allocation, mutex or thread-creation
 * failures terminate with EXIT_FAILURE.
 */
int main(int argc, char* argv[])
{
    pthread_t* thread_handles;
    double start,finish;   /* timestamps around each timed pass */
    double record;         /* elapsed time of the threaded pass */
    long rank;
    int i;
    int err;
    //2021040058 xyy
    if (argc != 6) Usage(argv[0]);
    bin_count = strtol(argv[1], NULL, 10);
    min_meas = strtof(argv[2], NULL);
    max_meas = strtof(argv[3], NULL);
    data_count = strtol(argv[4], NULL, 10);
    thread_count = strtol(argv[5],NULL,10);
    /* Reject values that would give a zero or negative allocation size,
       a division by zero in Thread_sum, or an empty measurement range. */
    if (bin_count <= 0 || data_count <= 0 || thread_count <= 0 || !(min_meas < max_meas))
        Usage(argv[0]);

    thread_handles = malloc(thread_count*sizeof(*thread_handles));
    bin_maxes = malloc(bin_count*sizeof(*bin_maxes));
    bin_counts = malloc(bin_count*sizeof(*bin_counts));
    data = malloc(data_count*sizeof(*data));
    if (thread_handles == NULL || bin_maxes == NULL || bin_counts == NULL || data == NULL) {
        fprintf(stderr, "%s: memory allocation failed\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    err = pthread_mutex_init(&mutex,NULL);
    if (err != 0) {
        fprintf(stderr, "%s: pthread_mutex_init failed (error %d)\n", argv[0], err);
        exit(EXIT_FAILURE);
    }

    Gen_data(min_meas, max_meas, data, data_count);
    Gen_bins(min_meas, max_meas, bin_maxes, bin_counts, bin_count);   /* also zeroes bin_counts */

    GET_TIME(start);
    /* The loop variable is a long so that the cast to void* matches the
       (long) cast that Thread_sum applies to its argument. */
    for (rank = 0; rank < thread_count; rank++) {
        err = pthread_create(&thread_handles[rank], NULL, Thread_sum, (void*)rank);
        if (err != 0) {
            fprintf(stderr, "%s: pthread_create failed (error %d)\n", argv[0], err);
            exit(EXIT_FAILURE);
        }
    }
    for (rank = 0; rank < thread_count; rank++) {
        pthread_join(thread_handles[rank], NULL);
    }
    GET_TIME(finish);
    record = finish - start;
    /* Enable to display the threaded result; keep it disabled for large
       data sets because it prints one character per value. */
    //Print_histo(bin_maxes, bin_counts, bin_count, min_meas);

    /* Start the serial pass from empty bins so it does not pile on top of
       the threaded result. */
    for (i = 0; i < bin_count; i++) {
        bin_counts[i] = 0;
    }
    GET_TIME(start);
    for (i = 0; i < data_count; i++) {
        int bin = Which_bin(data[i], bin_maxes, bin_count, min_meas);
        if (bin >= 0 && bin < bin_count)
            bin_counts[bin]++;
    }
    GET_TIME(finish);
    printf("thread number:%d\t run time:%.6lfseconds\n",thread_count,record);
    printf("single thread:\t run time:%.6lfseconds\n",finish-start);

    pthread_mutex_destroy(&mutex);
    free(thread_handles);
    free(data);
    free(bin_maxes);
    free(bin_counts);
    return 0;
}
信号量
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
#include "time.h"
/**************** Shared state ****************/
int thread_count;   // number of worker threads (shared by all threads)
int data_count;     // number of data values
float min_meas;     // left end of the whole measurement range
float max_meas;     // right end of the whole measurement range
float *bin_maxes;   // upper edge of every bin
int *bin_counts;    // number of values counted in every bin
int bin_count;      // number of bins
float *data;        // the whole data set
sem_t *sem;         // one semaphore per bin, guarding that bin's counter
/**********************************************/
/*
 * Usage - show how the program must be invoked, then stop.
 *
 * prog_name: program name shown in the message (main passes argv[0]).
 *
 * Writes the usage line to stderr and exits with EXIT_FAILURE so the
 * caller's shell sees the rejected command line as a failure. Does not
 * return.
 */
void Usage(char prog_name[]) {
    fprintf(stderr, "usage: %s ", prog_name);
    fprintf(stderr, "<bin_count> <min_meas> <max_meas> <data_count> <thread_count>\n");
    exit(EXIT_FAILURE);
}
/*
 * Gen_data - produce the input data set.
 *
 * Stores data_count pseudo-random values, scaled into the range from
 * min_meas to max_meas, in data[]. The generator is seeded with 0 at the
 * start of every call, which makes the output repeatable.
 */
void Gen_data(
        float min_meas    /* in  */,
        float max_meas    /* in  */,
        float data[]      /* out */,
        int   data_count  /* in  */) {
    srand(0);
    for (int n = 0; n < data_count; ++n) {
        data[n] = min_meas + (max_meas - min_meas)*rand()/((double) RAND_MAX);
    }
}
/*
 * Gen_bins - set up the bins of the histogram.
 *
 * The range [min_meas, max_meas] is cut into bin_count pieces of equal
 * width; bin_maxes[i] receives the upper edge of piece i and
 * bin_counts[i] is cleared.
 */
void Gen_bins(float min_meas,float max_meas,float bin_maxes[],int bin_counts[], int bin_count)
{
    float bin_width = (max_meas - min_meas)/bin_count;

    for (int b = 0; b < bin_count; ++b) {
        bin_counts[b] = 0;
        bin_maxes[b] = min_meas + (b+1)*bin_width;
    }
}
/*
 * Which_bin - locate, by binary search, the bin a value belongs to.
 *
 * data:      value to classify
 * bin_maxes: upper edges of the bins in ascending order; bin i covers
 *            [bin_maxes[i-1], bin_maxes[i]) and bin 0 starts at min_meas
 * bin_count: number of entries in bin_maxes
 * min_meas:  lower edge of bin 0
 *
 * Returns the bin index in [0, bin_count-1], or -1 when bin_count <= 0.
 *
 * If the search ends without a match the value lies outside every bin;
 * it is then placed in the nearest edge bin (bin 0 when below min_meas,
 * the last bin otherwise). Previously control reached the end of the
 * function without a return statement in that case, e.g. for a value
 * exactly equal to the overall maximum.
 */
int Which_bin(float data,float bin_maxes[],int bin_count,float min_meas)
{
    int bottom, top;

    if (bin_count <= 0)
        return -1;
    bottom = 0;
    top = bin_count-1;
    while (bottom <= top) {
        int mid = bottom + (top - bottom)/2;
        float bin_max = bin_maxes[mid];
        float bin_min = (mid == 0) ? min_meas : bin_maxes[mid-1];
        if (data >= bin_max)
            bottom = mid+1;
        else if (data < bin_min)
            top = mid-1;
        else
            return mid;
    }
    /* Out of range: clamp to the closest edge bin. */
    return (data < min_meas) ? 0 : bin_count-1;
}
/*
 * Print_histo - render the histogram as text on stdout.
 *
 * For every bin a line is written with the bin's lower and upper edge
 * followed by as many '*' characters as values were counted in the bin.
 */
void Print_histo(float bin_maxes[], int bin_counts[], int bin_count, float min_meas)
{
    for (int bin = 0; bin < bin_count; ++bin) {
        float bin_min;
        float bin_max = bin_maxes[bin];
        int star;

        /* Bin 0 starts at min_meas; every other bin starts at the
           previous bin's upper edge. */
        if (bin == 0)
            bin_min = min_meas;
        else
            bin_min = bin_maxes[bin-1];

        printf("%.3f-%.3f:\t", bin_min, bin_max);
        for (star = 0; star < bin_counts[bin]; ++star)
            printf("*");   /* one '*' stands for one value */
        printf("\n");
    }
}
/*
 * Thread_sum - thread entry point for the semaphore variant.
 *
 * rank: the thread's index, carried in the pointer value (main passes
 *       0 .. thread_count-1).
 *
 * Counts one contiguous slice of the global data array into a private
 * histogram, then adds it to the shared bin_counts one bin at a time,
 * holding that bin's semaphore (sem[b]) around each addition. Always
 * returns NULL; terminates the process if the private histogram cannot
 * be allocated.
 */
void *Thread_sum(void* rank)
{
    //2021040058 xyy
    long my_rank = (long)rank;
    long long i;
    int b;
    long long my_n = data_count/thread_count;
    long long my_first_i = my_rank * my_n;
    /* The last thread also takes the data_count % thread_count leftover
       elements, which would otherwise never be counted. */
    long long my_last_i = (my_rank == thread_count-1) ? data_count : my_first_i + my_n;
    int *my_bin_counts = calloc(bin_count, sizeof(*my_bin_counts));

    if (my_bin_counts == NULL) {
        fprintf(stderr, "Thread %ld: cannot allocate local bin counts\n", my_rank);
        exit(EXIT_FAILURE);
    }
    for (i = my_first_i; i < my_last_i; i++) {
        int bin = Which_bin(data[i], bin_maxes, bin_count, min_meas);
        if (bin >= 0 && bin < bin_count)   /* never index outside the array */
            my_bin_counts[bin]++;
    }
    for (b = 0; b < bin_count; b++) {
        sem_wait(&sem[b]);
        bin_counts[b] += my_bin_counts[b];
        sem_post(&sem[b]);
    }
    free(my_bin_counts);
    return NULL;
}
/*
 * main - semaphore variant driver.
 *
 * Command line: <bin_count> <min_meas> <max_meas> <data_count> <thread_count>
 *
 * Generates the data set and the bins, creates one semaphore per bin,
 * histograms the data with thread_count threads, histograms it again
 * serially, and prints the elapsed time of both passes. Returns 0 on
 * success; an invalid command line goes through Usage(), and allocation,
 * semaphore or thread-creation failures terminate with EXIT_FAILURE.
 */
int main(int argc, char* argv[])
{
    pthread_t* thread_handles;
    double start,finish;   /* timestamps around each timed pass */
    double record;         /* elapsed time of the threaded pass */
    long rank;
    int i;
    //2021040058 xyy
    if (argc != 6) Usage(argv[0]);
    bin_count = strtol(argv[1], NULL, 10);
    min_meas = strtof(argv[2], NULL);
    max_meas = strtof(argv[3], NULL);
    data_count = strtol(argv[4], NULL, 10);
    thread_count = strtol(argv[5],NULL,10);
    /* Reject values that would give a zero or negative allocation size,
       a division by zero in Thread_sum, or an empty measurement range. */
    if (bin_count <= 0 || data_count <= 0 || thread_count <= 0 || !(min_meas < max_meas))
        Usage(argv[0]);

    thread_handles = malloc(thread_count*sizeof(*thread_handles));
    bin_maxes = malloc(bin_count*sizeof(*bin_maxes));
    bin_counts = malloc(bin_count*sizeof(*bin_counts));
    data = malloc(data_count*sizeof(*data));
    sem = malloc(bin_count*sizeof(*sem));
    if (thread_handles == NULL || bin_maxes == NULL || bin_counts == NULL
            || data == NULL || sem == NULL) {
        fprintf(stderr, "%s: memory allocation failed\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    /* Every semaphore starts at 1, i.e. its bin is initially unlocked. */
    for (i = 0; i < bin_count; i++) {
        if (sem_init(&sem[i],0,1) != 0) {
            perror("sem_init");
            exit(EXIT_FAILURE);
        }
    }

    Gen_data(min_meas, max_meas, data, data_count);
    Gen_bins(min_meas, max_meas, bin_maxes, bin_counts, bin_count);   /* also zeroes bin_counts */

    GET_TIME(start);
    /* The loop variable is a long so that the cast to void* matches the
       (long) cast that Thread_sum applies to its argument. */
    for (rank = 0; rank < thread_count; rank++) {
        int err = pthread_create(&thread_handles[rank], NULL, Thread_sum, (void*)rank);
        if (err != 0) {
            fprintf(stderr, "%s: pthread_create failed (error %d)\n", argv[0], err);
            exit(EXIT_FAILURE);
        }
    }
    for (rank = 0; rank < thread_count; rank++) {
        pthread_join(thread_handles[rank], NULL);
    }
    GET_TIME(finish);
    record = finish - start;
    /* Enable to display the threaded result; keep it disabled for large
       data sets because it prints one character per value. */
    //Print_histo(bin_maxes, bin_counts, bin_count, min_meas);

    /* Start the serial pass from empty bins so it does not pile on top of
       the threaded result. */
    for (i = 0; i < bin_count; i++) {
        bin_counts[i] = 0;
    }
    GET_TIME(start);
    for (i = 0; i < data_count; i++) {
        int bin = Which_bin(data[i], bin_maxes, bin_count, min_meas);
        if (bin >= 0 && bin < bin_count)
            bin_counts[bin]++;
    }
    GET_TIME(finish);
    printf("thread number:%d\t run time:%.6lfseconds\n",thread_count,record);
    printf("single thread:\t run time:%.6lfseconds\n",finish-start);

    for (i = 0; i < bin_count; i++) {
        sem_destroy(&sem[i]);
    }
    free(sem);
    free(thread_handles);
    free(data);
    free(bin_maxes);
    free(bin_counts);
    return 0;
}