本文主要是根据MC随机抽样思想,进行已知分布的抽样,对于数据分析有用,主要做如下几个版本
- C++
- MATLAB
- C#
- PYTHON
- C
C++版本的主要代码为
(1)数据部分,概率密度分布
const double energy[210]={21.000000, 22.000000, 23.000000, 24.000000, 25.000000, 26.000000, 27.000000, 28.000000, 29.000000, 30.000000, 31.000000, 32.000000, 33.000000, 34.000000, 35.000000, 36.000000, 37.000000, 38.000000, 39.000000, 40.000000, 41.000000, 42.000000, 43.000000, 44.000000, 45.000000, 46.000000, 47.000000, 48.000000, 49.000000, 50.000000, 51.000000, 52.000000, 53.000000, 54.000000, 55.000000, 56.000000, 57.000000, 58.000000, 59.000000, 60.000000}; const double probability[210]={ 0.003172, 0.001603, 0.003484, 0.000000, 0.000365, 0.003666, 0.001642, 0.005879, 0.005202, 0.022721, 0.027685, 0.056204, 0.075032, 0.118366, 0.163304, 0.215736, 0.283005, 0.343148, 0.422711, 0.494946, 0.574868, 0.652534, 0.717833, 0.794975, 0.842019, 0.909445, 0.924232, 0.986227, 0.974940, 1.000000, 0.964995, 0.968823, 0.896359, 0.866523, 0.761573, 0.687310, 0.557857, 0.420584, 0.248464, 0.038391};
(2)归一化概率密度分布并积分得到分布函数
for (int x = 1; x < 210; x++) { sump += probability[x]; } pp[0] = probability[0]; for (int x = 1; x < 210; x++) { pp[x] = pp[x - 1] + probability[x]; }
(3)抽样函数
1 double IncidentGammaEnergy() 2 { 3 double rand=G4UniformRand(); 4 double sum=pp[0]; 5 double pre_sum=0.; 6 int p_size = sizeof(probability)/sizeof(probability[0]); 7 for(int k=0;k<p_size-1;k++) 8 { 9 if ((rand>pre_sum) && (rand<=sum)) 10 { 11 return energy[k]+(energy[k+1]-energy[k])*(rand-pre_sum)*(sum-pre_sum); 12 } 13 pre_sum = sum; 14 sum=pp[k+1]; 15 } 16 return energy[p_size-1]; 17 }
matlab实现的代码为
clear clc
% data name is cs137.mat,it has sub var x and y load cs137 x=x; y=y; % normalize y=y/sum(y); n=length(x); % fenbuhanshu pp=zeros(n,1); for i=1:n-1 pp(i+1)=pp(i)+y(i+1); end % sample now pre_sum=0; sumup=pp(1); m=100000; rand_number=rand(m,1); out=zeros(m,1); for j=1:m for i=1:n-1 if(rand_number(j)>pre_sum)&&(rand_number(j)<=sumup) out(j)=x(i); end pre_sum=sumup; sumup=pp(i+1); end out=out'; end % plot and compare [histvalue,nbins]=hist(out,1:1:n); histvalue=histvalue'; plot(nbins,histvalue/sum(histvalue)) hold on plot(x,y,'r') hold off legend('抽样结果','原始数据')
抽样100000次(m=100000)结果为
m=10000
m=1000;
C#实现
using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.Statistics;
using System;
using System.Collections.Generic;
using System.IO;
namespace array_sample
{
internal class Program
{
private static void Main(string[] args)
{
var spectest = loadspec("spectest");
var result = IncidentGammaEnergy(spectest, 100000);
int n = spectest.Length;
//int n = 200;
double[] histallll = new double[n];
var histogram = new Histogram(result, n);
for (int i = 0; i < histallll.Length; i++)
{
histallll[i] = histogram[i].Count;
}
//save_data(histallll, "test.txt");
save_data(histallll, "test.txt");
//foreach (double test in histallll)
//{
// Console.WriteLine(test);
//}
//Console.WriteLine(histogram);
}
private static double[] IncidentGammaEnergy(double[] probability, int cishu)
{
double sump = 0;
double[] pp = new double[probability.Length];
double[] energy = new double[probability.Length];
for (int x = 0; x < probability.Length; x++)
{ sump += probability[x]; }
//double pp[210];
pp[0] = probability[0] / sump;
energy[0] = 0;
for (int x = 1; x < probability.Length; x++)
{
pp[x] = pp[x - 1] + probability[x] / sump;
energy[x] = x;
}
double sum = pp[0];
double pre_sum = 0.0;
int p_size = probability.Length;
double[] result = new double[cishu];
var randoms = GenerateRandom(cishu);
for (int i = 0; i < cishu; i++)
{
//double rand = new Random().NextDouble();
//long tick = DateTime.Now.Ticks;
//Random ran = new Random((int)(tick & 0xffffffffL) | (int)(tick >> 32));
//double rand = ran.NextDouble();
//Console.WriteLine(rand);
var rand = randoms[i];
//Console.WriteLine(rand);
for (int k = 0; k < p_size - 1; k++)
{
if ((rand > pre_sum) && (rand <= sum))
{
result[i] = energy[k];// +(energy[k + 1] - energy[k]) * (rand - pre_sum) * (sum - pre_sum);
}
pre_sum = sum;
sum = pp[k + 1];
}
//result[i] = energy[p_size - 1];
}
return result;
}
private static List<double> GenerateRandom(int iNum)
{
long lTick = DateTime.Now.Ticks;
List<double> lstRet = new List<double>();
for (int i = 0; i < iNum; i++)
{
Random ran = new Random((int)lTick * i);
double iTmp = ran.NextDouble();
lstRet.Add(iTmp);
lTick += (new Random((int)lTick).Next(978));
}
return lstRet;
}
private static double[] loadspec(string specname)
{
var M = Matrix<double>.Build;
var V = Vector<double>.Build;
string[] content = File.ReadAllLines(@specname);
double[] yvalue = new double[content.Length];
//double[] axesx = new double[content.Length];
for (int i = 1; i < content.Length; i++)
{
//axesx[i] = i;
yvalue[i] = Convert.ToDouble(content[i]);
}
return yvalue;
}
private static void save_data(double[] content, string str)
{
FileStream fs = new FileStream(@str, FileMode.Create);
StreamWriter sw = new StreamWriter(fs);
for (int i = 1; i < content.Length; i++)
{
sw.WriteLine(Convert.ToDouble(content[i]));//Convert.ToString(hist)
}
sw.Close();
sw.Dispose();
sw = null;
}
private static void save_data(Histogram content, string str)
{
FileStream fs = new FileStream(@str, FileMode.Create);
StreamWriter sw = new StreamWriter(fs);
sw.Write(content);//Convert.ToString(hist)
sw.Close();
sw.Dispose();
sw = null;
}
}
}
其中遇到的主要问题是随机数的问题,跟MATLAB不同,他生成的随机数有很多重复,这是种子决定的,也就是时间变化赶不上生成的速度,于是查找资料,参考改写了种子,而且只能提前生成随机数。没解决的问题是histgram函数后面指定lower bound 和upper bound都有问题,所以只能让他自动确定了。其实可以自己写一个hist函数,参考引用4或5即可。
2016-12-30 01:24:05
明天写一下python实现,还没安装python,囧~突然发现有安装winpython,只是以前安装的,升级WIN10的1601版本,由于是全新升级,导致各种path路径都没了。。。
python实现
基本思路是使用random.choice函数。首先将数据读入,简单点先做数组好了,复杂点就读取文件,将数组中元素采用*n的形式搞定,n指的是纵坐标,需要为整数。直接choice函数抽样,采用循环,设定不同次数,然后hist结果,最后画图。具体实现如下
按照以上思路果然实现了(2016-12-31,03:18:05)
# -*- coding: utf-8 -*-
import numpy as np
import random as rd
import matplotlib.pyplot as plt
import matplotlib
zhfont1 = matplotlib.font_manager.FontProperties(fname='C:\Windows\Fonts\simsun.ttc')
#load data from file
bb=np.loadtxt('spectest')
cc=bb.copy()
n=len(bb)
aa=range(n)
#sum all yvalue
test=0
for k in range(len(bb)):
test += bb[k]
#creat an array with yvalues' xvalue
out = [0]*test
j=0
for k in range(len(aa)):
while bb[k]!=0:
out[j]= aa[k]
bb[k]=bb[k]-1
j=j+1
#sample
cishu=100000
count=0
result=[0]*cishu
while count<cishu:
result[count]=rd.choice(out)
count=count+1
#hist it
histout=np.histogram(result, bins=range(1025))
#save data
np.savetxt('result.txt',histout[0])
np.savetxt('cc.txt',cc)
#plot data
fig = plt.figure(1)
x = range(1024)
plt.plot(x,histout[0]/cishu,'m:',label="sampled data")
plt.plot(x,cc/test,'k-',label="raw data")
plt.xlabel('道址',fontproperties=zhfont1)
plt.ylabel('计数',fontproperties=zhfont1)
plt.legend(loc = 'upper left')
plt.show()
fig.savefig("test.png")
中间遇到一些波折,这是python自己写的第一个程序吧,,,其中的一个波折是数组长度搞错了,test应该用第一列的纵坐标之和不小心用成了第三列x.*y之和,如下图
检查了下,发现c#中写入文件后没有fs.close,特此标记下。
C的将来有时间再说了,或者谁有兴趣给做个也好啊,C还是不够熟悉哦,虽然大学只是学了C。。。
参考文献:
1、许淑艳老师的《蒙特卡罗方法在实验核物理中的应用》的第三章《由已知分布的随机抽样》
2、C#产生一组不重复随机数的两种方法
3、MATH.NET
4、msdn的Random.NextDouble Method ()
5、JasenKin的《算法--区间数据计算》