Comparison of Optimization Algorithms

Below are some conclusions about optimization algorithms written by one blogger; the second link is a summary by a blogger from abroad, who has apparently since expanded it into a published survey, and someone has already translated it into Chinese, so the links can be read together. I am saving them here for reference. So far I have only read up to the momentum part, and my implementation stops there too: using the various optimization algorithms to fit a logistic regression. I will post the results later; for now this is just a bookmark.


http://blog.csdn.net/luo123n/article/details/48239963

http://sebastianruder.com/optimizing-gradient-descent/index.html#fn:7

http://mp.weixin.qq.com/s?__biz=MzA3MzI4MjgzMw==&mid=2650720663&idx=3&sn=d9f671f77be23a148d1830448154a545&chksm=871b0de9b06c84ffaf260b9ba2a010108cca62d5ce3dcbd8c98c72c9f786f9cd460b27b496ca&mpshare=1&scene=2&srcid=1121mgll9exVL2Gia7trGTn7&from=timeline#wechat_redirect


https://www.52ml.net/21094.html
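
All of the implementations below fit the same model, so it helps to state the gradient they share. For logistic regression, the gradient of the log-likelihood with respect to weight w_i is

    \frac{\partial \ell}{\partial w_i} = \sum_j \left( y_j - \sigma(w^\top x_j) \right) x_{j,i}

which is exactly the (label - predict) * feature term accumulated in the code below; because this is gradient ascent on the log-likelihood, the weight updates use a plus sign. (This is the standard derivation, stated here for reference rather than taken from the linked posts.)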


/**
	 * Min-max normalization: rescale every feature to [0, 1].
	 * Needs: import java.util.Arrays;
	 */
	private void normalization(){
		double[] max = new double[corpus.getFeatureNum()];
		double[] min = new double[corpus.getFeatureNum()];
		// Initialize to -inf/+inf: starting from 0.0 would break features
		// that are all-positive (min stuck at 0) or all-negative (max stuck at 0).
		Arrays.fill(max, Double.NEGATIVE_INFINITY);
		Arrays.fill(min, Double.POSITIVE_INFINITY);

		for (LRInstance instance : corpus.getInstances().values()){
			for (int i = 0; i < corpus.getFeatureNum(); i++){
				double feature = instance.getFeatureIndex(i);
				// two independent checks, not else-if: the first value seen
				// must be able to set both the max and the min
				if (feature > max[i]) max[i] = feature;
				if (feature < min[i]) min[i] = feature;
			}
		}
		for (LRInstance instance : corpus.getInstances().values()){
			for (int i = 0; i < corpus.getFeatureNum(); i++){
				if (max[i] == min[i]) continue; // constant feature, leave as-is
				double feature = instance.getFeatureIndex(i);
				instance.setFeatureIndex(i, (feature - min[i]) / (max[i] - min[i]));
			}
		}
	}
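
A quick sanity check of the min-max formula: if a feature takes the values 2, 4 and 10, then min = 2 and max = 10, and the value 4 is rescaled to (4 - 2) / (10 - 2) = 0.25.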

/**
	 * Logistic sigmoid: maps any real z into (0, 1).
	 */
	private double sigmoid(double z){
		return 1 / (1 + Math.exp(-z));
	}
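
sigmoid squashes any real-valued score into (0, 1), with sigmoid(0) = 0.5, so in the logistic regression code below predict can be read as the model's estimate of P(y = 1 | x).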
 

/**
	 * Batch gradient descent: every step uses all examples.
	 */
	private void batchGradientDescent(){
		for (int k = 0; k < conf.getMaxIter(); k++){
			// accumulate the full gradient at the current weights
			// before touching any of them
			double[] gradient = new double[corpus.getFeatureNum()];
			for (LRInstance instance : corpus.getInstances().values()){
				double predict = sigmoid(instance.getCurrRTW(weights));
				double residual = instance.getLabel() - predict;
				for (int i = 0; i < corpus.getFeatureNum(); i++){
					gradient[i] += residual * instance.getFeatureIndex(i);
				}
			}
			// epsilon convergence check removed here: with this small a
			// sample it never triggers (see the note at the end)
			for (int i = 0; i < corpus.getFeatureNum(); i++){
				weights[i] += conf.getShrinkage() * gradient[i];
			}
			error(k);
		}
	}
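
In symbols, one iteration of the loop above performs, for every feature i,

    w_i \leftarrow w_i + \eta \sum_j \left( y_j - \sigma(w^\top x_j) \right) x_{j,i}

where \eta is conf.getShrinkage(). A single update touches every training example, which is what makes plain batch gradient descent expensive on large corpora.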

	/**
	 * Stochastic gradient descent: every step uses one example.
	 */
	private void stoGradientDescent(){
		Random random = new Random(); // one RNG, not a fresh one per iteration
		for (int k = 0; k < conf.getMaxIter(); k++){
			// draw one example at random; instance ids are 1-based
			int instanceId = random.nextInt(corpus.getInstancesNum()) + 1;
			LRInstance instance = corpus.getInstances().get(instanceId);
			// compute the prediction once, so every weight is updated
			// from the same residual
			double predict = sigmoid(instance.getCurrRTW(weights));
			double residual = instance.getLabel() - predict;
			for (int i = 0; i < corpus.getFeatureNum(); i++){
				weights[i] += conf.getShrinkage() * residual * instance.getFeatureIndex(i);
			}
			error(k);
		}
	}
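
Each iteration here draws a single random example (x_j, y_j) and applies

    w_i \leftarrow w_i + \eta \left( y_j - \sigma(w^\top x_j) \right) x_{j,i}

so an update is cheap but noisy; the error(k) curve should be expected to fluctuate rather than decrease monotonically.
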
/**
	 * Mini-batch gradient descent: every step uses sampleNum examples.
	 * @param sampleNum size of each mini-batch
	 */
	private void miniBatchGradientDescent(int sampleNum){
		Random random = new Random();
		for (int k = 0; k < conf.getMaxIter(); k++){
			// step 1: sample sampleNum examples with replacement (ids are 1-based)
			List<LRInstance> samples = new ArrayList<LRInstance>();
			for (int n = 0; n < sampleNum; n++){
				int instanceId = random.nextInt(corpus.getInstancesNum()) + 1;
				samples.add(corpus.getInstances().get(instanceId));
			}
			// step 2: accumulate the gradient over the mini-batch
			double[] gradient = new double[corpus.getFeatureNum()];
			for (LRInstance instance : samples){
				double predict = sigmoid(instance.getCurrRTW(weights));
				double residual = instance.getLabel() - predict;
				for (int i = 0; i < corpus.getFeatureNum(); i++){
					gradient[i] += residual * instance.getFeatureIndex(i);
				}
			}
			// step 3: update all weights together
			for (int i = 0; i < corpus.getFeatureNum(); i++){
				weights[i] += conf.getShrinkage() * gradient[i];
			}
			error(k);
		}
	}
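
Mini-batch sits between the two extremes: each update sums the gradient over a batch B of sampleNum randomly drawn examples,

    w_i \leftarrow w_i + \eta \sum_{j \in B} \left( y_j - \sigma(w^\top x_j) \right) x_{j,i}

trading the stability of full-batch updates against the cheapness of stochastic ones.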

	/**
	 * Momentum: SGD with a velocity term that accumulates past gradients.
	 */
	private void momentum(){
		Random random = new Random();
		// velocity term, one entry per weight; Java zero-initializes the
		// array, which is the right starting point (this answers the
		// "init is right?" question in the first draft)
		double[] velocity = new double[corpus.getFeatureNum()];
		for (int k = 0; k < conf.getMaxIter(); k++){
			int instanceId = random.nextInt(corpus.getInstancesNum()) + 1;
			LRInstance instance = corpus.getInstances().get(instanceId);
			double predict = sigmoid(instance.getCurrRTW(weights));
			double residual = instance.getLabel() - predict;
			for (int i = 0; i < corpus.getFeatureNum(); i++){
				double gradient = residual * instance.getFeatureIndex(i);
				// v = gamma * v + eta * g, then step along the velocity
				velocity[i] = conf.getGamma() * velocity[i] + conf.getShrinkage() * gradient;
				weights[i] += velocity[i];
			}
			error(k);
		}
	}
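
Per feature, the update implemented above is

    v_i \leftarrow \gamma v_i + \eta g_i, \qquad w_i \leftarrow w_i + v_i

where g_i is the stochastic gradient, \eta is conf.getShrinkage() and \gamma is conf.getGamma(); Ruder's post (linked above) suggests a value around 0.9 for \gamma. The velocity accumulates gradients that keep pointing the same way, which damps oscillation and speeds movement along consistent directions.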

The epsilon-based convergence check in the middle of the gradient computation was commented out for these runs: the sample size is too small, so the Epsilon setting basically never triggers.
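
For a larger sample, one workable stopping rule is to quit once the weight update itself becomes small. A minimal sketch, assuming the same weights array and the conf.getEpsilon() that appears in the commented-out checks; the converged helper is my illustration, not part of the original code:

	/**
	 * Hypothetical convergence test: stop once the L2 norm of the last
	 * weight update drops below epsilon. oldWeights is a copy of the
	 * weights taken at the start of the iteration.
	 */
	private boolean converged(double[] oldWeights, double[] newWeights, double epsilon){
		double sumSq = 0.0;
		for (int i = 0; i < newWeights.length; i++){
			double d = newWeights[i] - oldWeights[i];
			sumSq += d * d;
		}
		return Math.sqrt(sumSq) < epsilon;
	}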
