A simple script for smoothing data and flagging outliers

One-dimensional smoothing: Iterative Smoother and Gaussian Process

Iterative Smoother: from this paper (arXiv:2009.12045, also cited in lognormal_weight below)
Gaussian Process: from scikit-learn
The code is as follows:

import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from astropy.cosmology import Planck15 as plk15
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
import warnings

def centerize(x, xmin, xmax, std=None):
    '''Linearly map x from [xmin, xmax] to [-1, 1]; rescale std accordingly if given.'''
    x = np.asarray(x)
    xcenter = (xmin + xmax)/2.0
    scale = (xmax - xmin)/2.0
    if scale == 0:
        return x, std
    if std is not None:
        std = np.asarray(std)/scale
    return (x-xcenter)/scale, std

def uncenterize(x, xmin, xmax, std=None):
    '''Inverse of centerize: map x from [-1, 1] back to [xmin, xmax]; rescale std accordingly if given.'''
    x = np.asarray(x)
    xcenter = (xmin + xmax)/2.0
    scale = (xmax - xmin)/2.0
    if std is not None:
        std = np.asarray(std)*scale
    return x*scale + xcenter, std
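
A quick roundtrip check of the pair (an illustrative sketch, not from the original post):

x = np.array([2.0, 3.0, 5.0])
xc, _ = centerize(x, x.min(), x.max())       # maps to [-1, 1]
xb, _ = uncenterize(xc, x.min(), x.max())    # maps back
print(xc)                  # [-1.         -0.33333333  1.        ]
print(np.allclose(xb, x))  # True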

def CRBF_kernel(constant_value=1.0, constant_value_bounds=(1e-3, 1e3), length_scale=0.1, length_scale_bounds=(1e-3, 1e3)):
    '''
    ConstantKernel * RBF; see the pages below for details:
    https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel
    https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RBF.html#sklearn.gaussian_process.kernels.RBF
    https://scikit-learn.org/stable/modules/gaussian_process.html#gp-kernels
    '''
    kernel = ConstantKernel(constant_value=constant_value, constant_value_bounds=constant_value_bounds) * RBF(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
    return kernel
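
For example (an illustrative sketch, not from the original post), a kernel whose hyperparameters are held fixed, so the regressor will not optimize them:

# amplitude and length scale are frozen at the given (arbitrary) values
fixed_kernel = CRBF_kernel(constant_value=1.0, constant_value_bounds='fixed',
                           length_scale=0.2, length_scale_bounds='fixed')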

def no_optimizer(obj_func, initial_theta, bounds):
    '''
    Do not perform any optimization; equivalent to setting bounds='fixed' in the kernel function.
    '''
    return initial_theta, 1.e-6  # dummy value for the objective-function minimum

def gaussian_process(x, y, kernel=CRBF_kernel(), alpha=0.0, optimizer='fmin_l_bfgs_b', n_restarts_optimizer=0, get_obj=False, normalize_y=True, centerize_x=True, **kwargs):
    '''
    Wrapper for GaussianProcessRegressor in scikit-learn, see https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html#sklearn.gaussian_process.GaussianProcessRegressor
    kernel: kernel instance, default=CRBF_kernel(). Kernel arguments typically come in (parameter, bounds) pairs, where bounds can be a tuple or 'fixed'.
    alpha: float or ndarray of shape (n_samples,), default=0.0. Value added to the diagonal of the kernel matrix during fitting; it avoids numerical problems and can be interpreted as the variance of additional Gaussian measurement noise on the training observations. NOTE: if normalize_y is True, this function rescales alpha accordingly.
    normalize_y: normalize the target values y by removing the mean and scaling to unit variance.
    centerize_x: rescale x so that x.min() maps to -1 and x.max() maps to 1.
    optimizer: 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'; optimizes the kernel's parameters. Signature:
        def optimizer(obj_func, initial_theta, bounds):
            return theta_opt, func_min
    n_restarts_optimizer: int, default=0.
    return:
        if get_obj:
            fit_func, gp_obj
        else:
            fit_func
        NOTE: fit_func accepts both return_std and return_cov keywords, each False by default.
    '''
    x = np.asarray(x)
    xmin = x.min()
    xmax = x.max()
    if centerize_x:
        x, _ = centerize(x, xmin, xmax)
    y = np.asarray(y)
    x = x[:,None]
    if normalize_y:
        alpha = alpha/y.std(axis=0)**2
    gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha, optimizer=optimizer, n_restarts_optimizer=n_restarts_optimizer, normalize_y=normalize_y, **kwargs)
    gp.fit(x, y)
    def _predict(xin, return_std=False, return_cov=False):
        if centerize_x:
            xin, _ = centerize(xin, xmin, xmax)
        else:
            xin = np.asarray(xin)
        xin = xin[:,None]
        return gp.predict(xin, return_std=return_std, return_cov=return_cov)
    if get_obj:
        return _predict, gp
    else:
        return _predict
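
A minimal usage sketch (my own example, not from the original post): fit noisy data, then flag points that sit too far from the smooth curve. The noise level, the injected outlier, and the 3-sigma threshold are all arbitrary illustrative choices:

rng = np.random.default_rng(42)
x = np.linspace(0.1, 10.0, 100)
sigma_noise = 0.1
y = np.sin(x) + rng.normal(0.0, sigma_noise, x.size)
y[20] += 1.5  # inject an artificial outlier

# pass optimizer=no_optimizer here to keep the kernel hyperparameters fixed
fit_func = gaussian_process(x, y, alpha=sigma_noise**2)
y_smooth, y_std = fit_func(x, return_std=True)
# y_std is the uncertainty of the smooth curve itself, so add the
# measurement noise back in before flagging outliers
outliers = np.abs(y - y_smooth) > 3.0*np.sqrt(y_std**2 + sigma_noise**2)
print('flagged indices:', np.where(outliers)[0])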
        
def lognormal_weight(xi, x, delta=0.3):
    '''
    Lognormal weighting function for the data points, from https://arxiv.org/pdf/2009.12045.pdf
    xi: array_like, (M,), data points (must be positive)
    x: array_like, (N,), points to evaluate (must be positive)
    delta: correlation length; the larger delta, the smoother the result
    return: ndarray, (M, N), the weight matrix
    '''
    xi, x = np.meshgrid(xi, x, indexing='ij')
    return np.exp(-np.log(x/xi)**2/2./delta**2)
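
A quick shape check (illustrative, not from the original post); row i holds the weights of the evaluation points around data point xi[i], peaking where the two coincide:

xi = np.array([1.0, 2.0, 4.0])
x_eval = np.linspace(0.5, 8.0, 200)
W = lognormal_weight(xi, x_eval, delta=0.3)
print(W.shape)  # (3, 200)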

def normal_weight(xi, x, delta=0.3):
    '''
    Gaussian (normal) weighting function, the linear-scale analogue of lognormal_weight
    xi: array_like, (M,), data points
    x: array_like, (N,), points to evaluate
    delta: correlation length; the larger delta, the smoother the result
    return: ndarray, (M, N), the weight matrix
    NOTE: the body below is reconstructed by analogy with lognormal_weight; the original post is truncated here.
    '''
    xi, x = np.meshgrid(xi, x, indexing='ij')
    return np.exp(-(x - xi)**2/2./delta**2)
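
The original post breaks off before the iterative smoother itself appears. Below is a minimal sketch of the iterative smoothing method of arXiv:2009.12045 built on the weight functions above; the name iterative_smooth, the zero initial guess, and all defaults are my assumptions, not the author's code:

def iterative_smooth(xi, yi, sigma, x_eval, weight_func=lognormal_weight,
                     delta=0.3, n_iter=10):
    # Reconstruction sketch, not the original code: at each iteration the
    # inverse-variance-weighted residuals are smoothed with weight_func and
    # added back to the current estimate of the underlying curve.
    xi = np.asarray(xi)
    yi = np.asarray(yi, dtype=float)
    inv_var = 1.0/np.asarray(sigma)**2
    x_eval = np.asarray(x_eval)
    W_eval = weight_func(xi, x_eval, delta=delta)  # (M, N) weight matrix
    W_data = weight_func(xi, xi, delta=delta)      # (M, M) weight matrix
    y_fit_data = np.zeros_like(yi)                 # estimate at the data points
    y_fit_eval = np.zeros(x_eval.size)             # estimate at the evaluation points
    for _ in range(n_iter):
        res = (yi - y_fit_data)*inv_var
        y_fit_eval = y_fit_eval + (W_eval*res[:, None]).sum(axis=0) \
                                  /(W_eval*inv_var[:, None]).sum(axis=0)
        y_fit_data = y_fit_data + (W_data*res[:, None]).sum(axis=0) \
                                  /(W_data*inv_var[:, None]).sum(axis=0)
    return y_fit_eval

In the paper the iteration typically starts from a fiducial model rather than from zero; delta and n_iter together set the effective smoothing scale.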