Simple scripts for smoothing and flagging outliers

1D smoothing: Iterative Smoother and Gaussian Process

Iterative Smoother: from this paper (https://arxiv.org/pdf/2009.12045.pdf)
Gaussian Process: from scikit-learn
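The core of the iterative smoother is a weighted-residual update: each iteration corrects the current estimate by a weighted average of the residuals, so the estimate moves toward the data and the iteration count controls the smoothness. A minimal single-step sketch, mirroring _iter_smooth_1step in the script below (the toy data and delta value are illustrative):

import numpy as np

def one_smoother_step(xi, yi, y_hat, delta=0.3):
    # Gaussian weights between all pairs of data points
    W = np.exp(-(xi[:, None] - xi[None, :])**2/2./delta**2)
    dy = yi - y_hat                              # residuals of the current estimate
    return y_hat + np.dot(dy, W)/W.sum(axis=0)   # add the weighted mean residual

xi = np.linspace(0., 1., 50)
yi = np.sin(2*np.pi*xi) + 0.1*np.random.randn(50)
y_hat = np.zeros_like(yi)
for _ in range(50):   # more iterations track the data more closely
    y_hat = one_smoother_step(xi, yi, y_hat, delta=0.05)

The full script is as follows: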

import numpy as np
import matplotlib.pyplot as plt
from numba import njit, prange
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel

def centerize(x, xmin, xmax, std=None):
    '''Map x from [xmin, xmax] to [-1, 1]; rescale std accordingly if given.'''
    x = np.asarray(x)
    xcenter = (xmin + xmax)/2.0
    scale = (xmax - xmin)/2.0
    if scale == 0:
        return x, std
    if std is not None:
        std = np.asarray(std)/scale
    return (x-xcenter)/scale, std

def uncenterize(x, xmin, xmax, std=None):
    '''Inverse of centerize: map x from [-1, 1] back to [xmin, xmax].'''
    x = np.asarray(x)
    xcenter = (xmin + xmax)/2.0
    scale = (xmax - xmin)/2.0
    if std is not None:
        std = np.asarray(std)*scale
    return x*scale + xcenter, std

def CRBF_kernel(constant_value=1.0, constant_value_bounds=(1e-3, 1e3), length_scale=0.1, length_scale_bounds=(1e-3, 1e3)):
    '''
    ConstantKernel * RBF see website below for details:
    https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.ConstantKernel.html#sklearn.gaussian_process.kernels.ConstantKernel
    https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.kernels.RBF.html#sklearn.gaussian_process.kernels.RBF
    https://scikit-learn.org/stable/modules/gaussian_process.html#gp-kernels
    '''
    kernel = ConstantKernel(constant_value=constant_value, constant_value_bounds=constant_value_bounds) * RBF(length_scale=length_scale, length_scale_bounds=length_scale_bounds)
    return kernel

def no_optimizer(obj_func, initial_theta, bounds):
    '''
    Do not do any optimization; equivalent to setting bounds='fixed' in the kernel.
    '''
    return initial_theta, 1.e-6

def gaussian_process(x, y, kernel=CRBF_kernel(), alpha=0.0, optimizer='fmin_l_bfgs_b', n_restarts_optimizer=0, get_obj=False, normalize_y=True, centerize_x=True, **kwargs):
    '''
    Wrapper for GaussianProcessRegressor in scikit-learn, see https://scikit-learn.org/stable/modules/generated/sklearn.gaussian_process.GaussianProcessRegressor.html#sklearn.gaussian_process.GaussianProcessRegressor
    kernel: kernel instance, default=CRBF_kernel(); kernel arguments are typically (parameter, bounds), where bounds can be a tuple or 'fixed'
    alpha: float or ndarray of shape (n_samples,), default=0.0 here (sklearn's own default is 1e-10). Value added to the diagonal of the kernel matrix during fitting, to avoid numerical problems; it can also be interpreted as the variance of additional Gaussian measurement noise on the training observations. NOTE: in this function, if normalize_y, alpha is normalized at the same time.
    normalize_y: normalize the target values y by removing the mean and scaling to unit variance
    centerize_x: rescale x so that x.min() maps to -1 and x.max() maps to 1
    optimizer: 'fmin_l_bfgs_b' or callable, default='fmin_l_bfgs_b'; optimizes the kernel's parameters. Signature:
        def optimizer(obj_func, initial_theta, bounds):
            return theta_opt, func_min
    n_restarts_optimizer: int, default 0.
    return:
        if get_obj:
            fit_func, gp_obj
        else:
            fit_func
        NOTE: fit_func accepts return_std and return_cov, both default False
    '''
    x = np.asarray(x)
    xmin = x.min()
    xmax = x.max()
    if centerize_x:
        x, _ = centerize(x, xmin, xmax)
    y = np.asarray(y)
    x = x[:,None]
    if normalize_y:
        # sklearn rescales y to unit variance internally, so rescale the
        # noise variance alpha by the same factor
        alpha = alpha/y.std(axis=0)**2
    gp = GaussianProcessRegressor(kernel=kernel, alpha=alpha, optimizer=optimizer, n_restarts_optimizer=n_restarts_optimizer, normalize_y=normalize_y, **kwargs)
    gp.fit(x, y)
    def _predict(xin, return_std=False, return_cov=False):
        if centerize_x:
            xin, _ = centerize(xin, xmin, xmax)
        else:
            xin = np.asarray(xin)
        xin = xin[:,None]
        return gp.predict(xin, return_std=return_std, return_cov=return_cov)
    if get_obj:
        return _predict, gp
    else:
        return _predict
        
def lognormal_weight(xi, x, delta=0.3):
    '''
    lognormal weighting function for different points, from https://arxiv.org/pdf/2009.12045.pdf
    xi: array_like, (M,), data points
    x: array_like, (N,), points to evaluate 
    delta: correlation length, larger delta, more smooth
    return: ndarray, (M, N), the weight matrix
    '''
    xi, x = np.meshgrid(xi, x, indexing='ij')
    return np.exp(-np.log(x/xi)**2/2./delta**2)

def normal_weight(xi, x, delta=0.3):
    '''
    normal weighting function for different points
    xi: array_like, (M,), data points
    x: array_like, (N,), points to evaluate 
    delta: correlation length, larger delta, more smooth
    return: ndarray, (M, N), the weight matrix
    '''
    xi, x = np.meshgrid(xi, x, indexing='ij')
    return np.exp(-(x - xi)**2/2./delta**2)

def _iter_smooth_1step(yi, yi_hat, W, covinv):
    dy = yi - yi_hat
    if covinv is not None:
        if W is not None:
            covinv = np.dot(covinv, W)
        dy = np.dot(dy, covinv)
        deno = covinv.sum(axis=0)
        dy = dy/deno
    elif W is not None:
        dy = np.dot(dy, W)
        deno = W.sum(axis=0)
        dy = dy/deno
    return dy

def get_chi2(dy, covinv):
    if covinv is None:
        return np.dot(dy, dy)
    else:
        return np.dot(dy, np.dot(covinv, dy))

@njit(parallel=True)
def fill_array(x, y, xout, out):
    n_x = x.shape[0]
    for ii in prange(n_x):
        inds = np.where(xout==x[ii])[0]
        out[inds] = y[ii]

def iter_smoother(yi, xi, x=None, yini=None, covinv=None, weight_func=None, max_iter=1000, min_chi2=None):
    '''
    From https://arxiv.org/pdf/2009.12045.pdf.
    yi: data to smooth
    xi: independent variable corresponding to yi
    x: independent variable to predict; if given and not equal to xi, weight_func must not be None
    yini: initial guess (the result is not sensitive to it)
    covinv: inverse of the covariance matrix of yi; if None, treated as the unit matrix
    weight_func: describes the correlation between x points. Signature:
        def weight_func(xi, x):
            return W # shape == xi.shape + x.shape
        see normal_weight for an example
    max_iter: maximum number of iterations
    min_chi2: if not None, compute chi2 with get_chi2 and break once chi2 <= min_chi2; if None, break only when max_iter is reached. NOTE: the chi2 is calculated from the step dy, not from yi - yi_pred
    '''
    yi = np.asarray(yi)
    xi = np.asarray(xi)
    n_xi = xi.shape[0]
    if x is None:
        x = xi
    else:
        x = np.asarray(x)
    if x is xi or np.array_equal(x, xi):
        print('x and xi are equivalent; skip concatenation!')
        append = False
    else:
        print('Concatenating x and xi!')
        x0 = x.copy()
        x = x[~np.isin(x, xi)]
        x = np.append(x, xi)
        append = True
    if yini is None:
        yini = np.zeros_like(x)
    if weight_func is not None:
        W = weight_func(xi, x)
        W = np.asarray(W)
    elif append:
        raise Exception('weight_func must be given when x is not None and differs from xi!')
    else:
        W = None
    if covinv is not None:
        covinv = np.asarray(covinv)
    n_iter = 0
    while True:
        dy = _iter_smooth_1step(yi, yini[-n_xi:], W, covinv)
        yini = yini + dy
        n_iter += 1
        if min_chi2 is not None:
            chi2 = get_chi2(dy[-n_xi:], covinv)
            if chi2<=min_chi2:
                print('Break at iteration %d with chi2 = %s <= %s!'%(n_iter, chi2, min_chi2))
                break
        if n_iter>=max_iter:
            chi2 = get_chi2(dy[-n_xi:], covinv)
            print('Break at iteration %d with chi2 = %s!'%(n_iter, chi2))
            break
    if not append:
        yi_hat = yini.copy()
        y = yini
    else:
        yi_hat = yini[-n_xi:]
        y = np.zeros_like(x0)
        fill_array(x, yini, x0, y)
    return [y, yi_hat]

if __name__ == '__main__':

    # test for iter_smoother and gp, gaussian noise

    from functools import partial
    xi = np.linspace(-10, 10, 201)
    yi0 = np.sin(xi)
    amp = 0.1
    yi = yi0 + np.random.randn(*yi0.shape)*amp

    covinv = np.ones_like(xi)/amp**2
    covinv = np.diag(covinv)
    x = np.linspace(-10, 10, 301)
    weight_func=partial(normal_weight, delta=0.8)

    #weight_func = None # if weight_func is None, the result coincides with the input
    #covinv = None
    #x = xi

    gp_pred = gaussian_process(xi, yi, alpha=amp**2)
    gp_yi = gp_pred(xi)
    gp_chi2 = get_chi2(gp_yi - yi, covinv)
    print(gp_chi2)

    y, yini = iter_smoother(yi, xi, x=x, weight_func=weight_func, max_iter=1000, covinv=covinv, min_chi2=amp**2*len(xi))
    y2, yini2 = iter_smoother(yi, xi, x=x, weight_func=weight_func, max_iter=1000, covinv=covinv, min_chi2=None)


    plt.plot(xi, yi0, '-', label='input')
    plt.plot(xi, yi, '.', label='data')
    plt.plot(x, y, '--', label='smoother with minimum chi2')
    plt.plot(x, y2, '--', label='smoother')
    plt.plot(x, gp_pred(x), '--', label='gp')
    plt.legend(fontsize=12)
    plt.show()
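The no_optimizer helper above is not exercised by the test. A short sketch of using it to keep the kernel hyperparameters fixed; the hyperparameter and noise values here are illustrative, and note that with centerize_x=True the length scale lives in the rescaled [-1, 1] units:

import numpy as np

xi = np.linspace(-10, 10, 201)
yi = np.sin(xi) + 0.1*np.random.randn(xi.size)

# passing no_optimizer makes GaussianProcessRegressor skip the
# marginal-likelihood optimization and keep the kernel as given
kernel = CRBF_kernel(constant_value=1.0, length_scale=0.2)
gp_fixed = gaussian_process(xi, yi, kernel=kernel, alpha=0.1**2,
                            optimizer=no_optimizer)
x = np.linspace(-10, 10, 301)
y_fixed, y_std = gp_fixed(x, return_std=True)   # fit_func also returns the std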

Outlier flagging:

sigma_clip: obtain a smooth profile via cenfunc and an estimate of sigma via stdfunc, then flag values that deviate by more than a given number of sigma.
fft_fit: take the FFT to get the spectrum, zero out the components below some fraction of the highest peak, then transform back with the inverse FFT.
NOTE: the 2D case could be tested later; in principle these methods should also apply (see the sketch at the end of this section). Methods such as SVD and SumThreshold could also be implemented (a rough sketch of the SumThreshold idea follows below), or existing implementations by others could be used.
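For reference, a minimal, illustrative 1D sketch of the SumThreshold idea: flag runs of consecutive samples whose windowed mean exceeds a threshold that shrinks with window size. The parameter values are arbitrary and this is a rough two-sided variant, not a faithful implementation:

import numpy as np

def sum_threshold_1d(y, chi1, windows=(1, 2, 4, 8), rho=1.5):
    # chi1 is the threshold for a single sample; a window of length m
    # uses the smaller threshold chi1/rho**log2(m)
    mask = np.zeros(y.size, dtype=bool)
    for m in windows:
        chi = chi1/rho**np.log2(m)
        work = np.where(mask, chi, y)  # replace already-flagged samples by the threshold
        csum = np.cumsum(np.concatenate(([0.], work)))
        win_mean = (csum[m:] - csum[:-m])/m
        for i in np.where(np.abs(win_mean) > chi)[0]:
            mask[i:i+m] = True         # flag every sample in a bad window
    return mask

The full flagging script: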

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import medfilt
from scipy.ndimage import convolve1d
import warnings
from tqdm import tqdm

def extra_mask(mask, axis, n_extend):
    '''
    Extend the masked regions along axis by roughly n_extend samples.
    NOTE: this is just an approximate approach; narrow regions may not be extended by exactly n_extend.
    '''
    mask = np.asarray(mask)
    kernel = np.ones(n_extend+1, dtype=np.float64)
    mask = mask.astype(np.float64)
    return convolve1d(mask, kernel, axis=axis)>0

def median_std(dy, correct=True):
    '''
    More robust than std: the mean is replaced by the median. Supports masked arrays.
    If correct, the result is divided by 0.674 so that the function returns the std of zero-mean Gaussian random variables.
    '''
    if correct:
        return np.ma.median(dy**2.)**0.5/0.674
    else:
        return np.ma.median(dy**2.)**0.5
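# A quick sanity check of the correction factor (a sketch): 0.674 is roughly
# the median of |z| for a standard normal z, so for zero-mean Gaussian noise
# median_std should agree with np.std, e.g.
#     noise = np.random.default_rng(0).standard_normal(100000)
#     print(median_std(noise), noise.std())   # both close to 1.0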

def sigma_clip(data, sigma_lower=5., sigma_upper=5., cenfunc=medfilt, stdfunc=median_std, max_iter=1, get_sigma=False, verbose=True, **kwargs):
    '''
    cenfunc: returns an array that can be broadcast with data, or a float
    kwargs: passed to cenfunc
    return: masked array (and sigma, if get_sigma)
    '''
    if isinstance(data, np.ma.MaskedArray):
        mask = data.mask
        data = data.data
    else:
        # plain (or list-like) input carries no pre-existing mask; this branch
        # must also set mask, otherwise the check below raises NameError
        mask = None
        data = np.asarray(data)

    valid = np.isfinite(data)
    if mask is not None:
        valid = valid & (~mask)
    n_iter = 0
    n_mask = 0
    sigma = np.nan # defined up front for get_sigma, in case the loop breaks before stdfunc runs
    while True:
        if n_iter >= max_iter:
            if verbose:
                print('Break at iteration %d! Masked %d points!'%(n_iter, n_mask))
            break
        if not np.any(valid):
            if n_iter == 0:
                warnings.warn('No valid points at the first iteration!')
            break
        new_data = data[valid]
        cen_data = cenfunc(new_data, **kwargs)
        diff = new_data - cen_data
        sigma = stdfunc(diff)
        if sigma == 0:
            if n_iter == 0:
                warnings.warn('Sigma=0 at the first iteration! No points will be masked! You may need to change cenfunc to avoid overfitting!')
            if verbose:
                print('Break at iteration %d because sigma=0! Masked %d points!'%(n_iter, n_mask))
            break

        this_valid = (diff>=-sigma_lower*sigma)&(diff<=sigma_upper*sigma)

        valid[valid] = this_valid
        n_iter += 1
        n_mask += (~this_valid).sum()
        if np.all(this_valid):
            if verbose:
                print('Converged at iteration %d! Masked %d points!'%(n_iter, n_mask))
            break
        elif verbose:
            print('Masked %d points at iteration %d, with sigma %s!'%((~this_valid).sum(), n_iter, sigma))
    if get_sigma:
        return np.ma.array(data, mask=~valid), sigma
    else:
        return np.ma.array(data, mask=~valid)

def sigma_clip_along_axis(data, axis, sigma_lower=5., sigma_upper=5., cenfunc=medfilt, stdfunc=median_std, max_iter=1, verbose=False, **kwargs):
    '''
    return mask, sigma
    '''
    if not isinstance(data, (np.ndarray, np.ma.MaskedArray)):
        data = np.asarray(data)
    if axis<0:
        axis = len(data.shape) + axis
    Ni, Nk = data.shape[:axis], data.shape[axis+1:]
    mask = np.zeros_like(data, dtype=bool)
    shp = list(data.shape)
    del shp[axis]
    sigma = np.zeros(shp, dtype=data.dtype)
    for ii in tqdm(np.ndindex(Ni), total=int(np.prod(Ni, dtype=int))):
        for kk in np.ndindex(Nk):
            y = data[ii+np.s_[:,]+kk]
            ymask, this_sigma = sigma_clip(y, sigma_lower=sigma_lower, sigma_upper=sigma_upper, cenfunc=cenfunc, stdfunc=stdfunc, max_iter=max_iter, get_sigma=True, verbose=verbose, **kwargs)
            mask[ii+np.s_[:,]+kk] = ymask.mask
            sigma[ii+kk] = this_sigma
    return mask, sigma

def block_average_sigma_clip(data, block_size, average_axis, clip_axis, sigma_lower=5., sigma_upper=5., cenfunc=medfilt, stdfunc=median_std, max_iter=1, verbose=False, **kwargs):
    '''
    NOTE: the returned sigma is the sigma before averaging
    return mask, sigma
    '''
    if type(data) is not np.ma.core.MaskedArray:
        data = np.ma.array(data, mask=~np.isfinite(data))
    ntotal = data.shape[average_axis]
    nblock = ntotal//block_size
    if nblock*block_size < ntotal:
        nblock += 1
    mask = np.zeros_like(data.data, dtype=bool)
    sigma = np.zeros_like(data.data)
    shp = list(data.shape)
    if average_axis<0:
        average_axis = len(shp) + average_axis
    if clip_axis<0:
        clip_axis = len(shp) + clip_axis
    if clip_axis>average_axis:
        clip_axis -= 1
    del shp[average_axis]
    slice1 = (slice(None),)*average_axis
    slice2 = (slice(None),)*(len(shp)-average_axis-1)
    ist = 0
    for _ in range(nblock):
        slice0 = (slice(ist, ist+block_size), )
        this_slice = slice1 + slice0 + slice2
        ist = ist + block_size
        this_data = data[this_slice]
        n_mean = this_data.shape[average_axis]
        this_data = this_data.mean(axis=average_axis)

        this_mask, this_sigma = sigma_clip_along_axis(this_data, clip_axis, sigma_lower=sigma_lower, sigma_upper=sigma_upper, cenfunc=cenfunc, stdfunc=stdfunc, max_iter=max_iter, verbose=verbose, **kwargs)
        this_mask = np.expand_dims(this_mask, axis=average_axis)
        this_sigma = np.expand_dims(this_sigma, axis=clip_axis)
        this_sigma = np.expand_dims(this_sigma, axis=average_axis)
        mask[this_slice] = this_mask
        sigma[this_slice] = this_sigma*np.sqrt(n_mean) # before averaging
    return mask, sigma
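# A sketch of calling block_average_sigma_clip on a 2D (e.g. time x frequency)
# array, illustrative only and not exercised by the tests below:
#     data = np.random.randn(16, 201)
#     mask, sigma = block_average_sigma_clip(data, block_size=4,
#                                            average_axis=0, clip_axis=1,
#                                            kernel_size=7)
#     # mask and sigma have the same shape as data; sigma is the pre-average sigma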

def fft_fit(y, axis=-1, max_th=0.01):
    y = np.asarray(y)
    # mean along the transform axis, with keepdims so it broadcasts back
    ymean = y.mean(axis=axis, keepdims=True)
    fft = np.fft.fft(y - ymean, axis=axis)
    psd = np.abs(fft)**2
    # zero out every component whose power is below max_th of the highest peak
    th = psd.max(axis=axis)*max_th
    th = np.expand_dims(th, axis)
    fft[psd<th] = 0
    npk = (psd>=th).sum()
    print('Number of components: %d'%npk)
    yfit = np.fft.ifft(fft, axis=axis) # invert along the same axis
    return yfit.real + ymean

if __name__ == '__main__':
#
#    # test for sigma_clip, flat signal with gaussian noise and spikes
#    xi = np.linspace(-10, 10, 201)
#    yi0 = np.zeros_like(xi)
#    amp = 0.5
#    spike = np.random.normal(0, 15, size=10)
#    spike_inds = np.random.choice(np.arange(xi.shape[0]), spike.shape[0], replace=True)
#    yi = yi0 + np.random.randn(*yi0.shape)*amp
#    yi[spike_inds] = spike
#    y = sigma_clip(yi, max_iter=1, cenfunc=np.median)
#    plt.plot(xi, yi0, label='input')
#    plt.plot(xi, yi, '.', label='data')
#    plt.plot(xi, y, '--', label='sigma clip')
#    plt.plot(xi[y.mask], y[y.mask].data, '*', label='outlier')
#    plt.legend(fontsize=10)
#    plt.show()
#

    # test for sigma_clip and fft_fit, structured signal with gaussian noise and spikes
    def f(x):
        """The function to predict."""
        return (x-0.8)*(np.sin(x) + 2*np.cos(3*x) + 0.5*np.sin(5*x) + 0.7*np.cos(7*x))
    xi = np.linspace(-10, 10, 201)
    yi0 = f(xi)
    amp = 0.5
    spike = np.random.normal(0, 15, size=10)
    spike_inds = np.random.choice(np.arange(xi.shape[0]), spike.shape[0], replace=True)
    yi = yi0 + np.random.randn(*yi0.shape)*amp
    yi[spike_inds] = spike

    y = sigma_clip(yi, max_iter=1, kernel_size=7)
    y2 = fft_fit(yi, max_th=0.05)
    plt.plot(xi, yi0, label='input')
    plt.plot(xi, yi, '.', label='data')
    plt.plot(xi, y, '--', label='sigma clip')
    plt.plot(xi, y2, '--', label='fft')
    plt.plot(xi[y.mask], y[y.mask].data, '*', label='outlier')
    plt.legend(fontsize=10)
    plt.show()
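As a follow-up to the NOTE above, a sketch of the 2D case using sigma_clip_along_axis together with extra_mask; the shapes and parameters are illustrative and untested:

import numpy as np

data = np.random.randn(16, 201)
data[np.random.rand(*data.shape) > 0.99] += 20.   # inject some outliers
# clip each row along axis=1, with a median filter of width 7 as cenfunc
mask, sigma = sigma_clip_along_axis(data, axis=1, max_iter=2, kernel_size=7)
mask = extra_mask(mask, axis=1, n_extend=2)       # widen the flagged regions
print('%d samples flagged'%mask.sum())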
