Input contains NaN, infinity or a value too large for dtype('float64').

37 篇文章 2 订阅
1 篇文章 0 订阅

贴上出错代码:

import cv2 as cv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math
import os
import glob
import shutil
import scipy 
from scipy import misc
from sklearn.datasets import fetch_lfw_people
import matplotlib.pyplot as plt
import sys 
from pathlib import Path
from tqdm import tqdm # 进度条
from warnings import warn
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import pickle
from sklearn.preprocessing import LabelBinarizer

 

 

# Module-level accumulator: an empty frame with one column per value of a
# flattened 64*64 image (columns 0..4095). pic_to_vector() is handed this
# frame and returns it with one extra row per image.
data1 = pd.DataFrame(data=None,columns=range(0,64*64))
def pic_to_vector(pic_dir,data):
    """Append one flattened row per image found in ``pic_dir`` to ``data``.

    Every directory entry whose name does not contain ".ipynb_checkpoints"
    is read with ``cv.imread(..., 2)``. Images whose first dimension is at
    least 512 are resized to 64x64 (INTER_AREA) and multiplied by 255/65535.
    The result is reshaped to a single row of 64*64 values, wrapped in a
    uint8 DataFrame and concatenated below the rows already in ``data``.

    Args:
        pic_dir: Directory containing the image files.
        data: DataFrame the new rows are appended to.

    Returns:
        The DataFrame with one extra row per processed image; ``data``
        itself when nothing was processed.
    """
    target_size = (64, 64)  # (width, height) passed to cv.resize

    for file in os.listdir(pic_dir):
        # Skip Jupyter checkpoint entries.
        if ".ipynb_checkpoints" in file:
            continue

        # Flag 2 is cv.IMREAD_ANYDEPTH in OpenCV.
        # NOTE(review): the imread result is used without a None check.
        img = cv.imread(os.path.join(pic_dir, file), 2)

        # Author's note: the ripple-aurora samples are 1024*1024, whereas the
        # polar-center raw images are 512*512.
        # NOTE(review): images with a first dimension below 512 are not
        # resized, so the reshape below only succeeds if they already hold
        # exactly 64*64 values.
        if img.shape[0] >= 512:
            img = cv.resize(img, target_size, interpolation=cv.INTER_AREA)
            # NOTE(review): if img is an unsigned 16-bit array, img*255 is
            # evaluated in that integer type and may wrap before the
            # division - confirm; converting to float first would avoid it.
            img = img*255/65535

        img_1_dimen = img.reshape(1,64*64)
        # This is the line the article discusses; it is kept exactly as posted.
        line1 = pd.DataFrame(img_1_dimen,dtype=np.uint8)
        data = pd.concat([data,line1],axis=0)

    return data

if __name__ == '__main__':
    # Script entry point: build the feature table from the YES/NO image
    # folders, attach labels, min-max scale everything, then grid-search a
    # PCA + RBF-SVC pipeline and predict on the held-out split.
    #print("ok")
    #data_dir = '/data/UserData/ASA_airglow_image/XLT_ASA01_IIT_L01_STP/2018/'
    save_dir1 = '/home/zhongjia/aurora/choose_benchmark/YES/'
    save_dir2 = '/home/zhongjia/aurora/choose_benchmark/NO/'
    
    # Rows from the YES folder are appended first, then rows from the NO folder.
    data1 = pic_to_vector(save_dir1,data1)
    data1 = pic_to_vector(save_dir2,data1)
    
    # Earlier way of building the labels, kept by the author:
#     yes = np.ones((31,1),dtype=np.int)
#     no  = np.zeros((47,1),dtype=np.int)
#     yes = pd.DataFrame(yes)
#     no = pd.DataFrame(no)
#     label = pd.concat([yes,no],axis=0)
    # Label column: 31 rows of 1 followed by 47 rows of 0, built by repeated
    # concatenation of single-row frames.
    # NOTE(review): 31 and 47 are hard-coded - presumably the number of files
    # in YES/ and NO/; verify against the directories.
    label = pd.DataFrame(data=None,columns=range(0,1))
    yes = np.ones((1,1),dtype=np.int8)
    yes = pd.DataFrame(yes)
    no = np.zeros((1,1),dtype=np.int8)
    no = pd.DataFrame(no)
    for i in range(31):
        label = pd.concat([label,yes],axis=0)
    for i in range(47):
        label = pd.concat([label,no],axis=0)
     
    #print(label.shape[0],label.shape[1],label.shape,label)
    #print(data1.shape[0],data1.shape[1],data1.shape,data1)
    # Append the label as an extra column to the right of the pixel columns.
    # The label column is also named 0, so later code addresses columns by
    # position (iloc / array slicing) rather than by name.
    data1 = pd.concat([data1,label],axis=1)
    #print(type(data1),data1.shape[0],data1.shape[1],data1.shape,data1)
    
    data1.dropna(inplace=True) # dropna: remove rows that contain missing values
    scaler = MinMaxScaler()## instantiate the scaler
    print(type(data1),data1.shape)
    
    #scaler = StandardScaler()
#     scaler = scaler.fit(data1.iloc[:,0:64*64]) # fit essentially computes min(x) and max(x) here
#     sample = scaler.transform(data1.iloc[:,0:64*64]) ## export the result through the API, as an array
    # The scaler is fitted on all of data1, i.e. the pixel columns and the
    # label column appended above.
    scaler = scaler.fit(data1)
    sample = scaler.transform(data1)
    print(type(sample),sample.shape)
    
    # Diagnostics: look for NaN / non-finite values in the raw pixel values
    # (tt) and in the scaled array (sample).
    tt = data1.iloc[:,0:64*64]
    tt = tt.values
    print(tt.shape,type(tt))
    #print(type(tt.values),tt.values)
    print(np.isnan(sample).any())
    print(pd.isna(tt).any())
    print(pd.isna(sample).any())
    print(np.isfinite(sample).all())

    
    #print(sample_XY.shape)
    #sample_XY = scaler.fit_transform(sample_xy)
    #X_train, X_test, Y_train, Y_test = train_test_split(sample_XY.iloc[:,1:85].values, sample_XY.iloc[:,86].values,test_size=0.3)
    #sample_X,sample_Y = shuffle(sample[:,0:64*64], data1.iloc[:,64*64].values)
    # Shuffle features and labels together; the labels are taken from the
    # scaled array (column at position 64*64), not from data1.
    sample_X,sample_Y = shuffle(sample[:,0:64*64], sample[:,64*64])
    # 70/30 train/test split with a fixed random_state.
    X_train, X_test, Y_train, Y_test = train_test_split(sample_X,sample_Y,test_size=0.3,random_state=10)
        
    
    # Author's note: each image is 62*47 in size.
    # NOTE(review): that size does not match this script, whose feature
    # vectors are 64*64 long - the remark looks stale.
    # Every pixel is treated as one feature and PCA reduces the dimension.


    # Reduce to 4*4 = 16 whitened components, then classify with an RBF SVC.
    pca = PCA(n_components=4*4,whiten=True, random_state=42)
    svc = SVC(kernel='rbf',class_weight='balanced')
    model = make_pipeline(pca, svc)


    
    param_grid = {'svc__C': [1, 5, 10],
                  'svc__gamma': [0.0001, 0.0005, 0.001]}  # C usually defaults to 1; gamma is the width parameter of the Gaussian kernel, i.e. its radial range of influence
    grid = GridSearchCV(model, param_grid)
    #print(X_train.shape,Y_train.shape)
    print(Y_train)
    grid.fit(X_train, Y_train) # fit the model
    #grid.fit(X_train, Y_train.astype('int')) # fit the model
    print(grid.best_params_) # print the best parameter combination

    model = grid.best_estimator_  # keep the best model found by the search

    yfit = model.predict(X_test)  # predict with the current best model

报错:

虽然 data1 里面没有 NaN,但运行时仍然报错:

Input contains NaN, infinity or a value too large for dtype('float64').

将标红代码

line1 = pd.DataFrame(img_1_dimen,dtype=np.uint8)

改为

line1 = pd.DataFrame(img_1_dimen,dtype=np.float64)

正确!!

 

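可能是读取图片时,很多像素为0的点导致的:`img*255/65535` 的结果是浮点数,强制转换为 uint8 时小数部分会被截断,大量像素因此变成 0;当特征几乎为常数时,后续带 `whiten=True` 的 PCA 可能因方差为 0 而产生 NaN 或 inf。改用 float64 可以保留小数部分,所以不再报错。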

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值