python dataframe 统计检查处理

统计检查:

import pywt
import pandas as pd
import numpy as np
import random
import os
import lightkurve as lk
import seaborn as sns
import matplotlib.pyplot as plt
from keras.models import Sequential, Model
from keras.layers import Dense,Dropout,Conv1D, Conv2D, MaxPooling1D,LeakyReLU,Flatten,BatchNormalization,MaxPooling1D,Input,ReLU,LSTM,GlobalAveragePooling1D
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, normalize 
from tensorflow.keras import callbacks,utils
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,plot_confusion_matrix,precision_score,recall_score,f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from scipy import ndimage
from sklearn import  metrics
from sklearn.metrics import roc_curve,roc_auc_score,plot_roc_curve
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTE
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from scipy.signal import savgol_filter
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.utils import shuffle
import time
%matplotlib inline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.datasets import make_classification
from collections import Counter
from keras.optimizers import Adam
from scipy import interpolate
from tensorflow.keras.utils import to_categorical,plot_model
from keras import regularizers  # 过拟合正则化
from keras.regularizers import l2
import tensorflow as tf
import keras.backend as K
from attention import Attention

# Configure matplotlib so Chinese text displays: SimHei as the sans-serif
# font, and unicode minus signs switched off.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# List the entries of the current directory.
os.listdir('.')
# Stack the individual tables into one frame and add two constant columns.
# pd.concat replaces the chained DataFrame.append calls: append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
Xmatch = pd.concat([MISC, MISC_SR, DCEP, DSCT, RRAB, RRC, MIRA])
Xmatch['type'] = 'V'
Xmatch['label'] = 2
# De-duplicate on the 'TIC' column, keeping the first occurrence of each value.
Xmatch = Xmatch.drop_duplicates(subset='TIC')
Xmatch
# Rebuild the frame with its columns reordered.
df = pd.concat(
    objs=[
        Xmatch.iloc[:, :2],     # leading two columns
        Xmatch.iloc[:, -3:],    # trailing three columns
        Xmatch.iloc[:, 2:-3],   # everything in between
    ],
    axis='columns',
)
df.head()
# Discard rows with a missing value in column '0', then keep only the rows
# in which every column is populated.
df = df.dropna(subset=['0'])
df = df[df.notnull().all(axis=1)]
# Save the cleaned table as CSV without writing the index.
df.to_csv(path_or_buf='XmatchV_spoc120s_multiperiod_500Pmm_cacd.csv', index=False)
# Persist an ndarray as a .npy file (np.save appends the extension),
# then read it back and print it.
np.save(file='v32TIC', arr=temp)
npzfile0 = np.load(file='v32TIC.npy')
print(npzfile0)
# Pull out a subset of columns by name.
test = df.loc[:, ['tic_id', 'Disposition', 'Sectors']]
test
# Export a list to a text file, one item per line.
# The with-block closes the file on exit, so no explicit close() is needed,
# and writelines consumes a generator instead of building a throwaway list
# purely for its side effects.
with open('ddtic.txt', 'w') as fp:
    fp.writelines(str(item) + '\n' for item in ddsearch)
# Count how many rows carry each 'Disposition' value.
tces.loc[:, 'Disposition'].value_counts()
# Show the rows whose 'tic_id' has already appeared earlier in the table.
tces.loc[tces.duplicated(subset='tic_id')]
# Pair up the elements of two lists position by position.
v = [*zip(v1, v2)]
v
# Check for NaN values.

# flux_list[0]: total length and number of NaN entries.
first_flux = flux_list[0]
print(len(first_flux), len(first_flux[np.isnan(first_flux)]))

# time_list[0]: total length and number of NaN entries.
first_time = time_list[0]
print(len(first_time), len(first_time[np.isnan(first_time)]))
# Filter rows on several conditions at once (type is 'KP' and label is 0).
data[(data['type'] == 'KP') & (data['label'] == 0)]
# Index labels of the matching rows, together with the shape of the full frame.
(data[(data['type'] == 'KP') & (data['label'] == 0)].index, data.shape)
# Remove conflicting rows in place (type is 'CP' while label is 0), then
# show the shape of the frame after the removal.
# NOTE(review): the selection example preceding this step matches 'KP',
# not 'CP' — confirm which type is intended here.
(
    data.drop(
        index=data[(data['type'] == 'CP') & (data['label'] == 0)].index,
        inplace=True,
    ),
    data.shape,
)
# Insert new columns at a given position.
data.insert(loc=3, column='selfsector', value=sect)
vt2.insert(1, 'type', 'V')
# Use the 'TIC' column as the row index (modifies data in place).
data.set_index(keys='TIC', inplace=True)
data
# Place test and data side by side, then conform the rows to data's index.
result = pd.concat(objs=[test, data], axis='columns').reindex(index=data.index)
result
# Reset to the default integer index in place; the previous index is kept
# as a regular column because drop defaults to False.
data.reset_index(inplace=True)
# Add an empty-string 'val' column at position 2. A scalar value is
# broadcast to every row, so this no longer depends on df1 having exactly
# 1631 rows (the hard-coded list length raised ValueError otherwise).
df1.insert(2, 'val', '')
df1
# Convert a column to a plain Python list.
temp = df1['tessid'].to_list()
temp
# Elements common to both lists (duplicates collapse; order is not preserved).
res1 = list(set(cp).intersection(set(fp)))
res1
# Remove the 'TIC' column in place.
df1.drop(columns=['TIC'], inplace=True)
# Split rows by 'type'. Chain .sample(n=3350) onto the 'EB' selection to
# down-sample it (left disabled, as in the original snippet).
ne = data.loc[data['type'].eq('EB')]
po = data[data['type'].isin(['KP', 'CP', 'PC'])]
# Drop rows by index label; the result is a new frame and data is unchanged.
# NOTE(review): the result is bound to update_df1 while the lines below use
# update_df — confirm whether these are meant to be the same frame.
update_df1 = data.drop(index=faidx)
# Select rows by index label.
update_df.loc[ebidx]
# Overwrite the 'astro' column for those rows.
update_df.loc[ebidx, 'astro'] = 'EB'

# Replace the column labels (df1 must have exactly two columns).
df1.columns = pd.Index(['TIC ID', 'toiCount'])

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值