合并几个文件,然后过滤出需要的列useful_cols
import os #合并几个文件,然后过滤出需要的列useful_cols
import pandas as pd
import numpy as np
import glob
from pandas.io.excel import ExcelWriter
df_all = pd.DataFrame()
useful_cols = ['Date', 'Time', 'SerialNum', 'TestDesc', 'PassFail', 'ResValue', 'ResMin', 'ResMax',
'SetValue1','SetValue2', 'SetValue4','SetValue5']
#test_list = ["VSG Power Accuracy High Outp","VSG Power Accuracy Low", "VSA Power Accuracy"]
for i in glob.glob('D:\CertData_collect\Test python\IQXL02939_20140503\*.csv', recursive=True):
df = pd.read_csv(i, low_memory=False, parse_dates=[['Date', 'Time']], usecols=useful_cols)
df_all = pd.concat([df_all,df], ignore_index=True) #合并所有CSV成一个,筛选出有用的列
df_all.to_excel("2939.xlsx" ,sheet_name="sheet1", index=False)
选择需要第一列里面的某一类,然后再过滤另一列的某几类!
import pandas as pd #选择需要第一列里面的某一类,然后再过滤另一列的某几类!
import numpy as np
data =pd.read_csv(r"D:\CertData_collect\Test python\IQXFinal20221205.csv",index_col=0,low_memory=False)
test_list = ["VSG Power Accuracy High Outp","VSG Power Accuracy Low","VSA Power Accuracy"] # "VSG Power Accuracy High Outp","VSG Power Accuracy Low","VSA Power Accuracy"
#temp = data.loc[data["TestDesc"].isin(test_list),:]
temp = data.loc[(data['SerialNum']=='IQXL02697') & (data["TestDesc"].isin(test_list)),:"SetUnit4"]
#选取第'SerialNum'列的里面是'IQ1218A3694'这一项 ,并且只选取列"TestDesc" 里面其中某几项的项目
#temp.iloc[:,:25].to_excel('3306.xlsx') #保存所以行以及前24列的数据保存未excel
temp.to_excel('3306.xlsx')
过滤某几项
import pandas as pd #过滤某几项
import numpy as np
data=pd.DataFrame()
test_list = ["VSG Power Accuracy High Outp","VSG Power Accuracy Low","VSA Power Accuracy"] #此项漏了,加上
data =pd.read_excel(r"D:\CertData_collect\Test python\2938.xlsx",index_col=0)
temp = data.loc[(data['SerialNum']=='IQXL02697') & (data["TestDesc"].isin(test_list)),:]
#选取第'SerialNum'列的里面是'IQ1218A3694'这一项 ,并且只选取列"TestDesc" 里面其中某几项的项目
temp.to_excel('3306.xlsx')
上面两个程序合并到一起
import os #上面两个程序1&2合并到一起
import pandas as pd
import numpy as np
import glob
df_all = pd.DataFrame()
useful_cols = ['Date', 'Time', 'SerialNum', 'TestDesc', 'PassFail', 'ResValue', 'ResMin', 'ResMax',
'SetValue1','SetValue2', 'SetValue4','SetValue5']
test_list = ["VSG Power Accuracy High Outp","VSG Power Accuracy Low", "VSA Power Accuracy"]
for i in glob.glob('D:\CertData_collect\Test python\IQXL02939_20140503\*.csv', recursive=True):
df = pd.read_csv(i, low_memory=False, parse_dates=[['Date', 'Time']], usecols=useful_cols)
df_all = pd.concat([df_all,df], ignore_index=True) #合并所有CSV成一个,筛选出有用的列
#df_all.to_excel("2938.xlsx" ,sheet_name="sheet1", index=False)
temp = df_all.loc[(df_all['SerialNum']=='IQXL02939') & (df_all["TestDesc"].isin(test_list)),:] #筛选出需要的VSA VSG
temp.to_excel("2939.xlsx" ,sheet_name="sheet1", index=False)
遍历文件夹下面的CSV文件,然后合并去重,参考Yangzy
import pandas as pd #遍历文件夹下面的CSV文件,然后合并去重,参考Yangzy
import numpy as np
from tqdm import tqdm
import os
result =pd.DataFrame()
path = r"D:\CertData_collect\Test python"
#path = r"D:\CertData_collect\Test python\IQXL02939_20140503\"
for file in os.listdir(path):
if file.endswith(".csv"):
df = pd.read_csv(file)
result = pd.concat([result,df[['Date','Time','SerialNum','TestDesc','PassFail','ResValue','ResMin','SetValue2','SetUnit5']]])
print(result.drop_duplicates())
uiqu_df = result.drop_duplicates()
uniq_df.to_csv("2312.csv")
对表格的多个列排序
sort1=pd.DataFrame() #对表格的多列排序
file = pd.read_excel(r"D:\CertData_collect\Test python\2939_sort_soure_vsa.xlsx",sheet_name="sheet1")
sort1=file.sort_values(["SetValue4","SetValue0","SetValue1"],ascending=[True,True,True])
#vsa 的setvalue is 4-0-1, vsg is 4-0-2
sort1.to_excel("2939sort_vsa.xlsx",index=False)
对表格进行分割成不同的sheet,写到一个excel里面
# slice the 937clums data to vsa &vsg high &vsg low
import os
import pandas as pd
import numpy as np
import glob
from pandas.io.excel import ExcelWriter
from matplotlib import pyplot as plt
df = pd.read_excel(r"D:\CertData_collect\Test python\2939.xlsx") #将937列整的数据分切到不同的df,然后写入不同的sheet
df_acc = df.groupby("TestDesc")
df_vsg_low=df_acc.get_group('VSG Power Accuracy Low')
df_vsg_high=df_acc.get_group('VSG Power Accuracy High Outp')
df_vsa=df_acc.get_group('VSA Power Accuracy')
with pd.ExcelWriter('2939_vsghighlow_vsa.xlsx') as writer:# 将多个df写到不同的sheet
df_vsg_low.to_excel(writer,sheet_name="VSG_Low", index=False)
df_vsg_high.to_excel(writer,sheet_name="VSG_High", index=False)
df_vsa.to_excel(writer,sheet_name="VSA", index=False)
切分937项到三个不同df,然后把df按照frequency,power accuracy 排序,后面生成三个不同的sheet
df = pd.read_excel(r"D:\CertData_collect\Test python\2939.xlsx") #切分937项到三个不同df,然后把df按照frequency,power accuracy 排序,后面生成三个不同的sheet
df_acc = df.groupby("TestDesc")
df_vsg_low=df_acc.get_group('VSG Power Accuracy Low')#切需要的列里面的子项
df_sort_vsg_low = df_vsg_low.sort_values(["SetValue5","SetValue0","SetValue2"],ascending=[True,True,True])#多列排序
df_vsg_high=df_acc.get_group('VSG Power Accuracy High Outp')
df_sort_vsg_high = df_vsg_high.sort_values(["SetValue4","SetValue0","SetValue2"],ascending=[True,True,True])
df_vsa=df_acc.get_group('VSA Power Accuracy')
df_sort_vsa = df_vsa.sort_values(["SetValue4","SetValue0","SetValue2"],ascending=[True,True,True])
with pd.ExcelWriter('2939_vsghighlow_vsa.xlsx') as writer:# 将多个df写到不同的sheet
df_sort_vsg_low.to_excel(writer,sheet_name="VSG_Low", index=False)
df_sort_vsg_high.to_excel(writer,sheet_name="VSG_High", index=False)
df_sort_vsa.to_excel(writer,sheet_name="VSA", index=False)
一个工作表分成不同的工作簿
file_path=r"D:\jupyter\good\3066.xlsx"
data=pd.read_excel(file_path,sheet_name= "sheet1")
pro_data= data.groupby("Date_Time")
"""for i,j in pro_data:# 一个工作表分成不同的工作簿
new_file_path="D:\jupyter\good\split" + i +".xlsx"
j.to_excel(new_file_path,sheet_name=i,index=False)"""
#workbook=pd.ExcelWriter("3066split.xlsx")
workbook=pd.ExcelWriter(r"D:\jupyter\good\split\3066split.xlsx") #把一个工作表分成不同的工作表
for i,j in pro_data:
j.to_excel(workbook,sheet_name=i,index = False)
workbook.save()