多个文件数据统计分布_wafer文件数据-CSDN博客

本文链接：https://blog.csdn.net/hunterli1029/article/details/128798819

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab 
from scipy.stats import norm
from scipy import stats
import math
from scipy.stats import kstest
import statsmodels.api as sm
# A selects the wafer: it is substituted into the input file path and into the
# output folder of every figure saved further down.
A=2
# Load the wafer log workbook, skipping the first 6 rows of the sheet.
wafer_0=pd.read_excel(f'C:/Users/Desktop/统计/{A}/HCS5909B_3V6_KB23392#{A}_LogS1A.xlsx',skiprows=6)
# Rows 0-2 are set aside as limit rows below; everything after them is treated as measurement data.
data_wafer0=wafer_0.drop(index=[0,1,2],axis=0)
# Renumber the measurement rows from 0 (the old row labels are kept as an extra "index" column).
data_wafer=data_wafer0.reset_index(drop=False)
# Limit (threshold) rows. The original read them from an undefined name `wafer_1`,
# which raised NameError; the frame that was actually loaded is `wafer_0`.
wafer_limit0=wafer_0.loc[0:2]
# Keep only the measurement columns of the limit rows by dropping the bookkeeping columns.
wafer_interval=wafer_limit0.drop(['Index','Date','StationNumb','ABStation','Site','X','Y','SoftBIN'],axis=1)

# Configure fonts before anything is drawn so the Chinese titles and minus signs render.
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False

# Convert the HardBIN column from object to float64; unparsable cells become NaN.
HardBIN_float = pd.to_numeric(data_wafer["HardBIN"],errors='coerce')
# NaN cells are not dies with a bin result, so they must not be counted in N
# nor treated as "fail" (NaN != 1 is True).
HardBIN_valid = HardBIN_float.dropna()
HardBIN_count=len(HardBIN_valid)
title_count="N: %1.0f" % HardBIN_count
# Bin meanings per the original author: bin1 pass, bin2 open/short, bin3 charge voltage,
# bin4 charge current, bin5 crystal frequency, bin7 quiescent current.
# Sorting by bin number gives a deterministic slice order from which labels are derived.
HardBIN_info_sortcount = HardBIN_valid.value_counts(sort=False,normalize=True).sort_index()
index_cout=len(HardBIN_info_sortcount.index)  # number of distinct bins present

# Colour per bin number; bins that are not listed fall back to gray.
HardBIN_bin_colors={1:'LimeGreen',2:'DarkRed',3:'Maroon',4:'Brown',5:'FireBrick',7:'IndianRed'}


def _hardbin_pie(ax, shares, fail_offset, legend_title, title):
    """Draw one HardBIN pie on *ax*.

    shares       -- Series of normalized frequencies indexed by bin number.
    fail_offset  -- explode distance applied to every slice except bin 1.
    legend_title -- text shown as the legend title (the sample size).
    title        -- axes title.

    Labels, colours and explode offsets are built from the bins that are
    actually present, so the pie works for any combination of bins and every
    label is guaranteed to sit on its own slice.
    """
    ax.set_title(title,fontsize=30)
    ax.axis('equal')  # keep the pie circular
    if shares.empty:
        # Nothing to draw (e.g. a wafer without any failing die).
        return
    labels=[f'bin{b:g}' for b in shares.index]
    slice_colors=[HardBIN_bin_colors.get(b,'gray') for b in shares.index]
    explode=[0 if b==1 else fail_offset for b in shares.index]
    patches,l_text,p_text = ax.pie(shares,labels=labels,autopct= '%1.2f%%',explode=explode,labeldistance = 1.18,pctdistance = 1.08,colors=slice_colors)
    for t in l_text + p_text:  # enlarge both the labels and the percentages
        t.set_size(15)
    ax.legend(fontsize=15,title=legend_title)


# Overall bin distribution
fig=plt.figure(figsize=(29.7,21))
ax1=fig.add_subplot(121)
_hardbin_pie(ax1,HardBIN_info_sortcount,0.15,f"{title_count}",'总体占比')

# Distribution among failing dies only (everything that is not bin 1)
HardBIN_fail = HardBIN_valid[HardBIN_valid!=1].reset_index(drop=True)
HardBIN_fail_count=len(HardBIN_fail)
title_count="N: %1.0f" % HardBIN_fail_count
HardBIN_fail_sortcount = HardBIN_fail.value_counts(sort=False,normalize=True)
order = HardBIN_fail_sortcount.sort_index()  # ascending bin number
ax2=fig.add_subplot(122)
_hardbin_pie(ax2,order,0.1,f"{title_count}",'fail占比')

plt.savefig(f'C:/Users/Desktop/统计/{A}/HardBIN_统计分布图.png',dpi=500,bbox_inches='tight')
plt.show()

# Configure fonts before anything is drawn so the Chinese titles and minus signs render.
plt.rcParams['font.sans-serif']=['SimHei']
plt.rcParams['axes.unicode_minus']=False


def _os_vcc_stats(sample):
    """Return the descriptive statistics shown in the OS_VCC legends.

    *sample* is a non-empty numeric Series without NaN. The result is a dict
    with count, min, max, mean, std, var, p (Kolmogorov-Smirnov p-value),
    skew, kurt, sem and ci (95 % confidence interval of the mean).
    """
    count=len(sample)
    mean=np.mean(sample)
    std=np.std(sample)
    # The standard error of the mean is std/sqrt(n); the original divided the
    # mean instead, which made both SEM and the confidence interval wrong.
    sem=std/math.sqrt(count)
    # Compare against a normal with the sample's own mean/std. Without `args`
    # kstest compares against N(0, 1), which rejects any data not centred on 0.
    # NOTE: the parameters are estimated from the same data, so the p-value is optimistic.
    ks=kstest(sample,'norm',args=(mean,std))
    return {
        'count':count,
        'min':sample.min(),
        'max':sample.max(),
        'mean':mean,
        'std':std,
        'var':np.var(sample),
        'p':ks.pvalue,
        'skew':sample.skew(),
        'kurt':sample.kurt(),
        'sem':sem,
        'ci':(mean-(1.96*sem),mean+(1.96*sem)),
    }


def _os_vcc_density(sample, threshold):
    """Return the relative frequency of *sample* rounded to 2 decimals.

    Values whose frequency is below *threshold* are summed into one trailing
    'other' entry. The split uses >= / < so that a value sitting exactly on
    the threshold is counted once (the original used >= and <=, counting it twice).
    """
    density=sample.round(2).value_counts(sort=False,normalize=True).sort_index(ascending=True)
    bars=density[density>=threshold].copy()
    bars['other']=density[density<threshold].sum()
    return bars


def _os_vcc_plot_bars(ax, sample, summary, threshold, label, color, title, rotate=False):
    """Draw the density bar chart of *sample* on *ax* with min/max/N in the legend."""
    bars=_os_vcc_density(sample,threshold)
    ax.bar(bars.index.map(str),bars,0.9,label=label,color=color)
    ax.set_xlabel("OS_VCC/V",fontsize=15)
    ax.set_ylabel("density",fontsize=15)
    ax.legend(fontsize=15,title="min:%1.4f\nmax:%1.4f\nN: %1.0f" % (summary['min'],summary['max'],summary['count']))
    sns.despine(ax=ax)
    if rotate:
        ax.tick_params(axis='x',labelrotation=45)
    ax.set_title(title,fontsize=15)


def _os_vcc_plot_curve(ax, sample, summary, title):
    """Draw the KDE of *sample* and the normal curve with the same mean/std on *ax*."""
    mean,std=summary['mean'],summary['std']
    if std>0:  # a constant sample has neither a KDE nor a +-3 sigma window
        window=(mean-3*std,mean+3*std)
        sample.plot(kind='kde',style='-k',xlim=window,label='密度曲线',ax=ax)
        edges=np.arange(window[0],window[1],0.001)
        # Invisible histogram (alpha=0): only its bin edges are used as the x grid
        # for the normal curve. It is fed the NaN-free sample.
        n,bins,patches=ax.hist(sample,density=True,bins=edges,range=window,alpha=0)
        ax.plot(bins,stats.norm.pdf(bins,loc=mean,scale=std),'r--',label='偏态分布')
    ax.set_xlabel("OS_VCC/V",fontsize=15)
    ax.set_ylabel("probability",fontsize=15)
    legend_title=(
        "μ:%1.4f\ns:%1.4f\ns^2:%1.4f\nP: %1.2f\nSk: %1.4f\nbk: %1.4f\nSEM: %1.4f\n95CI: %1.4f-%1.4f"
        % (mean,std,summary['var'],summary['p'],summary['skew'],summary['kurt'],summary['sem'],summary['ci'][0],summary['ci'][1])
    )
    ax.legend(fontsize=15,title=legend_title)
    sns.despine(ax=ax)
    ax.set_title(title,fontsize=15)


def _os_vcc_plot_yield(ax, fractions, labels, colors, autopct, explode=None):
    """Draw the pass/fail yield pie on *ax*."""
    patches,l_text,p_text = ax.pie(fractions, labels=labels,autopct=autopct,explode=explode,colors=colors,shadow=True,labeldistance = 1.35,pctdistance = 1.18)
    for t in l_text + p_text:
        t.set_size(15)
    ax.set_title('OS_VCC良率占比',fontsize=15)
    ax.axis('equal')  # keep the pie circular
    ax.legend(fontsize=15)


# Convert the OS_VCC column from object to float64 (the result of to_numeric must be assigned).
OS_VCC_float = pd.to_numeric(data_wafer["OS_VCC"],errors='coerce')
OS_VCC_info = OS_VCC_float.dropna(axis=0)  # drop cells that could not be parsed
OS_VCC_info_count=len(OS_VCC_info)
if OS_VCC_info_count==0:
    # Every statistic below needs at least one value; fail with a clear message
    # instead of a ZeroDivisionError / min() on an empty sequence.
    raise ValueError("OS_VCC column contains no numeric data")

# Split into pass (-0.9 <= value <= -0.2) and fail (everything else).
OS_VCC_in_limits=(OS_VCC_info>=(-0.9))&(OS_VCC_info<=(-0.2))
OS_VCC_pass = OS_VCC_info[OS_VCC_in_limits].reset_index(drop=True)
OS_VCC_fail = OS_VCC_info[~OS_VCC_in_limits].reset_index(drop=True)
OS_VCC_pass_count=len(OS_VCC_pass)
OS_VCC_fail_count=len(OS_VCC_fail)

fig=plt.figure(figsize=(29.7,21))

# Row 1: whole population
OS_VCC_info_stats=_os_vcc_stats(OS_VCC_info)
ax1=fig.add_subplot(321)
_os_vcc_plot_bars(ax1,OS_VCC_info,OS_VCC_info_stats,0.001,'OS_VCC','deepskyblue','OS_VCC总体密度柱形图')
ax2=fig.add_subplot(322)
_os_vcc_plot_curve(ax2,OS_VCC_info,OS_VCC_info_stats,'OS_VCC总体非正态图')

# Row 2: passing dies only (skipped when no die is within limits)
if OS_VCC_pass_count>0:
    OS_VCC_pass_stats=_os_vcc_stats(OS_VCC_pass)
    ax3=fig.add_subplot(323)
    _os_vcc_plot_bars(ax3,OS_VCC_pass,OS_VCC_pass_stats,0.001,'L>=-0.9\nH<=-0.2','limegreen','OS_VCC_bin1_密度柱形图')
    ax4=fig.add_subplot(324)
    _os_vcc_plot_curve(ax4,OS_VCC_pass,OS_VCC_pass_stats,'OS_VCC_bin1_非正态图')

# Row 3: failing dies (if any) and the yield pie
OS_VCC_pie_pass = OS_VCC_pass_count/OS_VCC_info_count
if OS_VCC_fail_count>0:
    OS_VCC_fail_stats=_os_vcc_stats(OS_VCC_fail)
    ax5=fig.add_subplot(325)
    # The fail group is small, so a coarser 1 % cut-off is used for "other".
    _os_vcc_plot_bars(ax5,OS_VCC_fail,OS_VCC_fail_stats,0.01,'L<-0.9\nH>-0.2','red','OS_VCC_fail_密度柱形图',rotate=True)
    ax6=fig.add_subplot(326)
    OS_VCC_pie_fail = OS_VCC_fail_count/OS_VCC_info_count
    _os_vcc_plot_yield(ax6,[OS_VCC_pie_pass,OS_VCC_pie_fail],['pass','fail'],['limegreen','red'],'%1.2f%%',explode=(0.1,0))
else:
    # No fails: the yield pie takes the slot of the fail bar chart.
    ax5=fig.add_subplot(325)
    _os_vcc_plot_yield(ax5,[OS_VCC_pie_pass],['pass'],['limegreen'],'%1.0f%%')
plt.savefig(f'C:/Users/Desktop/统计/{A}/OS_VCC_统计分布图.png',dpi=500,bbox_inches='tight')
plt.show()

效果图：