目录
一、写在前面
1.仅作代码记录
2.筛选规则:①白云母或者伊利石含量为0的数值,Assay表里Pos2200,Dep2200,Dep1900,以及IC值都不需要②如果白云母或伊利石与蒙脱石同时存在,Pos2200,Dep2200,Dep1900,以及IC值设为空值③如果绿泥石(各种绿泥石,只要含有绿泥石字段就算)在直方图中的含量全为0,Pos2250,Dep2250这两个值设为空值
二、常用代码片段
1.忽略警告信息
warnings.filterwarnings("ignore")
2.获取目录下的所有文件名
base_dir = r"C:\Users\zsllsz2022\Desktop\蚀变基础数据"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
3.修改文件夹下二级子目录所有文件后缀
changePath = []
base_dir = r"C:\Users\zsllsz2022\Desktop\蚀变基础数据"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
for eachParent in fileParent:
filesSon = [os.path.join(eachParent, file) for file in os.listdir(eachParent)]
for eachFile in filesSon:
if ").osm" in eachFile:
changePath.append(eachFile)
for eachPath in changePath:
newName = eachPath.split(".")[0]+".csv"
os.rename(eachPath,newName)
4.删除DataFrame中的全0行
#删除CSV文件多余数据并按各工程新建文件夹后保存CSV文件
for eachPath in changePath:
data = pd.read_csv(eachPath,encoding="gbk",on_bad_lines='skip')[:-1] #删除最后一行中文
data = data.loc[(data!=0).any(axis=1)] #删除全0行,只保存有效数据部分
5.保存文件;如果路径不存在则新建文件
#删除CSV文件多余数据并按各工程新建文件夹后保存CSV文件
for eachPath in changePath:
data = pd.read_csv(eachPath,encoding="gbk",on_bad_lines='skip')[:-1] #删除最后一行中文
data = data.loc[(data!=0).any(axis=1)] #删除全0行,只保存有效数据部分
#路径不存在时候新建工程路径
if not os.path.exists("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+eachPath.split("\\")[-2]):
os.makedirs( "C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+eachPath.split("\\")[-2])
#保存文件
data.to_csv("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+eachPath.split("\\")[-2]
+ "\\" + eachPath.split("\\")[-1],header=0)
6.DataFrame第一列插入数据
tempData.insert(loc=0,column="column",value=[filesSon[0].split("\\")[-2]]*len(tempData[2]))
三、完整代码
矿物基础数据与直方图-生成总表
import os
import pandas as pd
import warnings
import numpy as np
warnings.filterwarnings("ignore")
#①寻找文件夹中的所有osm文件
#②将osm文件后缀改为CSV文件
changePath = []
base_dir = r"C:\Users\zsllsz2022\Desktop\蚀变基础数据"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
for eachParent in fileParent:
filesSon = [os.path.join(eachParent, file) for file in os.listdir(eachParent)]
for eachFile in filesSon:
if ").osm" in eachFile:
changePath.append(eachFile)
for eachPath in changePath:
newName = eachPath.split(".")[0]+".csv"
os.rename(eachPath,newName)
#重新寻找所有CSV后缀文件
changePath = []
base_dir = r"C:\Users\zsllsz2022\Desktop\蚀变基础数据"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
for eachParent in fileParent:
filesSon = [os.path.join(eachParent, file) for file in os.listdir(eachParent)]
for eachFile in filesSon:
if ").csv" in eachFile:
changePath.append(eachFile)
#删除CSV文件多余数据并按各工程新建文件夹后保存CSV文件
for eachPath in changePath:
data = pd.read_csv(eachPath,encoding="gbk",on_bad_lines='skip')[:-1] #删除最后一行中文
data = data.loc[(data!=0).any(axis=1)] #删除全0行,只保存有效数据部分
#路径不存在时候新建工程路径
if not os.path.exists("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+eachPath.split("\\")[-2]):
os.makedirs( "C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+eachPath.split("\\")[-2])
#保存文件
data.to_csv("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+eachPath.split("\\")[-2]
+ "\\" + eachPath.split("\\")[-1],header=0)
#建立各工程矿物基础数据
#2200的表分列以后得E列放到矿物基础数据的F列,I列放到G列,J列放到H列
#2250的表E列放I列,J列放J列
base_dir = r"C:\Users\zsllsz2022\Desktop\提取蚀变基础数据"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
count = 0 #如果正确打开,则count应该为67*2
for eachParent in fileParent:
filesSon = [os.path.join(eachParent, file) for file in os.listdir(eachParent)]
# print(filesSon[0].split("\\")[-2]) #工程名-备用
# print(eachParent)
# for eachPath in filesSon:
# print(eachPath)
# print("---")
# print(filesSon)
if "2200" in filesSon[0]:
data2200 = pd.read_csv(filesSon[0],header=None)
count += 1
if "2250" in filesSon[1]:
data2250 = pd.read_csv(filesSon[1],header=None)
count += 1
tempData = data2250.iloc[:,:4].copy()
tempData["4"] = 0 #蚀变矿物
#A0 B1 C2 D3 E4 F5 G6 H7 I8 J9
tempData["5"] = data2200.iloc[:,4] #2200第E列放到基础数据F列
tempData["6"] = data2200.iloc[:,8] #2200第I列放到基础数据G列
tempData["7"] = data2200.iloc[:,9] #2200第J列放到基础数据H列
tempData["8"] = data2250.iloc[:,4] #2250第J列放到基础数据I列
tempData["9"] = data2250.iloc[:,9] #2250第J列放到基础数据J列
tempData["10"] = tempData["5"]/tempData["6"]
tempData.insert(loc=0,column="column",value=[filesSon[0].split("\\")[-2]]*len(tempData[2]))
tempData.insert(loc=1,column="样品顺序",value=[i for i in range(1,len(tempData[2])+1)])
tempData.to_excel("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+filesSon[0].split("\\")[-2]
+ "\\" + "矿物基础数据.xlsx",header=0,index=False)
print(count)
#提取基础矿物数据Z坐标
base_dir = r"C:\Users\zsllsz2022\Desktop\2024年新处理"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
for eachParent in fileParent:
filesSon = [os.path.join(eachParent, file) for file in os.listdir(eachParent)]
for eachPath in filesSon:
if "矿物基础数据" in eachPath and "$" not in eachPath: #400kz有问题,不知道为什么有一个~$文件?
projectName = eachPath.split("\\")[-2]
# print(eachPath)
dataZ = pd.read_excel(eachPath)
data = pd.read_excel("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+projectName+"\\矿物基础数据.xlsx",header=None)
data[5] = dataZ["Z坐标"]
data[6] = dataZ["蚀变矿物"]
data.to_excel("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\\"+projectName+"\\矿物基础数据.xlsx",header=0,index=False)
#汇总总表表头
count = 0
base_dir = r"C:\Users\zsllsz2022\Desktop\提取蚀变基础数据"
fileParent = [os.path.join(base_dir, file) for file in os.listdir(base_dir)]
allTable = pd.DataFrame(columns=['工程名', "样品顺序",'样品号', 'X',"Y","Z","蚀变矿物","Dep2200",
"IC","Pos2200","Dep2250","Pos2250","Dep1900"])
for eachParent in fileParent:
filesSon = [os.path.join(eachParent, file) for file in os.listdir(eachParent)]
for eachPath in filesSon:
if "矿物基础数据" in eachPath and "$" not in eachPath:
count += 1
tempData = pd.read_excel(eachPath,header=None)
tempData.columns = ['工程名', "样品顺序",'样品号', 'X',"Y","Z","蚀变矿物","Dep2200",
"IC","Pos2200","Dep2250","Pos2250","Dep1900"]
allTable = pd.concat([allTable,tempData])
allTable.to_excel("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\总表.xlsx",index=False)
print("处理工程数=",count)
#根据直方图筛选
data = pd.read_excel("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\总表.xlsx")
for i in range(len(data["样品顺序"])):
# for i in range(600):
sampleNum = data["样品顺序"].iloc[i]
if data["蚀变矿物"].iloc[i] in ["白云母","伊利石","蒙脱石","高岭石"]:
if data["蚀变矿物"].iloc[i] == "伊利石":
dataSelct = pd.read_excel("C:\\Users\\zsllsz2022\\Desktop\\2024年新处理\\"+data["工程名"].iloc[i]+"\\矿物直方图.xlsx")
if "蒙脱石" in list(dataSelct.columns[5:]):
value = dataSelct["蒙脱石"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "高岭石" in list(dataSelct.columns[5:]):
value = dataSelct["高岭石"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if data["蚀变矿物"].iloc[i] == "白云母":
dataSelct = pd.read_excel("C:\\Users\\zsllsz2022\\Desktop\\2024年新处理\\"+data["工程名"].iloc[i]+"\\矿物直方图.xlsx")
if "蒙脱石" in list(dataSelct.columns[5:]):
value = dataSelct["蒙脱石"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "高岭石" in list(dataSelct.columns[5:]):
value = dataSelct["高岭石"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if data["蚀变矿物"].iloc[i] == "蒙脱石":
dataSelct = pd.read_excel("C:\\Users\\zsllsz2022\\Desktop\\2024年新处理\\"+data["工程名"].iloc[i]+"\\矿物直方图.xlsx")
if "伊利石" in list(dataSelct.columns[5:]):
value = dataSelct["伊利石"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "白云母" in list(dataSelct.columns[5:]):
value = dataSelct["白云母"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if data["蚀变矿物"].iloc[i] == "高岭石":
dataSelct = pd.read_excel("C:\\Users\\zsllsz2022\\Desktop\\2024年新处理\\"+data["工程名"].iloc[i]+"\\矿物直方图.xlsx")
if "伊利石" in list(dataSelct.columns[5:]):
value = dataSelct["伊利石"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "白云母" in list(dataSelct.columns[5:]):
value = dataSelct["白云母"].iloc[sampleNum-1] #-1是因为直方图中有表头
if value != 0:
print(i,data["工程名"].iloc[i],data["样品顺序"].iloc[i])
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
data.to_excel("C:\\Users\\zsllsz2022\\Desktop\\提取蚀变基础数据\总表-筛选.xlsx",index=False)
根据直方图对总表进行筛选
import os
import pandas as pd
import warnings
import numpy as np
warnings.filterwarnings("ignore")
#根据直方图筛选
data = pd.read_excel(r"C:\Users\zsllsz2022\Desktop\assay.xlsx")
# for i in range(len(data["样品顺序"])):
for i in range(11706): #最后一个样品是11706
# print(data.iloc[i])
sampleNum = data["sample_id"].iloc[i]
holdId = data["hole_id"].iloc[i]
z = data["Z"].iloc[i]
# print(holdId,sampleNum,z)
tempData = "C:\\Users\\zsllsz2022\\Desktop\\2024年新处理\\{}\\矿物直方图.xlsx".format(holdId)
# print(tempData)
tempData = pd.read_excel(tempData,header=None)
tempData.columns = [str(each).replace(" ","") for each in list(tempData.iloc[0])]
tempData = tempData[1:]
#绿泥石字段
chlorite = [element for element in tempData.columns if "绿泥石" in element]
drawData = tempData[tempData["Z坐标"]==z]
# print(drawData)
if "白云母" in tempData.columns:
if float(drawData["白云母"].iloc[0])==0:
print(holdId,sampleNum,z,":处理白云母")
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "伊利石" in tempData.columns:
if float(drawData["伊利石"].iloc[0])==0:
print(holdId,sampleNum,z,":处理伊利石")
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "伊利石" in tempData.columns and "蒙脱石" in tempData.columns:
if float(drawData["伊利石"].iloc[0]) != 0 and float(drawData["蒙脱石"].iloc[0]) != 0:
print(holdId,sampleNum,z,":处理组合①")
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "伊利石" in tempData.columns and "高岭石" in tempData.columns:
if float(drawData["伊利石"].iloc[0]) != 0 and float(drawData["高岭石"].iloc[0]) != 0:
print(holdId,sampleNum,z,":处理组合②")
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "白云母" in tempData.columns and "蒙脱石" in tempData.columns:
if float(drawData["白云母"].iloc[0]) != 0 and float(drawData["蒙脱石"].iloc[0]) != 0:
print(holdId,sampleNum,z,":处理组合③")
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
if "白云母" in tempData.columns and "高岭石" in tempData.columns:
if float(drawData["白云母"].iloc[0]) != 0 and float(drawData["高岭石"].iloc[0]) != 0:
print(holdId,sampleNum,z,":处理组合④")
data["Dep2200"].iloc[i] = np.NaN
data["IC"].iloc[i] = np.NaN
data["Pos2200"].iloc[i] = np.NaN
data["Dep1900"].iloc[i] = np.NaN
value = 0
for eachChlorite in chlorite:
value += float(drawData[eachChlorite].iloc[0])
if value == 0:
print(holdId,sampleNum,z,":处理绿泥石")
data["Dep2250"].iloc[i] = np.NaN
data["Pos2250"].iloc[i] = np.NaN
# print(drawData)
data.to_excel("C:\\Users\\zsllsz2022\\Desktop\\assay-全筛选.xlsx",index=False)