工具:Pycharm、ENVI、Excel
数据:Sentinel-2 L2A不同波段组合的TIFF、ROI数据
步骤一
首先,在ENVI中打开Sentinel-2影像,使用ROI工具绘制典型地物ROI,要求地物分布以及数量相对均匀,并导出ROI数据为ASCII格式,得到包含各波段地物信息的ROI数据。
步骤二
在 Excel 简单处理ROI数据,列为不同波段值,增加label列,标注不同地物类别。
步骤三
在Pycharm中运行如下代码得到随机森林训练结果,其中class_weight 是根据不同地物ROI数量
import numpy as np
from osgeo import gdal
#import gdal
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn import model_selection
from sklearn.model_selection import StratifiedShuffleSplit
import os
import random
Sample_path="Sample.csv"#训练数据集ROI信息
data_name=["2348.tif","23458.tif","23468_dem.tif","23478.tif","all.tif","all_dem.tif","RF_test23468.tif"]
band=[[1,2,3,7],[1,2,3,4,7],[1,2,3,5,7,12],[1,2,3,6,7],[0,1,2,3,4,5,6,7,8,9,10,11],[0,1,2,3,4,5,6,7,8,9,10,11,12],[1,2,3,5,7]]
#数据及波段组合
for i in range (len(data_name)):
Save_path="modelnew2"+str(i)+".pickle"#模型保存路径
RFpath = r"D:\\pycharm\\pyRaft\\"+Save_path #RF模型路径
image_Path = r"E:\\RAFT\\work3\\data\\"+data_name[i] #数据路径
SavePath = r"E:\\RAFT\\work3\\RF\\noweight\\"+data_name[i] #RF结果保存路径
data=np.loadtxt(open(Sample_path,"rb"),delimiter=",",skiprows=1)
bands=band[i]
x = data[:,bands] #数据
y = data[:,-1] #label
y=y.astype(int)
print(y[0])
split = StratifiedShuffleSplit(n_splits=1,test_size=0.1,random_state=0)
#for train_index,test_index in split.split(x,y):
#print(len(train_data),len(test_data))
#train_data,test_data,train_label,test_label=model_selection.train_test_split(x,y,random_state=1,train_size=0.9,test_size=0.1) 用于划分数据集
#class_weight=dict({1:1, 2:1, 3:1,4:1,5:1,6:1,7:1,8:1})使用CSL时按地物数量设置各类别的权值,并取消classfier中class_weight中的注释
#classifier中可以设置随机森林中的相关参数
classifier = RandomForestClassifier(bootstrap=True,
#class_weight=class_weight,
criterion='gini',
max_features='log2',
min_impurity_decrease=0.0,
min_samples_leaf=1,
n_estimators=500,
)
train_data = x
train_label = y
classifier.fit(train_data, train_label.ravel())
print("训练集:",str(i),classifier.score(train_data,train_label))
#print("测试集:",classifier.score(test_data,test_label))
file = open(Save_path, "wb")
#将模型写入文件:
pickle.dump(classifier, file)
#最后关闭文件:
file.close()
def readTif(fn):#读取TIFF
dataset=gdal.Open(fn)
if dataset == None:
print(fn+"ERROR")
return dataset
def writeTiff(im_data,im_geotrans,im_proj,path):#写入TIFF
if 'int8' in im_data.dtype.name:
datatype = gdal.GDT_Byte
elif 'int16' in im_data.dtype.name:
datatype = gdal.GDT_UInt16
else:
datatype = gdal.GDT_Float32
if len(im_data.shape) == 3:
im_bands, im_height, im_width = im_data.shape
elif len(im_data.shape) == 2:
im_data = np.array([im_data])
im_bands, im_height, im_width = im_data.shape
# 创建文件
driver = gdal.GetDriverByName("GTiff")
dataset = driver.Create(path, int(im_width), int(im_height), int(im_bands), datatype)
if (dataset != None):
dataset.SetGeoTransform(im_geotrans) # 写入仿射变换参数
dataset.SetProjection(im_proj) # 写入投影
for i in range(im_bands):
dataset.GetRasterBand(i + 1).WriteArray(im_data[i])
del dataset
dataset = readTif(image_Path)
Tif_width = dataset.RasterXSize # 栅格矩阵的列数
Tif_height = dataset.RasterYSize # 栅格矩阵的行数
Tif_geotrans = dataset.GetGeoTransform() # 获取仿射矩阵信息
Tif_proj = dataset.GetProjection() # 获取投影信息
image_data = dataset.ReadAsArray(0, 0, Tif_width, Tif_height)
################################################调用保存好的模型
# 以读二进制的方式打开文件
file = open(RFpath, "rb")
# 把模型从文件中读取出来
rf_model = pickle.load(file)
# 关闭文件
file.close()
################################################用读入的模型进行预测
# 在与测试前要调整一下数据的格式
data = np.zeros((image_data.shape[0], image_data.shape[1] * image_data.shape[2]))
for i in range(image_data.shape[0]):
data[i] = image_data[i].flatten()
data = data.swapaxes(0, 1)
# 对调整好格式的数据进行预测
pred = rf_model.predict(data)
# 同样地,我们对预测好的数据调整为我们图像的格式
pred = pred.reshape(image_data.shape[1], image_data.shape[2]) * 255
pred = pred.astype(np.uint8)
# 将结果写到tif图像里
writeTiff(pred, Tif_geotrans, Tif_proj, SavePath)
步骤四 SVM
from sklearn import svm
将random forest中的代码Classifier改为如下代码
classifier=svm.SVC(C=1,kernel='rbf',gamma=1,decision_function_shape='ovo',class_weight=classweight)