Houston数据集
休斯顿数据集由高光谱图像分析小组和美国休斯顿大学NSF资助的机载激光测绘中心(NCALM)提供。 数据集最初用于2013 IEEE GRSS数据融合竞赛的科学目的。
如何获得
可以根据条款和条件在此处下载数据集。数据集地址
数据集预处理
1.安装GDAL库: pip install GDAL-3.0.1-cp35-cp35m-win_amd64.whl
2.通过ENVI生成ground_truth
- 用envi 打开 2013_IEEE_GRSS_DF_Contest_CASI.tif
- 使用ENVI打开2013_IEEE_GRSS_DF_Contest_Samples_TR.roi,选择经典ROI的关联栅格。
- 使用ENVI打开2013_IEEE_GRSS_DF_Contest_Samples_VA.roi,选择经典ROI的关联栅格。
- 单击“感兴趣区域(ROI)工具”->“选项”->“从ROIs创建分类图像”->“选择所有项目”。
3.转换格式,代码如下:
转换数据集代码:
from osgeo import gdal
from osgeo.gdal_array import DatasetReadAsArray
import scipy.io as sio

# Convert the Houston hyperspectral GeoTIFF into a MATLAB .mat file.
CASI_PATH = "....../2013_DFTC/2013_IEEE_GRSS_DF_Contest_CASI.tif"  # Change it

# gdal.Open returns None instead of raising when the file cannot be opened
# (unless gdal.UseExceptions() is active), so fail early with a clear message
# rather than letting the array read below crash obscurely.
dataset = gdal.Open(CASI_PATH)
if dataset is None:
    raise OSError("GDAL could not open {!r}".format(CASI_PATH))

data = DatasetReadAsArray(dataset)
print(data.shape, data.dtype)

# transpose() without arguments reverses the axis order.
# NOTE(review): presumably (bands, rows, cols) -> (cols, rows, bands);
# confirm against the two printed shapes.
houston = data.transpose()
print(houston.shape)

# Store the cube under the 'Houston' key of Houston.mat.
sio.savemat('Houston.mat', {'Houston': houston})
转换ground_truth代码:
from osgeo import gdal
from osgeo.gdal_array import DatasetReadAsArray
import numpy as np

# Load the ground-truth classification image and flatten it into a list of
# RGB pixels ready for colour-to-label conversion.
GT_PATH = "Houston_gt.tif"

# gdal.Open returns None instead of raising when the file cannot be opened
# (unless gdal.UseExceptions() is active), so fail early with a clear message.
gt_dataset = gdal.Open(GT_PATH)
if gt_dataset is None:
    raise OSError("GDAL could not open {!r}".format(GT_PATH))

data = DatasetReadAsArray(gt_dataset)
print(data.shape, data.dtype)

# transpose() without arguments reverses the axis order.
houston = data.transpose()
print(houston.shape)

# Optional visual sanity check of the colour image:
# import matplotlib.pyplot as plt
# plt.imshow(houston)

# One row per pixel, three colour channels per row. The size is hard-coded to
# the 1905 x 349 Houston scene, so an image of any other size fails here.
houston = houston.reshape(1905*349, 3)
# RGB pseudo-colour of each class in the classification image, paired with
# the integer label it is converted to.
_HOUSTON_PALETTE = (
    ((0, 0, 0), 0),         # background
    ((0, 205, 0), 1),       # grass_healthy
    ((127, 255, 0), 2),     # grass_stressed
    ((46, 139, 87), 3),     # grass_synthetic
    ((0, 139, 0), 4),       # tree
    ((160, 82, 45), 5),     # soil
    ((0, 255, 255), 6),     # water
    ((255, 255, 255), 7),   # residential
    ((216, 191, 216), 8),   # commercial
    ((255, 0, 0), 9),       # road
    ((139, 0, 0), 10),      # highway
    ((205, 205, 0), 11),    # railway
    ((255, 255, 0), 12),    # parking_lot1
    ((238, 154, 0), 13),    # parking_lot2
    ((85, 26, 139), 14),    # tennis_court
    ((255, 127, 80), 15),   # running_track
)


def list_to_colormap(x_list):
    """Convert a list of RGB pixels into class labels.

    Args:
        x_list: array-like of shape (N, 3), one RGB triple per row.

    Returns:
        A float array of length N (the default dtype of ``np.zeros``) holding
        the label 0-15 of each pixel. A pixel whose colour is not in the
        palette keeps the initial value 0, i.e. it is treated as background.
    """
    pixels = np.asarray(x_list)
    y = np.zeros(pixels.shape[0])
    # One vectorized comparison per class instead of a Python loop over every
    # pixel; the colours are distinct, so the assignment order is irrelevant.
    for color, label in _HOUSTON_PALETTE:
        y[(pixels == np.array(color)).all(axis=1)] = label
    return y
import os

import scipy.io as sio

# Turn every RGB pixel into its class label (0 = background).
gt = list_to_colormap(houston)
nb_classes = int(gt.max())

# Per-class pixel counts. Background is excluded by label rather than by
# position, so the total stays correct even if no background pixel exists.
cls, count = np.unique(gt, return_counts=True)
TOTAL_SIZE = np.sum(count[cls != 0])
print(cls, count)
print('The class numbers of the HSI data is:', nb_classes)
print('The total size of the labeled data is:', TOTAL_SIZE)

# Restore the 2-D layout of the 1905 x 349 scene.
gt = gt.reshape(1905, 349)

# Make sure the output directory exists before writing; otherwise the save
# fails only after the conversion above has already been done.
os.makedirs('datasets', exist_ok=True)
sio.savemat('datasets/Houston_gt.mat', {'gt': gt})
注意事项:
在为ROI分配分类值时,需要将“railway -> Class Value 26”改为“railway -> Class Value 11”。原因是:Class 11的伪彩色为黑色,而背景也是黑色。转换为分类图像后,ENVI会将训练集中的Class 11和Class 26显示为两种不同的伪彩色,其余类别的伪彩色则与“2013_IEEE_GRSS_DF_Contest_Samples_TR.txt”保持一致。