Python处理GRIB文件

啵啵O0o0O

已于 2022-10-20 16:27:20 修改

阅读量943

点赞数

分类专栏： GIS 文章标签： python

于 2022-09-21 11:20:28 首次发布

本文链接：https://blog.csdn.net/qq_44264594/article/details/126969118

版权

GIS 专栏收录该内容

3 篇文章

订阅专栏

本文介绍如何将GRIB2文件转换为NetCDF格式，并进一步处理生成TIFF图像文件。涉及wgrib2工具的使用、GeoTIFF的创建及重采样方法，以及如何根据Shapefile裁剪TIFF文件。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

import numpy as np
import netCDF4 as nc
from osgeo import gdal, osr, ogr
import glob
import os
from zipfile import ZipFile
import shutil

'''
grb2文件解析转换为多个tiffs文件
'''
# 配置依赖路径避免报错
os.environ['PROJ_LIB'] = r'C:\Users\AppData\Local\Programs\Python\Python37\Lib\site-packages\osgeo\data\proj'


# grb2--->nc文件
def grb2_to_nc(grb2_file_folder, out_tif_file_path, shp_file_path):
    file_list = []
    for i in os.listdir(grb2_file_folder):  # 遍历整个文件夹
        path = os.path.join(grb2_file_folder, i)
        if os.path.isfile(path):  # 判断是否为一个文件，排除文件夹
            if os.path.splitext(path)[1] == ".GRB2":  # 判断文件扩展名是否为“.GRB2”
                file_list.append(i)

    for j in file_list:
        file_name = j.split("_")[8][0:10] + '.nc'
        output_nc_file_name = out_tif_file_path + os.path.sep + file_name
        input_grb2_file_name = grb2_file_folder + os.path.sep + j
        # wgrib2 data.GRB2 -netcdf data.nc   这里需要依赖到wgrib2来做数据转换
        cmd_grb2_nc = 'wgrib2 %s -netcdf %s' % (input_grb2_file_name, output_nc_file_name)
        os.system(cmd_grb2_nc)
        nc_tiffs(output_nc_file_name, shp_file_path)
    return


# nc--->tiffs
def nc_tiffs(input_nc_file, shp_file_path):
    # 创建最后输出的list
    out_paths = list()

    # 使用netCDF读取nc文件
    nc_data_obj = nc.Dataset(input_nc_file)
    # 获取时间，经度，纬度，波段的名称信息，这些可能是不一样的
    key = list(nc_data_obj.variables.keys())
    # 模糊查找属于经度的名称所在的位置
    lon_index = \
        [i for i, x in enumerate(key) if (x.find('longitude'.upper()) != -1 or x.find('longitude'.lower()) != -1)][0]
    # 模糊查找属于纬度的名称所在的位置
    lat_index = \
        [i for i, x in enumerate(key) if (x.find('latitude'.upper()) != -1 or x.find('latitude'.lower()) != -1)][0]
    # 模糊查找属于时间的名称所在的位置
    time_index = [i for i, x in enumerate(key) if (x.find('time'.upper()) != -1 or x.find('time'.lower()) != -1)][0]
    # 名称所在的位置(通过测试确定了位置)
    band_index = 3
    # 获取波段的名称
    key_band = key[band_index]
    # 获取时间的名称
    time_name = key[time_index]
    # 获取经度的名称
    key_lon = key[lon_index]
    # 获取纬度的名称
    key_lat = key[lat_index]
    # 获取每个像元的经度,得到一个数组
    arr_lon = nc_data_obj.variables[key_lon][:]
    # 获取每个像元的纬度,得到一个数组
    arr_lat = nc_data_obj.variables[key_lat][:]
    # 时间的格式转换,得到一个数组
    time = nc_data_obj.variables[time_name]
    arr_time = nc.num2date(time[:], time.units)
    # 获取对应波段的像元的值，类型为数组
    arr_band = np.asarray(nc_data_obj.variables[key_band]).astype(float)

    # 影像的对角坐标,确定其位置
    lon_min, lat_max, lon_max, lat_min = [arr_lon.min(), arr_lat.max(), arr_lon.max(), arr_lat.min()]

    # 分辨率计算
    n_lat = len(arr_lat)
    if arr_lon.ndim == 1:
        n_lon = len(arr_lon)  # 获取长度
    else:
        n_lon = len(arr_lon[0])
    lon_res = (lon_max - lon_min) / (float(n_lon) - 1)
    lat_res = (lat_max - lat_min) / (float(n_lat) - 1)

    # 一个grb2文件处理到一个文件夹下面
    output_tif_folder = os.path.dirname(input_nc_file)
    out_folder = output_tif_folder + os.path.sep + os.path.basename(input_nc_file).split('.')[0]

    # 创建输出的子文件夹
    if os.path.exists(out_folder):
        shutil.rmtree(out_folder)
    os.makedirs(out_folder)

    count = 0

    # 创建tif文件
    for i in range(arr_band.shape[0]):
        # 加载驱动
        driver = gdal.GetDriverByName('GTiff')
        # 获取不同时间段的数据, 代表三维数组获取第一个维度下所有的数据
        attr_data = arr_band[i, :, :]

        # 格式化datetime '20220827.09.tif'
        file_name = arr_time[i].strftime("%H")
        count = count + 1

        numString = "00" + str(3 * count)

        output_tif_path = out_folder + os.sep + os.path.basename(input_nc_file).split('.')[0] + '.' + numString[
                                                                                                      -3:] + ".tif"

        out_tif = driver.Create(output_tif_path, n_lon, n_lat, 1, gdal.GDT_Float32)
        # 设置影像的显示范围 -Lat_Res一定要是-的 六个元素的元组:左上角x坐标，水平分辨率，旋转参数，左上角y坐标，旋转参数，竖直分辨率
        geo_transform = (lon_min, lon_res, 0, lat_max, 0, -lat_res)
        out_tif.SetGeoTransform(geo_transform)
        # 获取地理坐标系统信息, 定义输出的坐标系为"WGS 84"，AUTHORITY["EPSG","4326"]
        srs = osr.SpatialReference()
        srs.SetWellKnownGeogCS('WGS84')
        # 给图层赋予投影信息
        out_tif.SetProjection(srs.ExportToWkt())

        # 更改异常值  int是16位有符号数  -32768~32767
        attr_data[attr_data[:, :] > 1000000] = -32767
        # 数据写出
        if attr_data.ndim == 2:
            # 如果本来就是二维数组就不变
            a = attr_data[:, :]
        else:
            # 将三维数组转为二维
            a = attr_data[0, :, :]
        # 这里是需要倒置一下的
        reversed_arr = a[::-1]
        out_tif.GetRasterBand(1).WriteArray(reversed_arr)
        out_tif.GetRasterBand(1).SetNoDataValue(-32767)

        # 重采样算法:三次卷积, 重采样参数设置
        resample_factor = 16
        out_tif_re_sam = driver.Create(output_tif_path, n_lon * resample_factor, n_lat * resample_factor, 1,
                                       gdal.GDT_Float32)
        geo_transform_re_sam = (lon_min, lon_res / resample_factor, 0, lat_max, 0, -lat_res / resample_factor)
        out_tif_re_sam.SetGeoTransform(geo_transform_re_sam)
        out_tif_re_sam.SetProjection(srs.ExportToWkt())
        out_tif_re_sam.GetRasterBand(1).WriteArray(reversed_arr)
        out_tif_re_sam.GetRasterBand(1).SetNoDataValue(-32767)

        gdal.ReprojectImage(
            out_tif,
            out_tif_re_sam,
            out_tif.GetProjection(),
            out_tif_re_sam.GetProjection(),
            gdal.gdalconst.GRA_CubicSpline,
            0.0, 0.0,
        )

        out_tif_re_sam.FlushCache()  # 将数据写入硬盘
        del out_tif  # 注意必须关闭tif文件
        del out_tif_re_sam  # 注意必须关闭tif文件

        # 根据shp文件裁剪刚刚生成好的tif文件
        out_path = tif_clib_by_shp(output_tif_path, out_folder,
                                   shp_file_path)
        out_paths.append(out_path)
    return out_paths


# 裁剪shp文件指定区域的tif文件
def tif_clib_by_shp(input_file_path, output_tif_folder, shapefile_path):
    src_file_name = os.path.basename(input_file_path)
    output_tif_path = output_tif_folder + os.sep + src_file_name.split('.')[0] + '.' + src_file_name.split('.')[
        1] + "_clib.tif"
    
    # 核心方案, 可以百度查看参数效果
    result = gdal.Warp(output_tif_path, input_file_path, cutlineDSName=shapefile_path, cropToCutline=True, dstNodata=0,
                       format='GTiff')
    result.FlushCache()
    del result
    # 删除原有tif, 修改新的tif文件名
    os.remove(input_file_path)
    os.renames(output_tif_path, input_file_path)
    return input_file_path


# 栅格重采样,三重卷积算法(gdal.gdalconst.GRA_NearestNeighbour),删除原有文件,输出重采样后的文件路径
def re_sam_tif(input_file, output_tif_folder):
    # 重采样算法:三次卷积
    resample_factor = gdal.gdalconst.GRA_CubicSpline
    # 载入原始栅格
    dataset = gdal.Open(input_file, gdal.GA_ReadOnly)
    src_projection = dataset.GetProjection()
    src_geoTransform = dataset.GetGeoTransform()
    src_width = dataset.RasterXSize
    src_height = dataset.RasterYSize
    src_bound_count = dataset.RasterCount
    src_no_data = [
        dataset.GetRasterBand(bandIndex).GetNoDataValue()
        for bandIndex in range(1, src_bound_count + 1)
    ]
    src_band_data_type = dataset.GetRasterBand(1).DataType
    src_file_name = os.path.basename(input_file)
    out_file_name = os.path.splitext(src_file_name)[0] + '_reSam'
    # 创建重采样后的栅格
    out_file_name = out_file_name + ".tif"
    out_file_path = os.path.join(output_tif_folder, out_file_name)
    driver = gdal.GetDriverByName('GTiff')
    out_width = int(src_width * resample_factor)
    out_height = int(src_height * resample_factor)
    out_dataset = driver.Create(
        out_file_path,
        out_width,
        out_height,
        src_bound_count,
        src_band_data_type
    )
    geo_transforms = list(src_geoTransform)
    geo_transforms[1] = geo_transforms[1] / resample_factor
    geo_transforms[5] = geo_transforms[5] / resample_factor
    out_geo_transform = tuple(geo_transforms)
    out_dataset.SetGeoTransform(out_geo_transform)
    out_dataset.SetProjection(src_projection)
    for bandIndex in range(1, src_bound_count + 1):
        band = out_dataset.GetRasterBand(bandIndex)
        band.SetNoDataValue(src_no_data[bandIndex - 1])
    gdal.ReprojectImage(
        dataset,
        out_dataset,
        src_projection,
        src_projection,
        gdal.gdalconst.GRA_CubicSpline,
        0.0, 0.0,
    )
    return out_file_path

if __name__ == "__main__":
    grb2_to_nc(
        grb2_file_folder=r'C:\Users\Desktop\GRIB',
        out_tif_file_path=r'F:\GRIB_out_lsh',
        shp_file_path=r'C:\Users\Desktop\shape\LSH.shp')

说明几点问题:

裁剪文件的时候遇到的bug

Warning 1: Ring Self-intersection at or near point 112.48666420300003 34.830899357000078
ERROR 1: Cutline polygon is invalid.

解决方法:

参考链接: shp文件自相交处理_GIS开发者的博客-CSDN博客

上面的nc—>tiffs, 是针对我的GRIB2转出来的nc文件进行的操作, 还需要根据读者的数据来修改部分地方的参数
重采样是为了数据不那么锯齿状
我不是专业的GIS人员,只是一个软件开发, 很多地方不专业读者见谅