ICESat2学习笔记10 ：python批量读取处理ATL08数据

晚秋10

已于 2024-05-27 08:23:15 修改

阅读量4.2k

点赞数 15

分类专栏： ICESat 文章标签： ICESat-2 ATL08 python

于 2021-04-02 19:26:09 首次发布

本文链接：https://blog.csdn.net/chenguizhenaza/article/details/115400890

版权

ICESat 专栏收录该内容

10 篇文章 72 订阅

订阅专栏

在ICESat2学习笔记9 ：python读取ATL08数据基础上，完成批量处理工作：

import icepyx as ipx
import numpy as np
import pandas as pd
import h5py
import os,json
from pprint import pprint
import dask.dataframe as dd

# put the full filepath to a data file here. You can get this in JupyterHub by navigating to the file,
# right clicking, and selecting copy path. Then you can paste the path in the quotes below.
def read_atl08(fname, bbox=None):
    """
    Split one ATL08 file into per-beam HDF5 and CSV files.

    For every beam group (gt1l, gt1r, gt2l, gt2r, gt3l, gt3r) present in
    ``fname``, the ``latitude``, ``longitude``, ``canopy/canopy_h_metrics``
    and ``canopy/canopy_openness`` datasets under ``<beam>/land_segments``
    are read, optionally clipped to ``bbox``, and written next to the
    input file as ``<name>_<beam>.h5`` and ``<name>_<beam>.csv``.

    Parameters
    ----------
    fname : str
        Path of the input ATL08 HDF5 file.
    bbox : sequence of four numbers, optional
        ``(lonmin, lonmax, latmin, latmax)``. Only segments whose
        longitude/latitude fall inside the box (bounds inclusive) are
        kept. ``None`` or an empty sequence keeps every segment.

    Returns
    -------
    None
        All results are written to disk. A beam that is absent from the
        file, or that has no segments left after filtering, produces no
        output files.
    """
    # Each beam (ground track) is a top-level group in the file
    beams = ('gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r')

    # Strip only the extension: str.replace('.h5', ...) would also rewrite
    # a '.h5' occurring earlier in the path, and would return the input
    # path unchanged (so the input gets overwritten) when nothing matches.
    stem = os.path.splitext(fname)[0]

    # Open the input once instead of once per beam
    with h5py.File(fname, 'r') as fi:
        for beam in beams:

            #-----------------------------------#
            # 1) Read in data for a single beam #
            #-----------------------------------#

            seg_path = '/' + beam + '/land_segments'
            if seg_path not in fi:
                # Skip beams the file does not contain rather than
                # aborting the whole file with a KeyError
                continue

            # Load variables into memory (more can be added!)
            segments = fi[seg_path]
            lat = segments['latitude'][:]
            lon = segments['longitude'][:]
            canopy_h_metrics = segments['canopy/canopy_h_metrics'][:]
            canopy_openness = segments['canopy/canopy_openness'][:]

            #----------------------------------#
            # 2) Filter data according to bbox #
            #----------------------------------#

            if bbox is not None and len(bbox) > 0:
                lonmin, lonmax, latmin, latmax = bbox
                keep = ((lon >= lonmin) & (lon <= lonmax) &
                        (lat >= latmin) & (lat <= latmax))
                # Apply the mask to every variable so rows stay aligned
                lat = lat[keep]
                lon = lon[keep]
                canopy_h_metrics = canopy_h_metrics[keep]
                canopy_openness = canopy_openness[keep]

            # Nothing to write for this beam
            if canopy_h_metrics.shape[0] == 0:
                continue

            #--------------------------#
            # 3) Write per-beam output #
            #--------------------------#

            ofile = stem + '_' + beam + '.h5'
            with h5py.File(ofile, 'w') as fo:
                fo['lon'] = lon
                fo['lat'] = lat
                fo['canopy_h_metrics'] = canopy_h_metrics
                fo['canopy_openness'] = canopy_openness
            print('out ->', ofile)

            # CSV copy: one column per canopy_h_metrics column. The count
            # is taken from the array itself so that files carrying a
            # different number of metrics are exported in full.
            ofilecsv = stem + '_' + beam + '.csv'
            columns = {'lon': lon, 'lat': lat}
            for i in range(canopy_h_metrics.shape[1]):
                columns['canopy_h_metrics_%d' % i] = canopy_h_metrics[:, i]
            columns['canopy_openness'] = canopy_openness
            result = pd.DataFrame(columns)
            print('out ->', ofilecsv)
            result.to_csv(ofilecsv, index=False)

def readMultiH5(dir):
    """
    Run read_atl08 on every '.h5' file found below a directory.

    Parameters
    ----------
    dir : str
        Directory that holds the h5 files. It is walked recursively with
        os.walk, so files in sub-directories are processed as well.

    Notes
    -----
    read_atl08 writes its per-beam results next to the input file as
    '<name>_<beam>.h5'. Files with such a suffix are skipped here so that
    running the batch a second time on the same directory does not try to
    process its own earlier output.
    """
    # Suffixes of the per-beam files that read_atl08 produces
    beam_suffixes = tuple(
        '_' + beam + '.h5'
        for beam in ('gt1l', 'gt1r', 'gt2l', 'gt2r', 'gt3l', 'gt3r')
    )

    # os.walk yields (current directory, its sub-directories, its files)
    for root_dir, _sub_dirs, files in os.walk(dir):
        for file in files:
            # Require the real extension; a bare 'h5' suffix would also
            # match names that merely end in those two characters
            if not file.endswith('.h5'):
                continue
            if file.endswith(beam_suffixes):
                continue
            # Path of the file, prefixed with the directory being walked
            file_name = os.path.join(root_dir, file)
            read_atl08(file_name, None)

# Directory containing the ATL08 h5 files to process; adjust to your machine.
root_dir = r'G:\6.ICESat2 data\5000000951831-08'

# Call and execute. Guarded so the batch job starts only when this file is
# run as a script, not when it is imported from another module.
if __name__ == '__main__':
    readMultiH5(root_dir)