Datawhale AI夏令营——降水预测-CSDN博客

本文链接：https://blog.csdn.net/m0_56648914/article/details/140772181

Datawhale AI夏令营

Task 2 抽丝剥茧——降水预测baseline详解

分析数据集特征

feature_path 和 gt_path 分别是官方提供的 train.xxx 数据和 gt.xxx 数据的存放路径。挑选想尝试的数据集，并更改列表（如 years）中相应的字符串即可。Feature 类和 GT（GroundTruth）类是对数据集的定义，方便后续自定义数据集和数据加载类。

class Feature:
    """Index of forecast feature directories keyed by initialisation time.

    The constructor reads the module-level ``feature_path``, ``years`` and
    ``fcst_steps`` settings and scans the feature directory once.
    """

    def __init__(self):
        self.path = feature_path
        self.years = years
        self.fcst_steps = fcst_steps
        self.features_paths_dict = self.get_features_paths()

    def get_features_paths(self):
        """Return a dict mapping each initialisation time to its directory.

        Every entry found under ``<path>/<year>/`` is parsed with
        ``pd.to_datetime`` and used as the key; the value is the full path
        of that entry.
        """
        init_time_path_dict = {}
        for year in self.years:
            year_dir = os.path.join(self.path, year)
            for init_time in sorted(os.listdir(year_dir)):
                init_time_path_dict[pd.to_datetime(init_time)] = os.path.join(year_dir, init_time)
        return init_time_path_dict

    def get_fts(self, init_time):
        """Open the feature files of one initialisation time.

        All files in the directory registered for ``init_time`` are opened
        with ``xr.open_mfdataset``, restricted to ``lead_time`` values in
        ``self.fcst_steps``, and the first ``time`` index is taken.

        Raises:
            KeyError: if ``init_time`` is not present in
                ``self.features_paths_dict``.
        """
        init_time_dir = self.features_paths_dict.get(init_time)
        if init_time_dir is None:
            # Fail with a clear message instead of the opaque
            # "NoneType + str" TypeError the concatenation would produce.
            raise KeyError(f'no feature directory found for init_time {init_time!r}')
        dataset = xr.open_mfdataset(os.path.join(init_time_dir, '*'))
        return dataset.sel(lead_time=self.fcst_steps).isel(time=0)
   
class GT:
    """Ground-truth dataset assembled from one ``<year>.nc`` file per year.

    The constructor reads the module-level ``gt_path``, ``years`` and
    ``fcst_steps`` settings and opens all yearly files as a single dataset.
    """

    def __init__(self):
        self.path = gt_path
        self.years = years
        self.fcst_steps = fcst_steps
        yearly_files = [os.path.join(self.path, f'{yr}.nc') for yr in self.years]
        self.gt_paths = yearly_files
        self.gts = xr.open_mfdataset(self.gt_paths)

    def parser_gt_timestamps(self, init_time):
        """Return ``init_time`` shifted by each forecast step, in hours."""
        offsets = (pd.Timedelta(f'{step}h') for step in self.fcst_steps)
        return [init_time + offset for offset in offsets]

    def get_gts(self, init_time):
        """Select the ground truth at the timestamps forecast from ``init_time``."""
        target_times = self.parser_gt_timestamps(init_time)
        return self.gts.sel(time=target_times)

路径修改

# Path configuration.
# Directory holding the feature data; change to your own path.
feature_path = '/mnt/workspace/AICamp_earth_baseline/feature/'
# Directory holding the ground-truth data; change to your own path.
gt_path = '/mnt/workspace/AICamp_earth_baseline/truth'
# Years of data to load, given as strings.
years = ['2020']
# Forecast steps 1 through 72 inclusive.
fcst_steps = [*range(1, 73)]