使用观测资料编码prepbufr文件(python)

prepbufr编码

prepbufr数据的解码在之前的文章有介绍过,这里简单介绍一下利用已知的观测数据(excel格式)取编码prepbufr二进制数据



一、编码

ncepbufr的git网页给的编码程序text_write.py
值得注意的是,它只适用于添加或者补录单个观测数据,
我的目的是循环读取不同时间、层次、属性、类型数据并编码进去。

from __future__ import print_function
import ncepbufr
import numpy as np

# run test.py first to create prepbufr.table file.

hdstr='SID XOB YOB DHR TYP ELV SAID T29'
obstr='POB QOB TOB ZOB UOB VOB PWO MXGS HOVI CAT PRSS TDO PMO'
qcstr='PQM QQM TQM ZQM WQM NUL PWQ PMQ'
oestr='POE QOE TOE NUL WOE NUL PWE'

# open prepbufr file.

bufr = ncepbufr.open('data/prepbufr2','w',table='prepbufr.table')
idate=2010050700 # cycle time: YYYYMMDDHH
subset='ADPSFC'  # surface land (SYNOPTIC, METAR) reports
bufr.open_message(subset, idate)

hdr = bufr.missing_value*np.ones(len(hdstr.split()),np.float)
hdr[0] = np.fromstring('KTKI    ',dtype=np.float)[0]
hdr[1]=263.4; hdr[2]=33.2; hdr[3] = -0.1; hdr[4]=287; hdr[5]=179
# encode header for wind obs
bufr.write_subset(hdr,hdstr)
# set obs, qcf, oer for  wind
obs = bufr.missing_value*np.ones(len(obstr.split()),np.float)
oer = bufr.missing_value*np.ones(len(oestr.split()),np.float)
qcf = bufr.missing_value*np.ones(len(qcstr.split()),np.float)
obs[0]=985.2; obs[4]=-2.8; obs[5]=-7.7; obs[7]=6.0
qcf[0]=2.0  ; qcf[4]=2.0; oer[4] = 1.6
# encode wind obs
bufr.write_subset(obs,obstr)
# encode wind ob err
bufr.write_subset(oer,oestr)
# encode quality flags, end subset.
bufr.write_subset(qcf,qcstr,end=True)
# set obs, qcf, oer for  temperature and moisture
hdr[4]=187          # report type
# encode header
bufr.write_subset(hdr,hdstr)
obs[:]=bufr.missing_value; qcf[:]=bufr.missing_value; oer[:]=bufr.missing_value
obs[0]=985.2;obs[1]=12968.0;obs[2]=31.3;obs[3]=179.0;obs[7]=0.0
qcf[0]=2.0  ;qcf[1]=2.0    ;qcf[2]=2.0 ;qcf[3]=2.0
oer[0]=0.5  ;oer[1]=0.6    ;oer[2]=2.3
# encode temperature and moisture obs, obs err and qc flags.
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
# close bufr message
bufr.close_message()
# create a message with upper-air (raob) data.
subset='ADPUPA'  # upper-air (raob, drops) reports
bufr.open_message(subset, idate)

# set header
hdr[:]=bufr.missing_value
hdr[0] = np.fromstring('72293   ',dtype=np.float)[0]
hdr[1]=242.9; hdr[2]=32.9; hdr[3]=0.0; hdr[5]=134.0

# set obs, qcf, oer for  wind
nlvl=3
hdr[4]=220          # report type: sounding
obs = bufr.missing_value*np.ones((len(obstr.split()),nlvl),np.float)
oer = bufr.missing_value*np.ones((len(oestr.split()),nlvl),np.float)
qcf = bufr.missing_value*np.ones((len(qcstr.split()),nlvl),np.float)
obs[0,0]=998.0; obs[4,0]=4.6 ;obs[5,0]=2.2 ;obs[7,0]=3.0
qcf[0,0]=2.0  ; qcf[4,0]=2.0
oer[4,0]=2.3
obs[0,1]=850.0; obs[4,1]=2.0 ;obs[5,1]=-1.7;obs[7,1]=1.0
qcf[0,1]=2.0  ; qcf[4,1]=2.0
oer[4,1]=2.6
obs[0,2]=700.0; obs[4,2]=12.1;obs[5,2]=-4.4;obs[7,2]=1.0
qcf[0,2]=2.0  ; qcf[4,2]=2.0
oer[4,2]=2.5
# encode  wind obs
bufr.write_subset(hdr,hdstr)
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
# set obs, qcf, oer for  temperature and moisture
nlvl=4
obs = bufr.missing_value*np.ones((len(obstr.split()),nlvl),np.float)
oer = bufr.missing_value*np.ones((len(oestr.split()),nlvl),np.float)
qcf = bufr.missing_value*np.ones((len(qcstr.split()),nlvl),np.float)
hdr[4]=120          # report type: sounding
obs[0,0]=998.0;obs[1,0]=8112.0;obs[2,0]=22.3;obs[3,0]=134.0;obs[7,0]=0.0
qcf[0,0]=2.0  ;qcf[1,0]=2.0   ;qcf[2,0]=2.0 ;qcf[3,0]=2.0
oer[0,0]=0.7  ;oer[1,0]=0.7   ;oer[2,0]=1.4
obs[0,1]=925.0;obs[1,1]=6312.0;obs[2,1]=14.1;obs[3,1]=779.0;obs[7,1]=1.0
qcf[0,1]=2.0  ;qcf[1,1]=2.0   ;qcf[2,1]=2.0 ;qcf[3,1]=2.0
oer[1,1]=0.9   ;oer[2,1]=1.5
obs[0,2]=850.0;obs[1,2]=2161.0;obs[2,2]=14.8;obs[3,2]=1493.;obs[7,2]=1.0
qcf[0,2]=2.0  ;qcf[1,2]=2.0   ;qcf[2,2]=2.0 ;qcf[3,2]=2.0
oer[1,2]=1.1   ;oer[2,2]=1.4
obs[0,3]=700.0;obs[1,3]=2131.0;obs[2,3]=9.2 ;obs[3,3]=3118.;obs[7,3]=1.0
qcf[0,3]=2.0  ;qcf[1,3]=2.0   ;qcf[2,3]=2.0 ;qcf[3,3]=2.0
oer[1,3]=1.4   ;oer[2,3]=1.0
# encode temperature and moisture
bufr.write_subset(hdr,hdstr)
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
# close bufr message
bufr.close_message()
# close bufr file
bufr.close()

# open bufr file, append another message to it.
bufr = ncepbufr.open('data/prepbufr2','a')
# set data values
hdr = bufr.missing_value*np.ones(len(hdstr.split()),np.float)
obs = bufr.missing_value*np.ones(len(obstr.split()),np.float)
oer = bufr.missing_value*np.ones(len(oestr.split()),np.float)
qcf = bufr.missing_value*np.ones(len(qcstr.split()),np.float)
hdr[0] = np.fromstring('KBOU    ',dtype=np.float)[0]
hdr[1]=-105.0;hdr[2]=40.0;hdr[3]=-1.0;hdr[4]=181
obs[0]=300.0
idate=2008120101  # YYYYMMDDHH
subset='ADPSFC'   # surface land reports
bufr.open_message(subset, idate)
bufr.write_subset(hdr,hdstr)
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
bufr.close_message()
bufr.close()

# read prepbufr file back in.

bufr = ncepbufr.open('data/prepbufr2')
#bufr.print_table() # print embedded table
while bufr.advance() == 0: # loop over messages.
    print(bufr.msg_counter, bufr.msg_type, bufr.msg_date)
    while bufr.load_subset() == 0: # loop over subsets in message.
        hdr = bufr.read_subset(hdstr).squeeze()
        station_id = hdr[0].tostring()
        obs = bufr.read_subset(obstr)
        nlevs = obs.shape[-1]
        oer = bufr.read_subset(oestr)
        qcf = bufr.read_subset(qcstr)
        print('station_id, lon, lat, time, station_type, levels =',\
        station_id,hdr[1],hdr[2],hdr[3],int(hdr[4]),nlevs)
        for k in range(nlevs):
            if nlevs > 1:
                print('level =',k+1)
            print('obs',obs[:,k])
            print('oer',oer[:,k])
            print('qcf',qcf[:,k])
bufr.close()

所以针对我们的使用目的我稍微改进了代码

import ncepbufr
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# BUFR 文件配置
hdstr = 'SID XOB YOB DHR TYP ELV SAID T29'
obstr = 'POB QOB TOB ZOB UOB VOB PWO MXGS HOVI CAT PRSS TDO PMO'
qcstr = 'PQM QQM TQM ZQM WQM NUL PWQ PMQ'
oestr = 'POE QOE TOE NUL WOE NUL PWE'

# 读取 Excel 文件并解析时间戳
file_path = '/Users/xychen/福建测风数据/平潭外海_4686&4828.xlsx'
df = pd.read_excel(file_path, header=12, parse_dates=[0], date_parser=lambda x: pd.to_datetime(x, format='%Y/%m/%d %H:%M:%S'))

# 设置常量
lat = 25.82822
lon = 119.9537
start_time_point = 72
num_time_points = 6
output_folder = '/Users/xychen/assimilation/GSI/GSI-Docker/2019080112/obs/'
heights = [100, 70, 20]
elevations = [15 + h for h in heights]
report_types = [287, 187]  # 使用了用户提供的报告类型
dhr_values = [-1.0, -0.83, -0.67, -0.5, -0.33, -0.16, 0.0, 0.16, 0.33, 0.5, 0.67, 0.83, 1.0]  # 新的时间间隔

for time_point in range(start_time_point, start_time_point + num_time_points * 72, 72):
    timestamp = df.iloc[time_point, 0]
    idate = timestamp.strftime('%Y%m%d%H')
    output_file = f'{output_folder}{idate}.prepbufr.nr'
    
    bufr = ncepbufr.open(output_file, 'w', table='/Users/xychen/pythonpainting/bufr_work/prebufr_data/prepbufr.table')
    subset = 'ADPSFC'
    bufr.open_message(subset, idate)

    for i, dhr in enumerate(dhr_values):
        t = time_point - 6 + i
        for height, elevation in zip(heights, elevations):
            # 获取风速和风向数据
            wind_speed = df.iloc[t, {'100': 1, '70': 16, '20': 26}[str(height)]]
            wind_dir = df.iloc[t, {'100': 31, '70': 34, '20': 37}[str(height)]]

            # 检查风速或风向是否为空值,如果为空则跳过当前高度
            if pd.isna(wind_speed) or pd.isna(wind_dir):
                continue

            # 处理压力和温度数据
            pressure = df.iloc[t, 40] * 10
            temperature = df.iloc[t, {'100': 48, '70': 60, '20': 68}[str(height)]]

            # 计算 U 和 V 分量
            u = wind_speed * np.sin(np.deg2rad(wind_dir))
            v = wind_speed * np.cos(np.deg2rad(wind_dir))

            for report_type in report_types:
                hdr = bufr.missing_value * np.ones(len(hdstr.split()), float)
                hdr[0] = np.frombuffer(b'PTWH    ', dtype=float)[0]
                hdr[1] = lon
                hdr[2] = lat
                hdr[3] = dhr
                hdr[4] = report_type
                hdr[5] = elevation

                bufr.write_subset(hdr, hdstr)

                obs = bufr.missing_value * np.ones(len(obstr.split()), float)
                oer = bufr.missing_value * np.ones(len(oestr.split()), float)
                qcf = bufr.missing_value * np.ones(len(qcstr.split()), float)
                if report_type == 287:
                    obs[0] = pressure
                    obs[4] = u
                    obs[5] = v
                    obs[7] = 6.0
                    qcf[0] = 2.0; qcf[4] = 2.0; oer[4] = 1.6
                elif report_type == 187:
                    obs[0] = pressure
                    obs[2] = temperature
                    obs[3] = elevation
                    obs[7] = 0.0
                    qcf[0] = 2.0; qcf[1] = 2.0; qcf[2] = 2.0; qcf[3] = 2.0; oer[0] = 0.5; oer[1] = 0.6; oer[2] = 2.3
                
                bufr.write_subset(obs, obstr)
                bufr.write_subset(oer, oestr)
                bufr.write_subset(qcf, qcstr, end=True)

    bufr.close_message()
    bufr.close()

print("所有 BUFR 数据已生成。")




# 重新读取 prepbufr 文件
bufr = ncepbufr.open("/Users/xychen/assimilation/GSI/GSI-Docker/2019080112/obs/2019080112.prepbufr.nr")
# bufr.print_table() # 打印嵌入的表格
while bufr.advance() == 0: # 循环遍历消息
    print(bufr.msg_counter, bufr.msg_type, bufr.msg_date)
    while bufr.load_subset() == 0: # 循环遍历消息中的子集
        hdr = bufr.read_subset(hdstr).squeeze()
        station_id = hdr[0].tostring()
        obs = bufr.read_subset(obstr)
        nlevs = obs.shape[-1]
        oer = bufr.read_subset(oestr)
        qcf = bufr.read_subset(qcstr)
        print('station_id, lon, lat, time, station_type, levels =',\
        station_id,hdr[1],hdr[2],hdr[3],int(hdr[4]),nlevs)
        for k in range(nlevs):
            if nlevs > 1:
                print('level =',k+1)
            print('obs',obs[:,k])
            print('oer',oer[:,k])
            print('qcf',qcf[:,k])
bufr.close()

我们选取最适合的prepbufr type 风数据用287,标量使用187,循环读取20m、70m、100m的数据并编码进去,得到新的prepbufr数据
再次对其解码大致如下
编译后结果

二、同化使用

步骤与同化GDAS数据的步骤一致但有需要注意的地方

1. 修改数据选取范围

就是修改 * “/GSI/comGSIv3.7_EnKFv1.3/fix/global_convinfo.txt” * 这个文件,
文件内容
将对应的属性和type后面的iuse改为1就能把对应的数据同化进去
如图所示

2. 修改背景误差协方差和参数方案

再你运行GSI同化的脚本里,如图
文件内容
经过多次尝试,将其改为NAM才能展现出同化的效果

3.同化初始场增量展示(代码之前的文章里有详细展示)

结果大致如下
初始场同化增量

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

阿翔_ocean

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值