prepbufr编码
prepbufr数据的解码在之前的文章有介绍过,这里简单介绍一下利用已知的观测数据(excel格式)取编码prepbufr二进制数据
一、编码
ncepbufr的git网页给的编码程序text_write.py
值得注意的是,它只适用于添加或者补录单个观测数据,
我的目的是循环读取不同时间、层次、属性、类型数据并编码进去。
from __future__ import print_function
import ncepbufr
import numpy as np
# run test.py first to create prepbufr.table file.
hdstr='SID XOB YOB DHR TYP ELV SAID T29'
obstr='POB QOB TOB ZOB UOB VOB PWO MXGS HOVI CAT PRSS TDO PMO'
qcstr='PQM QQM TQM ZQM WQM NUL PWQ PMQ'
oestr='POE QOE TOE NUL WOE NUL PWE'
# open prepbufr file.
bufr = ncepbufr.open('data/prepbufr2','w',table='prepbufr.table')
idate=2010050700 # cycle time: YYYYMMDDHH
subset='ADPSFC' # surface land (SYNOPTIC, METAR) reports
bufr.open_message(subset, idate)
hdr = bufr.missing_value*np.ones(len(hdstr.split()),np.float)
hdr[0] = np.fromstring('KTKI ',dtype=np.float)[0]
hdr[1]=263.4; hdr[2]=33.2; hdr[3] = -0.1; hdr[4]=287; hdr[5]=179
# encode header for wind obs
bufr.write_subset(hdr,hdstr)
# set obs, qcf, oer for wind
obs = bufr.missing_value*np.ones(len(obstr.split()),np.float)
oer = bufr.missing_value*np.ones(len(oestr.split()),np.float)
qcf = bufr.missing_value*np.ones(len(qcstr.split()),np.float)
obs[0]=985.2; obs[4]=-2.8; obs[5]=-7.7; obs[7]=6.0
qcf[0]=2.0 ; qcf[4]=2.0; oer[4] = 1.6
# encode wind obs
bufr.write_subset(obs,obstr)
# encode wind ob err
bufr.write_subset(oer,oestr)
# encode quality flags, end subset.
bufr.write_subset(qcf,qcstr,end=True)
# set obs, qcf, oer for temperature and moisture
hdr[4]=187 # report type
# encode header
bufr.write_subset(hdr,hdstr)
obs[:]=bufr.missing_value; qcf[:]=bufr.missing_value; oer[:]=bufr.missing_value
obs[0]=985.2;obs[1]=12968.0;obs[2]=31.3;obs[3]=179.0;obs[7]=0.0
qcf[0]=2.0 ;qcf[1]=2.0 ;qcf[2]=2.0 ;qcf[3]=2.0
oer[0]=0.5 ;oer[1]=0.6 ;oer[2]=2.3
# encode temperature and moisture obs, obs err and qc flags.
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
# close bufr message
bufr.close_message()
# create a message with upper-air (raob) data.
subset='ADPUPA' # upper-air (raob, drops) reports
bufr.open_message(subset, idate)
# set header
hdr[:]=bufr.missing_value
hdr[0] = np.fromstring('72293 ',dtype=np.float)[0]
hdr[1]=242.9; hdr[2]=32.9; hdr[3]=0.0; hdr[5]=134.0
# set obs, qcf, oer for wind
nlvl=3
hdr[4]=220 # report type: sounding
obs = bufr.missing_value*np.ones((len(obstr.split()),nlvl),np.float)
oer = bufr.missing_value*np.ones((len(oestr.split()),nlvl),np.float)
qcf = bufr.missing_value*np.ones((len(qcstr.split()),nlvl),np.float)
obs[0,0]=998.0; obs[4,0]=4.6 ;obs[5,0]=2.2 ;obs[7,0]=3.0
qcf[0,0]=2.0 ; qcf[4,0]=2.0
oer[4,0]=2.3
obs[0,1]=850.0; obs[4,1]=2.0 ;obs[5,1]=-1.7;obs[7,1]=1.0
qcf[0,1]=2.0 ; qcf[4,1]=2.0
oer[4,1]=2.6
obs[0,2]=700.0; obs[4,2]=12.1;obs[5,2]=-4.4;obs[7,2]=1.0
qcf[0,2]=2.0 ; qcf[4,2]=2.0
oer[4,2]=2.5
# encode wind obs
bufr.write_subset(hdr,hdstr)
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
# set obs, qcf, oer for temperature and moisture
nlvl=4
obs = bufr.missing_value*np.ones((len(obstr.split()),nlvl),np.float)
oer = bufr.missing_value*np.ones((len(oestr.split()),nlvl),np.float)
qcf = bufr.missing_value*np.ones((len(qcstr.split()),nlvl),np.float)
hdr[4]=120 # report type: sounding
obs[0,0]=998.0;obs[1,0]=8112.0;obs[2,0]=22.3;obs[3,0]=134.0;obs[7,0]=0.0
qcf[0,0]=2.0 ;qcf[1,0]=2.0 ;qcf[2,0]=2.0 ;qcf[3,0]=2.0
oer[0,0]=0.7 ;oer[1,0]=0.7 ;oer[2,0]=1.4
obs[0,1]=925.0;obs[1,1]=6312.0;obs[2,1]=14.1;obs[3,1]=779.0;obs[7,1]=1.0
qcf[0,1]=2.0 ;qcf[1,1]=2.0 ;qcf[2,1]=2.0 ;qcf[3,1]=2.0
oer[1,1]=0.9 ;oer[2,1]=1.5
obs[0,2]=850.0;obs[1,2]=2161.0;obs[2,2]=14.8;obs[3,2]=1493.;obs[7,2]=1.0
qcf[0,2]=2.0 ;qcf[1,2]=2.0 ;qcf[2,2]=2.0 ;qcf[3,2]=2.0
oer[1,2]=1.1 ;oer[2,2]=1.4
obs[0,3]=700.0;obs[1,3]=2131.0;obs[2,3]=9.2 ;obs[3,3]=3118.;obs[7,3]=1.0
qcf[0,3]=2.0 ;qcf[1,3]=2.0 ;qcf[2,3]=2.0 ;qcf[3,3]=2.0
oer[1,3]=1.4 ;oer[2,3]=1.0
# encode temperature and moisture
bufr.write_subset(hdr,hdstr)
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
# close bufr message
bufr.close_message()
# close bufr file
bufr.close()
# open bufr file, append another message to it.
bufr = ncepbufr.open('data/prepbufr2','a')
# set data values
hdr = bufr.missing_value*np.ones(len(hdstr.split()),np.float)
obs = bufr.missing_value*np.ones(len(obstr.split()),np.float)
oer = bufr.missing_value*np.ones(len(oestr.split()),np.float)
qcf = bufr.missing_value*np.ones(len(qcstr.split()),np.float)
hdr[0] = np.fromstring('KBOU ',dtype=np.float)[0]
hdr[1]=-105.0;hdr[2]=40.0;hdr[3]=-1.0;hdr[4]=181
obs[0]=300.0
idate=2008120101 # YYYYMMDDHH
subset='ADPSFC' # surface land reports
bufr.open_message(subset, idate)
bufr.write_subset(hdr,hdstr)
bufr.write_subset(obs,obstr)
bufr.write_subset(oer,oestr)
bufr.write_subset(qcf,qcstr,end=True) # end subset
bufr.close_message()
bufr.close()
# read prepbufr file back in.
bufr = ncepbufr.open('data/prepbufr2')
#bufr.print_table() # print embedded table
while bufr.advance() == 0: # loop over messages.
print(bufr.msg_counter, bufr.msg_type, bufr.msg_date)
while bufr.load_subset() == 0: # loop over subsets in message.
hdr = bufr.read_subset(hdstr).squeeze()
station_id = hdr[0].tostring()
obs = bufr.read_subset(obstr)
nlevs = obs.shape[-1]
oer = bufr.read_subset(oestr)
qcf = bufr.read_subset(qcstr)
print('station_id, lon, lat, time, station_type, levels =',\
station_id,hdr[1],hdr[2],hdr[3],int(hdr[4]),nlevs)
for k in range(nlevs):
if nlevs > 1:
print('level =',k+1)
print('obs',obs[:,k])
print('oer',oer[:,k])
print('qcf',qcf[:,k])
bufr.close()
所以针对我们的使用目的我稍微改进了代码
import ncepbufr
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
# BUFR 文件配置
hdstr = 'SID XOB YOB DHR TYP ELV SAID T29'
obstr = 'POB QOB TOB ZOB UOB VOB PWO MXGS HOVI CAT PRSS TDO PMO'
qcstr = 'PQM QQM TQM ZQM WQM NUL PWQ PMQ'
oestr = 'POE QOE TOE NUL WOE NUL PWE'
# 读取 Excel 文件并解析时间戳
file_path = '/Users/xychen/福建测风数据/平潭外海_4686&4828.xlsx'
df = pd.read_excel(file_path, header=12, parse_dates=[0], date_parser=lambda x: pd.to_datetime(x, format='%Y/%m/%d %H:%M:%S'))
# 设置常量
lat = 25.82822
lon = 119.9537
start_time_point = 72
num_time_points = 6
output_folder = '/Users/xychen/assimilation/GSI/GSI-Docker/2019080112/obs/'
heights = [100, 70, 20]
elevations = [15 + h for h in heights]
report_types = [287, 187] # 使用了用户提供的报告类型
dhr_values = [-1.0, -0.83, -0.67, -0.5, -0.33, -0.16, 0.0, 0.16, 0.33, 0.5, 0.67, 0.83, 1.0] # 新的时间间隔
for time_point in range(start_time_point, start_time_point + num_time_points * 72, 72):
timestamp = df.iloc[time_point, 0]
idate = timestamp.strftime('%Y%m%d%H')
output_file = f'{output_folder}{idate}.prepbufr.nr'
bufr = ncepbufr.open(output_file, 'w', table='/Users/xychen/pythonpainting/bufr_work/prebufr_data/prepbufr.table')
subset = 'ADPSFC'
bufr.open_message(subset, idate)
for i, dhr in enumerate(dhr_values):
t = time_point - 6 + i
for height, elevation in zip(heights, elevations):
# 获取风速和风向数据
wind_speed = df.iloc[t, {'100': 1, '70': 16, '20': 26}[str(height)]]
wind_dir = df.iloc[t, {'100': 31, '70': 34, '20': 37}[str(height)]]
# 检查风速或风向是否为空值,如果为空则跳过当前高度
if pd.isna(wind_speed) or pd.isna(wind_dir):
continue
# 处理压力和温度数据
pressure = df.iloc[t, 40] * 10
temperature = df.iloc[t, {'100': 48, '70': 60, '20': 68}[str(height)]]
# 计算 U 和 V 分量
u = wind_speed * np.sin(np.deg2rad(wind_dir))
v = wind_speed * np.cos(np.deg2rad(wind_dir))
for report_type in report_types:
hdr = bufr.missing_value * np.ones(len(hdstr.split()), float)
hdr[0] = np.frombuffer(b'PTWH ', dtype=float)[0]
hdr[1] = lon
hdr[2] = lat
hdr[3] = dhr
hdr[4] = report_type
hdr[5] = elevation
bufr.write_subset(hdr, hdstr)
obs = bufr.missing_value * np.ones(len(obstr.split()), float)
oer = bufr.missing_value * np.ones(len(oestr.split()), float)
qcf = bufr.missing_value * np.ones(len(qcstr.split()), float)
if report_type == 287:
obs[0] = pressure
obs[4] = u
obs[5] = v
obs[7] = 6.0
qcf[0] = 2.0; qcf[4] = 2.0; oer[4] = 1.6
elif report_type == 187:
obs[0] = pressure
obs[2] = temperature
obs[3] = elevation
obs[7] = 0.0
qcf[0] = 2.0; qcf[1] = 2.0; qcf[2] = 2.0; qcf[3] = 2.0; oer[0] = 0.5; oer[1] = 0.6; oer[2] = 2.3
bufr.write_subset(obs, obstr)
bufr.write_subset(oer, oestr)
bufr.write_subset(qcf, qcstr, end=True)
bufr.close_message()
bufr.close()
print("所有 BUFR 数据已生成。")
# 重新读取 prepbufr 文件
bufr = ncepbufr.open("/Users/xychen/assimilation/GSI/GSI-Docker/2019080112/obs/2019080112.prepbufr.nr")
# bufr.print_table() # 打印嵌入的表格
while bufr.advance() == 0: # 循环遍历消息
print(bufr.msg_counter, bufr.msg_type, bufr.msg_date)
while bufr.load_subset() == 0: # 循环遍历消息中的子集
hdr = bufr.read_subset(hdstr).squeeze()
station_id = hdr[0].tostring()
obs = bufr.read_subset(obstr)
nlevs = obs.shape[-1]
oer = bufr.read_subset(oestr)
qcf = bufr.read_subset(qcstr)
print('station_id, lon, lat, time, station_type, levels =',\
station_id,hdr[1],hdr[2],hdr[3],int(hdr[4]),nlevs)
for k in range(nlevs):
if nlevs > 1:
print('level =',k+1)
print('obs',obs[:,k])
print('oer',oer[:,k])
print('qcf',qcf[:,k])
bufr.close()
我们选取最适合的prepbufr type 风数据用287,标量使用187,循环读取20m、70m、100m的数据并编码进去,得到新的prepbufr数据
再次对其解码大致如下
二、同化使用
步骤与同化GDAS数据的步骤一致但有需要注意的地方
1. 修改数据选取范围
就是修改 * “/GSI/comGSIv3.7_EnKFv1.3/fix/global_convinfo.txt” * 这个文件,
将对应的属性和type后面的iuse改为1就能把对应的数据同化进去
2. 修改背景误差协方差和参数方案
再你运行GSI同化的脚本里,如图
经过多次尝试,将其改为NAM才能展现出同化的效果
3.同化初始场增量展示(代码之前的文章里有详细展示)
结果大致如下