1 需求说明
在odps的的数仓中,需要获取目标表清单的数据字段字典,下面的脚本可以获取相应的数据信息。
2.脚本实现
"""
-- 获得stg表的字段注释信息
-- 获取数据说明:
|stg表名|字段名称|字段注释|
"""
import time
import datetime
from odps import ODPS
import sys
import io
reload(sys)
sys.setdefaultencoding("utf-8")
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8' )
odps = ODPS('xfc******BY9Y', 'cc58*******PMos', '****_odps_****',
endpoint='********')
# project = odps.get_project()
print("正常连接到odps({})的项目空间----".format('****_odps_****'))
# 1 获取目标数据
sql1 = 'select source_name,stg_table_name from ****_odps_****.stg_0601temp_qlqys limit 30;'
with odps.execute_sql(sql1).open_reader() as reader:
table_list = []
for row in reader:
table_list.append(str(row[1]))
count = len(table_list)
print("阶段1:读取目标数据完成!")
print("读取目标数据{}条".format(count))
# 2 获取目标注释
aim_columns = [] # 目标注释格式
failure = []
num = 0
print("阶段2:")
for i in range(len(table_list)):
table_name = table_list[i]
if odps.exist_table(table_name) :
table_columns = []
t = odps.get_table(table_name)
te = t.schema
for col in te:
col_name = col.name
col_comment = te['{}'.format(col_name)].comment
get_data = (table_name,col_name,col_comment)
aim_columns.append(get_data)
time.sleep(1)
now = datetime.datetime.now()
print("第{}条数据获取中".format(i+1)+ '.'*10)
print(">>>{} 表{}已经完成数据获取!".format(now,table_name))
else:
failure.append(table_list[i])
num += 1
if num == 0:
print("目标表全部存在!")
else:
print('对不起,有{}张表不存在!'.format(num))
print(failure)
# 3.获取数据落表
aim_table = odps.create_table(
'stg_col_commens_0610',
'table_name string, col_name string, col_comment string',
if_not_exists = True
)
# #写入数据
with aim_table.open_writer() as writer:
for j in range(len(aim_columns)):
writer.write(aim_columns[j])
print("阶段3:目标注释信息已经写入表stg_col_commens_0610")
# over