import os
import sys
import json
import time
import odps
from odps.df import DataFrame
reload(sys)
sys.setdefaultencoding('utf-8' )
# 远程连接到ODPS,本地连接就不需要这一步
# o = odps. ODPS('Endpoint', 'AccessKeyId', 'AcessKeySecret', 'Project')
# 从ODPS获取DataFrame
iris = DataFrame(o.get_table('表名'))
partition_column = None #odps表没有分区
partition_column_ds = 'ds'
table_list = []
column_list = []
# 遍历 iris DataFrame 的每一行数据
for record in iris.execute():
status = 0
project_space = record['name_space']
ename = record['table_name']
# 检查表是否存在
if o.exist_table(ename, project=project_space):
status = 1
t = o.get_table(ename, project=project_space)
id = t.table_id # 表主键
data_size = t.size # 存储量
desc = t.comment # 描述
create_time = t.creation_time # 创建时间
update_time = t.last_modified_time # 最后修改时间
table_last_meta_modified_time = t.last_meta_modified_time # 最后DDL时间
# 执行SQL查询统计表记录数
with o.execute_sql('SELECT COUNT(1) as cs FROM ' + ename, project=project_space).open_reader() as reader:
count = reader[0]['cs']
print('Count:', count)
table_list.append([id, ename, None, None, project_space, None, None, None, status, update_time, create_time, data_size, count, desc, None])
print([id, ename, None, None, project_space, None, None, None, status, update_time, create_time, data_size, count, desc, None])
table_columns = t.schema.names # 字段
for col_ename in table_columns:
col_name = t.schema[col_ename].comment # 字段中文名
type = str(t.schema[col_ename].type) # 类型
column_list.append([None, id, ename, col_name, col_ename, None, None, type, ename, project_space])
else:
table_list.append([None, ename, None, None, project_space, None, None, None, status, None, None, None, count, None, None])
# 获取 'fct_cst_tables_df' 表和 'fct_cst_columns_df' 表对象
_table = o.get_table('fct_cst_tables_df', project='project_space')
_column = o.get_table('fct_cst_columns_df', project='project_space')
# 将结果写入 'fct_cst_tables_df' 表和 'fct_cst_columns_df' 表
o.write_table('project_space.fct_cst_tables_df', table_list)
o.write_table('project_space.fct_cst_columns_df', column_list)