![a298140687639dfdf2e3f018230833a6.png](https://img-blog.csdnimg.cn/img_convert/a298140687639dfdf2e3f018230833a6.png)
注意:阅读本文前,请先了解阿里云的原教程 21分钟教会你分析MaxCompute账单,本篇操作是此教程的优化。
原教程有个痛点,成本表无法匹配到原始的 SQL,需要手动打开 Logview 链接,查看每条 SQL,当 SQL 超过 100 条的话,人工操作成本太高
我们使用一段 python 脚本,自动化补齐原始 SQL
执行代码前,请先完成以下操作:
- 支持 python 3.7 及以上版本
- 安装依赖
python3 -m pip install pandas pyodps requests
- 补充这些变量值
ODPS_ACCESS_ID, ODPS_ACCESS_KEY, ODPS_PROJECT, ODPS_ENDPOINT
import json
from urllib import parse
import odps
import requests
ODPS_ACCESS_ID = ""
ODPS_ACCESS_KEY = ""
ODPS_PROJECT = ""
ODPS_ENDPOINT = ""
SQL = """
SELECT to_char(endtime,'yyyymmdd') as ds,feeid as instanceid
,projectid
,computationsqlcomplexity
,SUM((computationsqlinput / 1024 / 1024 / 1024)) as computationsqlinput
,SUM((computationsqlinput / 1024 / 1024 / 1024)) * computationsqlcomplexity * 0.3 AS sqlmoney
FROM maxcomputefee
WHERE to_char(endtime,'yyyymmdd') >= '20190101'
GROUP BY to_char(endtime,'yyyymmdd'),feeid
,projectid
,computationsqlcomplexity
ORDER BY sqlmoney DESC
LIMIT 10000
"""
odps_object = odps.ODPS(
ODPS_ACCESS_ID,
ODPS_ACCESS_KEY,
project=ODPS_PROJECT,
endpoint=ODPS_ENDPOINT
)
if __name__ == "__main__":
with odps_object.execute_sql(SQL).open_reader() as reader:
df = reader.to_pandas()
result = {}
for instance_id in df['instanceid'].tolist():
print(f"Start request instance_id: {instance_id}")
instance = odps_object.get_instance(instance_id)
logview_url = instance.get_logview_address()
query_dict = dict(parse.parse_qsl(parse.urlsplit(logview_url).query))
url = f"https://logview.aliyun.com/logview/project/genie_core/instance/{instance_id}/sourceEntity?h={ODPS_ENDPOINT}&token={query_dict.get('token')}"
response = requests.get(url)
# parse response data
if not response.ok:
print(f"Response Code Error {response.status_code}, instance_id: {instance_id}")
continue
resp_json = response.json()
if resp_json and resp_json.get("data", {}):
result[instance_id] = resp_json.get("data", {}).get("script")
else:
print(f"Response Error text: {response.text}, instance_id: {instance_id}")
df["sql"] = df["instanceid"].map(result)
# Output dataframe to csv
df.to_csv("odps_cost.csv", sep='t')