#!/usr/bin/env python
#encoding:utf-8
import getopt
import sys
import commands
from commonutil.dbUtil import PostgresqlClient
from commonutil.dateUtil import DateUtil
from commonutil.logUtil import LogsMaker
##连接pg库
def connpg97_8():
    """Return a PostgresqlClient connected to the 97_8 PG instance.

    The connection parameters below are placeholders in the original
    source (IP / username / password / database are redacted).
    """
    conn = {
        'host': '10.-------',
        'username': '用户名',
        'password': '密码',
        'port': '5432',
        'database': '数据库名',
    }
    return PostgresqlClient(conn['host'], conn['username'], conn['password'],
                            conn['database'], conn['port'])
if __name__ == '__main__':
##配置log日志目录
logger=LogsMaker(filepath='/home/log').makeLogs(sys.argv, True)
###输入参数
###账期
op_time=sys.argv[1]
##连接数据库
cursor = connpg97_8()
#这里是删除操作,往里面插入之前需要先把哪一行记录删除掉
del_cql='''delete from sch_sdq.hdfs_location where op_time = '%s' '''%(op_time)
cursor.execute(del_cql)
cursor.commit()
#查询出表的字段,然后赋值,最后插入到新表中
conf_sql=''' select dom_id, dom_name, sub_dom_id, sub_dom_name, data_sch, hdfs_location from sch_sdq.conf_hdfs_location where is_valid=1'''
result= cursor.query(conf_sql)
try:
for line in result:
dom_id = line[0]
dom_name = line[1]
sub_dom_id = line[2]
sub_dom_name=line[3]
data_sch = line[4]
hdfs_location =line[5]
#这里是用的awk统计某个目录下所有文件的大小
code1 ='''hadoop fs -du %s |awk '{sum += $1/1024/1024/1024};END {print sum}' '''%(hdfs_location)
code2 ='''hadoop fs -test -e %s'''%(hdfs_location)#先检查这个目录是否存在
status,logger=commands.getstatusoutput(code2)
if status==0:
print("hdfs文件存在")
status,output=commands.getstatusoutput(code1)
output_list=output.split('/n')
if len(output_list)>5:
size=output_list[-1]
else:
size=output_list[0]
insert_sql='''
insert into sch_sdq.rpt_hdfs_location_size_d
(
op_time,
dom_id,
dom_name,
sub_dom_id,
sub_dom_name,
data_sch,
hdfs_location,
hdfs_size
)
VALUES
(
'%s','%s','%s','%s','%s','%s','%s','%s'
);
'''%(op_time,dom_id,dom_name,sub_dom_id,sub_dom_name,data_sch,hdfs_location,size)
cursor.execute(insert_sql)
cursor.commit()
cursor.close()
except Exception,e:
logger.info(str(e))
logger.info('统计出错了')
cursor.close()
finally:
logger.info('统计执行成功')
cursor.close()