python操控oracle数据库

oracle数据库控制

# -*- coding:utf-8 -*- 

# 忽略警告信息
import warnings
warnings.simplefilter(action = "ignore", category = FutureWarning)
warnings.filterwarnings("ignore")



import re
import pandas as pd 
import numpy as np
import itertools
import cx_Oracle
from sqlalchemy import create_engine



class control_db():
    '''
    控制数据库
    '''
    def __init__(self,str_link):
        self.erSql = open('./error_sql.txt','w',encoding='utf-8')
        self.str_link = str_link
        if self.str_link:
            self.connect(str_link)
        self.sql_cnt = itertools.count(1)


    def __del__(self):
        self.erSql.close()

        if self.str_link:
            self.curs.close()

            self.conn.commit()
            self.conn.close()


    def connect(self,str_link):
        # 链接数据库
        str_ls = re.split('[/@:]',str_link)
        
        uname = str_ls[0]
        pwd = str_ls[1]
        ip = str_ls[2]
        port = str_ls[3]
        tnsname = str_ls[4]

        dsnStr = cx_Oracle.makedsn(ip, port, service_name=tnsname)      
        self.conn = cx_Oracle.connect(uname, pwd, dsnStr)
        self.curs = self.conn.cursor()
        # self.conn.commit() 
        self.engine = create_engine('oracle://%s:%s@%s' %(uname, pwd, dsnStr), encoding='utf-8')       


    def exeSql(self,sql):
        try:
            self.curs.execute(sql)
            if mod(next(self.sql_cnt),5000) == 0:
                self.conn.commit()
        except Exception as e:
            print('Error:',repr(e),sql)


    def exeSqls(self,sqls,log=False):
        # 执行sql语句列表,写错误文件
        sqls = [sql.split() for sql in sql.split(';')]
        for sql in sqls:
            self.exeSql(sql,log)


    def select(self,sql):
        ret = pd.read_sql(sql,con=self.engine);
        print (ret);
        return ret;


    def exeFile(self,file,encoding='utf-8'):
        with open(file,'r',encoding=encoding) as file:
            sql = file.read()
        sqls = [sql.strip() for sql in re.split(';+',sql)[:-1]]
        self.exeSqls(sqls)


    def getTbls(self,user):
        '''
        取用户的所有表名
        '''
        sql = ''' SELECT TABLE_NAME FROM ALL_TABLES WHERE OWNER = '%s' '''%user
        tbls = pd.read_sql(sql,con=self.engine)['table_name'].to_list()
        return tbls


    def getCols(self,tbname):
        '''
        获取表的列名
        '''
        sql = ''' SELECT COLUMN_NAME FROM USER_COL_COMMENTS WHERE TABLE_NAME = '%s' '''%tbname
        cols = pd.read_sql(sql,con=self.engine)['column_name'].to_list()
        return cols        
      

    def getcols2(self,sql,synonym):
        # 获取列名,加上表别名显示
        ls  = re.split(r'[,,]',synonym)
        left = ls[0]
        right = ls[1]   

        tbl = pd.read_sql(sql,con=self.engine);
        if left and right:
            ret = '\n,'.join(['%s.%s = %s.%s'%(left,name,right,name) for name in tbl.columns])
        elif left:
            ret = '\n,'.join(['%s.%s '%(left,name) for name in tbl.columns])
        else:
            ret = '\n,'.join(tbl.columns)
        ## ret = ret+'\n\n'
        return ret


    def dropTbls(self,tbls,ktbls=[]):
        '''
        批量删除表
        '''
        sqls = ['DROP TABLE "%s"'%tbname for tbname in tbls if tbname not in ktbls]
        self.exeSqls(sqls)


    def addDatPartition(self,tbname,fmt='PT_%Y%m'):
        '''
        对按月分区的表增加月分区到当前日期
        '''
        # 读取最大分区名,按月份
        sql = 'select HIGH_VALUE from user_tab_partitions where table_name = \'%s\''%tbname
        maxval = pd.read_sql(sql,con=self.engine)['high_value'].sort_values(ascending=False).iloc[0]
        maxdate = pd.Timestamp(re.search(r'(\d+-\d+-\d+)',maxval).group(1))+DateOffset(months=1)

        # 最大时间到现在的序列
        opt_df = pd.DataFrame(pd.period_range(start=maxdate, end=pd.Timestamp.now(),freq='M')[1:].rename('high_value'))
        if opt_df.empty:
            print('不需要增加分区')
            return -1            
        opt_df['high_value'] =  opt_df['high_value'].map(lambda x:x.start_time)
        opt_df['pt_name'] = opt_df['high_value'].map(lambda x:x.strftime(fmt))
        opt_df['high_value'] = (opt_df['high_value']+DateOffset(months=1)).astype(str)

        # 生成sql语句
        sql = ''' ALTER TABLE {tbname} ADD PARTITION {pt_name} VALUES LESS THAN (TO_DATE('{high_value}','yyyy-mm-dd')) '''
        opt_df['sql'] = ''
        for idx,row in opt_df.iterrows():
            opt_df.loc[idx,'sql'] = sql.format(
                tbname = tbname,
                pt_name = opt_df.loc[idx,'pt_name'],
                high_value = opt_df.loc[idx,'high_value']
            )

        # 执行sql
        self.exeSqls(opt_df['sql'])


    def initDB(self,user,create_path,datpar_tbls,ktbls=[]):
        '''
        初始化数据库
        '''
        print('=== 开始初始化数据库\n')
        # 删除表
        tbls = self.getTbls(user)
        self.dropTbls(tbls,ktbls)
        self.dropTbls(ktbls)

        # 新建表
        self.exeFile(create_path)

        # 加时间分区
        for datpar_tbl in datpar_tbls:
            self.addDatPartition(datpar_tbl)


    def obj2inSql(self,tbname,obj):
        '''
        行转换为插入语句
        '''
        base_sql = ''' INSERT INTO "{tbname}" ({cols}) VALUES ({vals}) '''

        cols = []
        vals = []
        for k,v in obj.items():
            # 字段
            cols.append('"%s"'%k)

            # 值
            v = str(v)
            if (v.upper() == 'NULL') or ('TO_DATE' in v.upper()):
                vals.append(v)
            else:
                vals.append('\'%s\''%v)

        # 组合生成
        cols = ','.join(cols)
        vals = ','.join(vals)
        sql = base_sql.format(tbname=tbname,cols=cols,vals=vals)
        return sql


    def obj2upSql(self,tbname,obj,uids):
        '''
        把行转换为更新语句
        '''
        # base_sql = ''' UPDATE "{tbname}" SET ({cols}) VALUES ({vals}) WHERE {condi} '''
        base_sql = ''' UPDATE "{tbname}" SET {kvals} WHERE {condi} '''

        kvals = []
        for k,v in obj.items():
            if k in uids:
                continue
            if (v.upper() == 'NULL') or ('TO_DATE' in v.upper()):
                kvals.append('"%s" = %s'%(k,v))
            else:
                kvals.append('"%s" = \'%s\''%(k,v))

        # 条件
        condi = []
        for uid in uids:
            c = '"%s" = \'%s\''%(uid,obj.get(uid))
            condi.append(c)

        # 组合生成
        kvals = ','.join(kvals)
        condi = ' and '.join(condi)
        sql = base_sql.format(tbname=tbname,kvals=kvals,condi=condi)
        return sql


    def __fmtFrame(self,frame,fields=None,dat_fields=None,rename=None):
        # 格式化
        def toDate(x):
            try:
                x = x.strftime('%Y-%m-%d') 
            except Exception as e:
                pass
            return x
        if dat_fields:
            frame[dat_fields] = frame[dat_fields].astype(np.datetime64).apply(lambda se:se.map(lambda x:toDate(x)))
            frame[dat_fields] = frame[dat_fields].apply(lambda se:se.map(lambda x:"TO_DATE('%s','yyyy-mm-dd')"%x))
        if fields:
            frame = frame[fields]
        if rename:
            frame = frame.rename(columns=rename)
        frame = frame.replace(['',' ',None,np.nan],'NULL')
        return frame.astype(np.str)


    def frame2inSql(self,tbname,frame,fields=None,dat_fields=None,rename=None):
        '''
        插入数据,可指定字段对齐数据库,用rename更改为数据库的名字
        '''
        # 格式化
        frame = self.__fmtFrame(frame,fields,dat_fields,rename)
        frame['etl_date'] = "to_date('%s','yyyy-mm-dd hh24:mi:ss')"%pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
        frame.columns = frame.columns.map(lambda x:x.upper())
        
        # 迭代行写出sql
        for idx,row in frame.iterrows():
            sql = self.obj2inSql(tbname,row.to_dict())
            yield sql+';\n'


    def build_model(self,tbls):
        tmps = []
        for tbl in tbls:
            # 读表
            sql = f'select * from {tbl} where rownum < 5'
            df = pd.read_sql(sql,con=self.engine)

            # 判断维度
            dim_list = []
            for col in df.columns:
                if isinstance(df[col][0],np.float) or isinstance(df[col][0],np.int) :
                    continue
                dim_list.append(col)

            # 重组
            cols = df.columns
            tmp = pd.DataFrame(data=1.0,index= cols[~cols.isin(dim_list)],columns = cols[cols.isin(dim_list)])
            tmps.append(tmp)

        # 合并过滤
        result = pd.concat(tmps,axis=0)
        result = result.groupby(result.index).min()
        return result



    def write_out(self,des):
        '''
        把库的表写到文件
        '''
        # 获取表名
        tb_names = pd.read_sql('select table_name from user_tables',con=db.engine).ix[:,0]  

        # 迭代表名
        for tb_name in tb_names:
            # 判断行数
            cnt = pd.read_sql(f'select count(1) from {tb_name}',con=db.engine).ix[0,0]
            if cnt < 5:
                continue    

            # 新建目录
            if not os.path.exists(des):
                os.mkdir(des)  

            # 分块读取
            rts = pd.read_sql(f'select * from {tb_name}',con=db.engine,chunksize=10000) 
            it_nums = itertools.count(1)
            file = open(os.path.join(des,f'{tb_name}.csv'),'w',encoding='utf-8-sig')
            for rt in rts:
                # 写入流,判断是否写字段
                strio = StringIO()
                num = next(it_nums)
                if num == 1:
                    rt.to_csv(strio,index=False,header=True)
                else:
                    rt.to_csv(strio,index=False,header=False)   

                # 写入文件
                rt_str = strio.getvalue().replace('\r\n','\n')
                file.write(rt_str)  

            # 完成一个文件
            file.close()
            print(tb_name,'is save')




if __name__=='__main__':
    dcb = control_db('investment/z123@10.10.10.10:1521/orcl')
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值