orecal python_用 Python开发Oracle入门-CSDN博客

# -*- coding: utf-8 -*-

"""Created on Mon Sep 18 19:20:24 2017@author: zimuliu"""

import cx_Oracle, csv, os, sys, time, math

import pandas as pd

"""create oracle db class"""

class oracle_db(object):

def __init__(self, db_info):

self.db_info = db_info

self.connection = None

"""build datebase connection"""

def connect(self):

self.connection = cx_Oracle.connect(self.db_info)

"""export oracle table to csv file"""

def oracle_to_csv(self, table_name, output_path, max_rows, file_split = 1):

# record the start time

run_time = time.time()

# set output path

os.chdir(output_path)

# build datebase connection

self.connect()

# select all the data

sql = "select * from " + table_name

curs_rows = self.connection.cursor()

curs_rows.execute(sql)

# get count of rows

sql = "select count(*) from " + table_name

curs_cnt = self.connection.cursor()

curs_cnt.execute(sql)

n = curs_cnt.fetchone()[0]

if max_rows: n = min(n, max_rows)

# get rows of each split file, as least 1

rows_split = math.ceil(n / file_split)

if rows_split * (file_split - 1) > n:

rows_split = max(1, (rows_split - 1))

# insert column names into csv file

# description includes: column name, column type, display size,

# internal size, precision, scale and whether null is possible

table_info = curs_rows.description

col_names = tuple(x[0] for x in table_info)

# insert rows into csv file

j = 1

for i, row_data in enumerate(curs_rows):

if i == n:

break

elif i % rows_split == 0 and j <= file_split:

# create and open a CSV file split

# the extra rows will be insert into the last file

# because no more file will be create when j == file_split

csv_file_dest = table_name + "_" + str(j) + ".csv"

outputFile = open(csv_file_dest,'w', encoding = 'utf-8', newline = '')

output = csv.writer(outputFile, dialect = 'excel', delimiter = '|')

output.writerow(col_names)

j += 1

output.writerow(row_data)

sys.stdout.write("%s: Writing%drows of%drows to file\n" \

% (table_name, i + 1, n))

sys.stdout.flush()

# close file, cursor and database connection

if 'outputFile' in locals(): outputFile.close()

curs_rows.close()

self.connection.close()

# calculate the runtime

run_time = time.time() - run_time

time_units = ['seconds', 'minutes', 'hours']

time_ratio = min(2, max(0, math.floor(math.log(run_time, 60))))

time_unit = time_units[time_ratio]

run_time = run_time / math.pow(60, time_ratio)

print("Table%shas been exported in%.2f%s!\n" % \

(table_name, run_time, time_unit))

"""export oracle table columns data type to csv file"""

def oracle_dtype_to_csv(self, table_name, output_path):

# set output path

os.chdir(output_path)

# build datebase connection

self.connect()

# select all the data

sql = "select table_name,column_name,data_type from user_tab_columns\where table_name = '%s'" % table_name

table_dtype = pd.read_sql(sql, self.connection)

table_dtype.to_csv(output_path + '/' + table_name + "_dtype.csv")

# close database connection

self.connection.close()

"""get oracle all the table names"""

def get_table_names(self):

# build datebase connection

self.connect()

# select all the table names

sql = "select table_name from user_tables"

curs = self.connection.cursor()

curs.execute(sql)

table_names = set(x[0] for x in curs.fetchall())

# close cursor and database connection

curs.close()

self.connection.close()

return table_names

"""get table sizes"""

def get_table_sizes(self, table_names):

table_names_str = "('" + "','".join(table_names) + "')"

# build datebase connection

self.connect()

# select all the table names

sql = """select a.segment_name, a.tablespace_name, cast(sum(a.bytes)/1024/1024 as float) MB, cast(sum(a.bytes)/1024/1024/1024 as float) GBfrom(select segment_name, tablespace_name, bytesfrom user_segmentswhere segment_name in """ + table_names_str + """) agroup by a.segment_name, a.tablespace_name"""

table_sizes = pd.read_sql(sql, self.connection)

# close database connection

self.connection.close()

return table_sizes