1 #-*- coding: utf-8 -*-
2 importos,sys,datetime3 importmysql.connector4 importxlrd5
6 '''
7 the main function to import data8 username: username of mysql database9 password: password for username10 database: a specific database in mysql11 datapath: the absolute path or relative path of data folder12 '''
13 defimportDataHelper(username, password, database, datapath):14 '''import data helper'''
15 '''
16 Step 0: Validate input database parameters17 '''
18 try:19 conn = mysql.connector.connect(user=username, password=password, database=database, use_unicode=True)20 exceptmysql.connector.errors.ProgrammingError as e:21 printe22 return -1
23 '''
24 Step 1: Traverse files in datapath, store file paths and corresponding table names in lists25 lists[0] is the list of files paths26 lists[1] is the list of table names27 '''
28 lists =getFilesList(datapath)29 nfiles =len(lists[0])30 '''
31 Step 2: Store data in mysql via a for-loop32 '''
33 cursor =conn.cursor()34 for file_idx inxrange(0, nfiles):35 file_path =lists[0][file_idx]36 print "processing file(%d/%d):[ %s ]"%(file_idx+1, nfiles, file_path)37 table_name = lists[1][file_idx]38 num =storeData(file_path, table_name, cursor)39 if num >=0:40 print "[ %d ] data have been stored in TABLE:[ %s ]"%(num, table_name)41 conn.commit()42 cursor.close()43 '''
44 Step 3: Close connection45 '''
46 conn.close()47
48 '''
49 get files list in the dir, including the files in its sub-folders50 the return list contain two elements, the first element is a file names list51 and the second element is a table names list(will be used for creating tables in database),52 '''
53 defgetFilesList(dir):54 path_list =[]55 table_list =[]56 file_name_list =os.listdir(dir)57 for file_name infile_name_list:58 path =os.path.join(dir, file_name)59 ifos.path.isdir(path):60 '''get the files in sub folder recursively'''
61 tmp_lists =getFilesList(path)62 path_list.extend(tmp_lists[0])63 table_list.extend(tmp_lists[1])64 else:65 path_list.append(path)66 '''convert file name to mysql table name'''
67 file_name = file_name.split('.')[0] #remove .xls
68 #file_name = file_name.split('from')[0] #remove characters after 'from'
69 file_name = file_name.strip()#remove redundant space at both ends
70 file_name = file_name.replace(' ','_') #replace ' ' with '_'
71 file_name = file_name.replace('-','_') #replace ' ' with '_'
72 file_name = file_name.lower() #convert all characters to lowercase
73 table_list.append(file_name)74 return[path_list, table_list]75
76 '''
77 store the data of file file_path in table table_name78 file_path: file location79 table_name: name of the table that will be created in database80 cursor: a mysql cursor81 '''
82 defstoreData(file_path, table_name, cursor):83 ret =084 '''open an excel file'''
85 file =xlrd.open_workbook(file_path)86 '''get the first sheet'''
87 sheet =file.sheet_by_index(0)88 '''get the number of rows and columns'''
89 nrows =sheet.nrows90 ncols =sheet.ncols91 '''get column names'''
92 col_names =[]93 for i inrange(0, ncols):94 title = sheet.cell(1, i).value95 title =title.strip()96 title = title.replace(' ','_')97 title =title.lower()98 col_names.append(title)99 '''create table in mysql'''
100 sql = 'create table'\101 +table_name+'('\102 +'id int NOT NULL AUTO_INCREMENT PRIMARY KEY,'\103 +'at_company varchar(10) DEFAULT \'821\','
104
105 for i inrange(0, ncols):106 sql = sql + col_names[i] + 'varchar(150)'
107 if i != ncols-1:108 sql += ','
109 sql = sql + ')'
110 try:111 cursor.execute(sql)112 exceptmysql.connector.errors.ProgrammingError as e:113 printe114 #return -1
115
116 '''insert data'''
117 #construct sql statement
118 sql = 'insert into'+table_name+'('
119 for i in range(0, ncols-1):120 sql = sql + col_names[i] + ','
121 sql = sql + col_names[ncols-1]122 sql += ') values ('
123 sql = sql + '%s,'*(ncols-1)124 sql += '%s)'
125 #get parameters
126 parameter_list =[]127 for row in xrange(2, nrows):128 for col inrange(0, ncols):129 cell_type =sheet.cell_type(row, col)130 cell_value =sheet.cell_value(row, col)131 if cell_type ==xlrd.XL_CELL_DATE:132 dt_tuple =xlrd.xldate_as_tuple(cell_value, file.datemode)133 meta_data = str(datetime.datetime(*dt_tuple))134 else:135 meta_data =sheet.cell(row, col).value136 parameter_list.append(meta_data)137 #cursor.execute(sql, parameter_list)
138 try:139 cursor.execute(sql, parameter_list)140 parameter_list =[]141 ret += 1
142 exceptmysql.connector.errors.ProgrammingError as e:143 printe144 #return -1
145 returnret146
147
148
149 if __name__ == "__main__":150 if len(sys.argv)<5:151 print "Missing Parameters"
152 sys.exit()153 elif len(sys.argv)>5:154 print "Too Many Parameters"
155 sys.exit()156 username = sys.argv[1]157 password = sys.argv[2]158 database = sys.argv[3]159 datapath = sys.argv[4]160 importDataHelper(username, password, database, datapath)