我已经设置了一个sqlite3数据库。我填充了一些数据(大约400万条记录~1.2Gb数据)。
然后我做一些查询(选择/删除/更新)。
问题是,有时在插入之后脚本会停止而不会出现错误。有时它会正常运行到最后。
以下是我运行的查询类型:from __future__ import print_function
import sqlite3
import csv
import os
import glob
import sys
import time
db = 'test.db'
conn = sqlite3.connect(db)
conn.text_factory = str # allows utf-8 data to be stored
c = conn.cursor()
i = 0
### traverse the directory and process each .csv file
##print("debug")
csvfile =('/home/Desktop/Untitled Folder/Crimes_-_2001_to_present.csv')
with open(csvfile, "rb") as f:
reader = csv.reader(f)
t = time.time()
header = True
for row in reader:
if header:
# gather column names from the first row of the csv
header = False
sql = "DROP TABLE IF EXISTS test_table"
c.execute(sql)
#print("debug 1")
sql = "CREATE TABLE test_table (ID INTEGER,FBI_Code INTEGER,Updated_On TEXT,District TEXT,Beat INTEGER,Primary_Type TEXT,Location BLOB,Latitude REAL,Arrest INTEGER,Domestic INTEGER,Longitude REAL,Community_Area INTEGER,Case_Number INTEGER,Block TEXT,Location_Description TEXT,Ward INTEGER,IUCR INTEGER,Year INTEGER, Date TEXT,Y_Coordinate INTEGER,Description TEXT,X_Coordinate INTEGER);"
c.execute(sql)
#print("debug 2")
insertsql = "INSERT INTO test_table VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)"
rowlen = len(row)
#print("debug 3")
else:
# skip lines that don't have the right number of columns
#print("debug 4")
#if len(row) == rowlen:
#print("debug 5")
try:
c.execute(insertsql, row)
except:
print("problem in row %d" % i)
print(row)
continue
# print("debug 6")
i +=1
if i == 1000:
conn.commit()
#### if i == 4000000:
#### break
## #print (row)
conn.commit()
print('\nTime for Insertions TOTAL~> \n')
print(float( time.time() -t ))
print('\nTime for Insertions per Query~> \n')
print(float( time.time() -t )/i)
del rows
rows = list()
print('\nTime for Selections ~> Domestic\n')
t = time.time()
c.execute("SELECT * FROM test_table WHERE Domestic == 'false'")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
del rows
rows = list()
print('\nTime for Selections ~> Arrests\n')
t = time.time()
c.execute("SELECT * FROM test_table WHERE Arrest == 'false'")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
del rows
rows = list()
print('\nTime for Selections ID~> \n')
t = time.time()
c.execute("SELECT * FROM test_table WHERE ID < 9938614")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
del rows
rows = list()
print('\nTime for Selections ~> Primary_Type\n')
t = time.time()
c.execute("SELECT * FROM test_table WHERE Primary_Type == 'BATTERY'")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
del rows
rows = list()
print('\nTime for Selections Year~> \n')
t = time.time()
c.execute("SELECT * FROM test_table WHERE Year <= 2014")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
del rows
rows = []
print('\nTime for Updates ~> YEAR\n')
t = time.time()
c.execute("UPDATE test_table SET Year = '2016' WHERE Year == '2014'")
print(float( time.time() -t ))
print('\nTime for Selections Year~> \n')
t = time.time()
c.execute("SELECT * FROM test_table WHERE Year <= 2014")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
print('\nTime for DELETIONS ~> Domestic\n')
t = time.time()
c.execute("DELETE FROM test_table WHERE Domestic == 'false'")
rows = c.fetchall()
print(float( time.time() -t ))
print(len(rows))
del rows
c.close()
conn.close()
每次我重新分配行列表时,因为在一些查询之后,我的内存用完了。但我不认为这是问题所在(以防我使用delrows&重新分配它,这样做比较慢)。不管怎样,在这些查询之后,脚本会毫无错误地停止运行,我不知道为什么,因为有些时候它运行正常。在
编辑
我已经包含了上面的代码。问题是,在插入部分之后,当我执行查询时,脚本将被终止,没有任何错误。在
例如一直到这里:...
Time for Selections ~> Arrests
123.231
3928182
然后就结束了。在第一种方法中,我没有删除列表,当我试图重新声明列表时,Cython产生了核心转储错误。现在我删除并声明列表Cython运行正常。我的问题是为什么python没有捕捉到任何异常?在
在重新分配列表之后,垃圾回收器会清除垃圾箱数据(正如我在linux监视器中看到的那样),但它会毫无错误地崩溃。最烦人的是有时它会一直运行到最后。在