在 Django 应用中导入一个城市列表的 CSV 文件时,导入速度会随着插入数据的增加而变慢。最初的 25,000 行数据可以在 5 分钟内导入,但接下来的 25,000 行数据却需要 2 个小时。即使重新启动导入,问题仍然存在。
2、解决方案
要解决此问题,可以在代码中使用批量插入操作。以下是优化后的代码示例:
def add_country(isocode, name, created_by, changed_by, country_list):
    """Queue an unsaved Country for a later bulk_create.

    The instance is only appended to ``country_list``; nothing is written
    to the database here.  ``created_by`` / ``changed_by`` are accepted for
    interface parity with the other add_* helpers but are not used.
    """
    cleaned_name = name.strip().replace('"', '')
    cleaned_iso = isocode.strip()
    country_list.append(Country(name=cleaned_name, isocode=cleaned_iso))
def add_state(country, isocode, name, statetype, created_by, changed_by, state_list):
    """Queue an unsaved State (linked to its Country) for a later bulk_create.

    The Country lookup is memoized on the function object: the original code
    issued one SELECT per CSV row, which is the main reason the import slowed
    down as the data set grew.  With the cache, each distinct country code is
    fetched from the database exactly once.

    ``created_by`` / ``changed_by`` are accepted for interface parity but
    are not used here.
    """
    cache = add_state.__dict__.setdefault('_country_cache', {})
    country_key = country.strip().lower()
    country_model = cache.get(country_key)
    if country_model is None:
        country_model = Country.objects.get(isocode=country_key)
        cache[country_key] = country_model
    state_list.append(State(
        name=name.strip().replace('"', ''),
        isocode=isocode.strip().lower().replace('"', ''),
        country=country_model,
        statetype=statetype.strip().replace('"', ''),
    ))
def add_city(country, state, name, created_by, changed_by, city_list):
    """Queue an unsaved City for a later bulk_create.

    Both foreign-key lookups are memoized on the function object.  The
    original code ran ``Country.objects.get`` plus ``State.objects.get``
    (and possibly raised/handled DoesNotExist) for *every* CSV row — with
    75k+ city rows that per-row query cost dominates the import time.
    Missing states are cached as ``None`` so the failing lookup is not
    retried for every city of that state.

    ``created_by`` / ``changed_by`` are accepted for interface parity but
    are not used here.
    """
    country_cache = add_city.__dict__.setdefault('_country_cache', {})
    state_cache = add_city.__dict__.setdefault('_state_cache', {})

    country_key = country.strip().lower().replace('"', '')
    country_model = country_cache.get(country_key)
    if country_model is None:
        country_model = Country.objects.get(isocode=country_key)
        country_cache[country_key] = country_model

    state_name = state.strip().replace('"', '')
    state_key = (country_key, state_name)
    if state_key in state_cache:
        state_model = state_cache[state_key]
    else:
        try:
            state_model = State.objects.get(name=state_name, country=country_model)
        except State.DoesNotExist:
            # Cities with no matching state are still imported, unlinked.
            state_model = None
        state_cache[state_key] = state_model

    city_list.append(City(name=name.strip().replace('"', ''),
                          state=state_model, postcode=''))
country_list = []
state_list = []
city_list = []


def _read_csv_rows(path):
    """Yield the rows of *path*.

    Opened in text mode with ``newline=''`` as the csv module requires on
    Python 3 — the original ``open(path, 'rb')`` hands bytes to csv.reader,
    which raises a TypeError on Python 3.
    """
    with open(path, newline='') as csvfile:
        yield from csv.reader(csvfile, delimiter=',', quotechar='"')


# Countries and states must exist before cities can reference them.
print("Countries")
print(time.strftime("%H:%M:%S"))
for counrow in _read_csv_rows('country.csv'):
    add_country(counrow[0], counrow[1], adminuser, adminuser, country_list)
Country.objects.bulk_create(country_list)

print("States")
print(time.strftime("%H:%M:%S"))
for counrow in _read_csv_rows('state.csv'):
    add_state(counrow[0], counrow[1], counrow[2], counrow[3],
              adminuser, adminuser, state_list)
State.objects.bulk_create(state_list)

# City data is split across three files; each is accumulated and flushed
# as its own bulk_create batch, exactly as the original three copies did.
for label, filename in (("Cities 1", 'city1.csv'),
                        ("Cities 2", 'city2.csv'),
                        ("Cities 3", 'city3.csv')):
    print(label)
    print(time.strftime("%H:%M:%S"))
    city_list = []
    for counrow in _read_csv_rows(filename):
        add_city(counrow[0], counrow[1], counrow[2],
                 adminuser, adminuser, city_list)
    City.objects.bulk_create(city_list)
在优化后的代码中,使用了 bulk_create()
方法来批量插入数据。这种方法比逐行插入数据要快得多,因此可以显著提高导入速度。
此外,代码中还使用了 time.strftime()
函数在每个导入阶段开始时打印当前时间;通过比较相邻两次输出的时间差,即可观察各阶段的导入耗时变化。