问题分析
我的需求
- 我需要打开.xlsx文件进行读写,所使用的库是
openpyxl
,使用时直接用代码openpyxl.load_workbook(path)
打开文件。 - 我希望每个程序跑完能够自动结果写入这个.xlsx文件。
问题
- 这样就产生了问题:资源访问冲突。如果程序A和程序B在写入结果的过程发生了重叠,那么先打开文件的程序才能写入,另一个后打开的就会写入失败。
解决方案
- 当程序B打算写入结果时,如果我知道.xlsx文件是否被打开了,那我就等一段时间再看看能不能写入结果。
- 基于以上想法,尝试一:直接看.xlsx文件是否被打开。但是如上面所说,使用
openpyxl.load_workbook(path)
打开文件,并不能使用下方tools.py
的代码检测到它是否被打开。(可能和open(file)
不同) - 尝试二:准备一个
lock.txt
文件,用作是锁。当我需要访问.xlsx文件时,先open("lock.txt","w")
占用着;当我结束使用.xlsx文件时,再close()
释放lock.txt
(这个思路和操作系统中学到的进程锁是相同的)。还有一个问题需要考虑,当发现lock.txt
被使用时,我要等多少时间再次申请打开呢?这里借鉴了计算机网络中CDMA/CD
的思想。第N次申请,申请失败就等待 m a x ( r a n d o m ( 2 N − 1 , 2 N ) , M A X _ V A L U E ) max(random(2^{N-1},2^{N}), MAX\_VALUE) max(random(2N−1,2N),MAX_VALUE).(可以看看代码~)
代码
# tools.py
import os
class Usefile(object):
# 资源锁
def __init__(self, file_path):
if not os.path.exists(file_path):
raise OSError('{file_path} not exist'.format(file_path=file_path))
self.file_path = os.path.abspath(file_path)
self.ob = None
def start(self):
# 开始对资源访问,打开锁
self.ob = open(self.file_path, "w")
def close(self):
# 结束对资源访问,释放锁
self.ob.close()
def status(self):
# 检查指定文件是否正在被使用(打开)
open_fd_list = self.__get_all_fd()
open_count = len(open_fd_list)
is_opened = False
if open_count > 0:
is_opened = True
return is_opened
def __get_all_pid(self):
"""获取当前所有进程"""
return [_i for _i in os.listdir('/proc') if _i.isdigit()]
def __get_all_fd(self):
"""获取所有已经打开该文件的fd路径"""
all_fd = []
for pid in self.__get_all_pid():
_fd_dir = '/proc/{pid}/fd'.format(pid=pid)
if os.access(_fd_dir, os.R_OK) == False:
continue
for fd in os.listdir(_fd_dir):
fd_path = os.path.join(_fd_dir, fd)
if os.path.exists(fd_path) and os.readlink(fd_path) == self.file_path:
all_fd.append(fd_path)
return all_fd
# main.py
import os
import openpyxl
import datetime
import time
import random
from tools import Usefile
# 写入xlsx文件
def write(data_name, method, ratio, learning_rate, batch_size, opt, result, path="record.xlsx"):
# dataset
if not os.path.isfile(path):
wb = openpyxl.Workbook()
wb.save(path)
wb = openpyxl.load_workbook(path)
sheetnames = wb.sheetnames
col_names = ["method", "ratio"]
base_col_num = len(col_names)
if data_name == "wiki":
col_names += ["acc", "pre", "recall", "f1"]
else:
raise ValueError("data_name has no col_names!")
col_names += ["learning_rate", "batch_size", "opt", "valid_time"]
if data_name not in sheetnames:
ws = wb.create_sheet(data_name)
# write into the first row
for i in range(0, len(col_names)):
ws["%s1" % chr(ord('A') + i)] = col_names[i]
# get dataset sheet
ws = wb[data_name]
row_id = 2 # data is start at the second row of ws
# find the row id which can insert new data
while True:
method_had = ws["A%d" % row_id].value
if method_had is None:
break
row_id += 1
valid_time = datetime.datetime.now()
writing_list = [method, ratio] + result + [learning_rate, batch_size, opt, valid_time]
# write the new data
for i in range(0, len(col_names)):
ws["%s%d" % (chr(ord('A') + i), row_id)] = writing_list[i]
wb.save(filename=path)
if __name__ == '__main__':
lockfile = Usefile('lock.txt')
# 申请资源访问
if lockfile.status() is False:
#
cot = 0
MAX_TIME = 25
while lockfile.status() is False:
wait_time = random.randint(2 ** cot, 2 ** (cot + 1))
time.sleep(wait_time)
cot += 1
# 占用锁
lockfile.start()
# 具体操作(测试)
write("wiki", "avsav", 0.1, 0.0001, -1000, "asdsad", [1, 2, 3, 4], path="record.xlsx")
# 释放锁
lockfile.close()
参考: