问题场景
业务需要把网页转换成 PDF,用到了 Python 的 pdfkit 模块。报告是成批下发的,每批有几百例需要转换,为了提高报告生成效率,Python 端采用了多线程写法。笔者的主力语言是 Java,所以通过 **Runtime.getRuntime().exec(cmd)** 来调用 Python 脚本。
环境:jdk8, python 2.7, idea
CODE
mul_result.py 写法一:线程太多,容易OOM
import threading
import ast
import pdfkit
import sys
import getopt
import codecs
class MyThread(threading.Thread):
    """Thread that runs a callable and keeps its return value for later pickup."""

    def __init__(self, func, args=()):
        super(MyThread, self).__init__()
        self.func = func
        self.args = args

    def run(self):
        # The attribute is only created when the callable returns normally.
        target, params = self.func, self.args
        self.result = target(*params)

    def get_result(self):
        """Return the value produced by ``run``, or None if none was stored."""
        return getattr(self, 'result', None)
def createpdf(url, path):
    """Convert the page at ``url`` into a PDF written to ``path`` using pdfkit.

    Returns 1 when ``pdfkit.from_url`` yields a truthy value, otherwise 0.
    """
    # Rendering options handed to pdfkit: no page margins, UTF-8 encoding,
    # and a 'javascript-delay' of 60000.
    render_options = {
        'margin-top': '0in',
        'margin-right': '0in',
        'margin-bottom': '0in',
        'margin-left': '0in',
        'encoding': "UTF-8",
        'javascript-delay': '60000',
    }
    succeeded = pdfkit.from_url(url, path, options=render_options)
    return 1 if succeeded else 0
if __name__ == '__main__':
    # Command line: -u/--url <base url>  -f/--file_name <parameter file>
    parameterList = sys.argv[1:]
    url = ''
    file_name = ''
    opts, args = getopt.getopt(parameterList, "u:f:", ['url=', 'file_name='])
    for opt, arg in opts:
        if opt in ("-u", "--url"):
            url = arg
        elif opt in ("-f", "--file_name"):
            file_name = arg

    # The sample map is too long to pass as a process argument (the author
    # notes a 32K limit), so the caller writes it to a file as a dict literal
    # on the first line and we read it back here.
    # The file is only read, so open it read-only and always close it.
    with codecs.open(filename=file_name, mode="r", encoding='utf-8') as f:
        sample_map_string = f.readline()
    # An empty file means "no samples" instead of a SyntaxError from literal_eval.
    if sample_map_string.strip():
        sample_map = ast.literal_eval(sample_map_string)
    else:
        sample_map = {}

    # One thread per sample: key is the sample number appended to the URL,
    # value is the destination path of the PDF.
    threads = []
    for sample_sn, pdf_path in sample_map.items():
        t = MyThread(createpdf, args=(url + '?sample_sn=%s' % sample_sn, pdf_path))
        threads.append(t)
        t.start()

    compete_num = 0
    for t in threads:
        t.join()
        # get_result() is None when the conversion raised inside the thread;
        # count that as a failure instead of crashing on None + int.
        compete_num += t.get_result() or 0

    # Function-call form prints the same text under Python 2 and Python 3.
    print("completed:" + str(compete_num))
mul_result.py 写法二:开启固定线程数,队列阻塞调用,不会OOM
import threading
import ast
from Queue import Queue
import pdfkit
import sys
import getopt
import codecs
class MyThread(threading.Thread):
    """Worker thread that converts ``[url, path]`` jobs taken from a queue.

    The worker loops forever; it is meant to be started as a daemon thread
    while the producer waits on ``queue.join()``.
    """

    def __init__(self, q):
        super(MyThread, self).__init__()
        self.q = q

    def run(self):
        while True:
            url_path = self.q.get()
            try:
                url_in = url_path[0]
                path = url_path[1]
                createpdf(url=url_in, path=path)
            except Exception as exc:
                # A failed job must not kill the worker: report it on stderr
                # and keep serving the remaining jobs.
                sys.stderr.write("createpdf failed for %r: %r\n" % (url_path, exc))
            finally:
                # Always acknowledge the job; otherwise queue.join() in the
                # producer would block forever after the first failure.
                self.q.task_done()
def createpdf(url, path):
    """Render ``url`` to a PDF file at ``path`` with pdfkit.

    Returns 1 if ``pdfkit.from_url`` gives back a truthy value, else 0.
    """
    # All four page margins are zero.
    pdf_options = {
        'margin-%s' % side: '0in'
        for side in ('top', 'right', 'bottom', 'left')
    }
    pdf_options['encoding'] = "UTF-8"
    pdf_options['javascript-delay'] = '200000'
    outcome = pdfkit.from_url(url, path, options=pdf_options)
    return int(bool(outcome))
if __name__ == '__main__':
    # Command line: -u/--url <base url>  -f/--file_name <parameter file>
    parameterList = sys.argv[1:]
    url = ''
    file_name = ''
    opts, args = getopt.getopt(parameterList, "u:f:", ['url=', 'file_name='])
    for opt, arg in opts:
        if opt in ("-u", "--url"):
            url = arg
        elif opt in ("-f", "--file_name"):
            file_name = arg

    # The first line of the parameter file holds a dict literal mapping each
    # sample number to the output path of its PDF. The file is only read, so
    # open it read-only and close it deterministically.
    with codecs.open(filename=file_name, mode="r", encoding='utf-8') as f:
        sample_map_string = f.readline()
    # An empty file means "no samples" instead of a SyntaxError from literal_eval.
    if sample_map_string.strip():
        sample_map = ast.literal_eval(sample_map_string)
    else:
        sample_map = {}

    # Fixed pool of 4 daemon workers draining a shared queue, so the number of
    # concurrent conversions stays bounded regardless of batch size.
    queue = Queue()
    for x in range(4):
        worker = MyThread(queue)
        worker.daemon = True
        worker.start()

    for sample_sn, pdf_path in sample_map.items():
        queue.put([url + '?sample_sn=%s' % sample_sn, pdf_path])

    # Block until every queued job has been acknowledged with task_done().
    queue.join()

    # NOTE: this reports the number of jobs queued, not verified successes.
    # Function-call form prints the same text under Python 2 and Python 3.
    print("completed:" + str(len(sample_map)))
/**
 * Generates PDF reports for a batch of samples by running {@code mul_result.py}.
 *
 * <p>For every sample code an output path of the form
 * {@code storeDir + date + "-" + uuid / companyId / productCode / sampleCode-productCode.pdf}
 * is built (directories are created as needed, backslashes are normalised to
 * forward slashes). The sample-to-path map is written to a parameter file, and the
 * Python script is started with the base URL and that file name.
 *
 * @param map         sample code to company id lookup
 * @param productMap  sample code to product code lookup
 * @param sampleList  sample codes to convert
 * @param storeDir    prefix of the directory under which PDFs are stored
 * @param url         base URL handed to the script via {@code -u}
 * @param pyPre       prefix prepended to {@code mul_result.py} to locate the script
 * @param uuid        identifier embedded in the batch directory name
 * @param storePrefix prefix of the generated parameter file name
 * @return the number printed by the script after {@code completed:}, or 0 when the
 *         script was not run, failed, or printed no such line
 */
public static int createFor(Map<String, String> map, Map<String, String> productMap,
        List<String> sampleList, String storeDir, String url, String pyPre, String uuid, String storePrefix) {
    final String marker = "completed:";
    String date = DateUtil.date2Str(new Date());
    Map<String, String> sampleMap = new LinkedHashMap<>(sampleList.size());
    int sum = 0;
    try {
        for (String sampleCode : sampleList) {
            String companyId = map.get(sampleCode);
            String productCode = productMap.get(sampleCode);
            String dir = storeDir + date + "-" + uuid + File.separator + companyId
                    + File.separator + productCode;
            File file = new File(dir);
            if (!file.exists() && !file.mkdirs()) {
                log.warn("could not create directory: {}", dir);
            }
            String path = dir + File.separator + sampleCode + "-" + productCode + ".pdf";
            sampleMap.put(sampleCode, path.replace("\\", "/"));
        }

        String paraFileName = storePrefix + DateUtil.date2Str(new Date()) + "-" + EncryUtil.getUUID() + ".txt";
        if (writeMapFile(sampleMap, paraFileName)) {
            // Pass arguments as a list so a URL or path containing whitespace
            // is not split into several arguments.
            ProcessBuilder builder = new ProcessBuilder(
                    "python", pyPre + "mul_result.py", "-u", url, "-f", paraFileName);
            // Merge stderr into stdout: an unread stderr pipe can fill up and
            // block the child process.
            builder.redirectErrorStream(true);
            Process pr = builder.start();

            String completedLine = null;
            try (BufferedReader in = new BufferedReader(new InputStreamReader(pr.getInputStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line);
                    log.info("creating: {}", line);
                    // Remember the marker line wherever it appears; with the
                    // streams merged it is no longer guaranteed to be last.
                    if (line.contains(marker)) {
                        completedLine = line;
                    }
                }
            }
            pr.waitFor();

            if (completedLine != null) {
                int start = completedLine.indexOf(marker) + marker.length();
                sum = Integer.parseInt(completedLine.substring(start).trim());
            }
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt for callers further up the stack.
        Thread.currentThread().interrupt();
        log.error("interrupted while waiting for mul_result.py", e);
    } catch (Exception e) {
        log.error("batch report generation failed", e);
    }
    return sum;
}
/**
 * Writes a map to a file as a single-line Python dict literal, e.g.
 * {@code {'a':'hangge','b':'man','school':'wust'}}.
 *
 * <p>The file is written as UTF-8 because the Python side opens it with
 * {@code encoding='utf-8'}. Backslashes, single quotes and line breaks in keys
 * and values are escaped so the literal stays valid and on one line.
 *
 * @param sampleMap    entries to write; an empty map writes nothing
 * @param paraFileName path of the file to (create and) write
 * @return {@code true} if a non-empty map was written and the file was closed
 *         successfully, {@code false} otherwise
 */
public static boolean writeMapFile(Map<String, String> sampleMap, String paraFileName) {
    try {
        File file = new File(paraFileName);
        if (!file.exists()) {
            CommonUtil.createFile(paraFileName);
        }
        // try-with-resources closes the writer on every path; a failed
        // flush/close is reported as failure instead of being ignored.
        try (BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(paraFileName), "UTF-8"))) {
            if (sampleMap.isEmpty()) {
                return false;
            }
            StringBuilder literal = new StringBuilder("{");
            boolean first = true;
            for (Map.Entry<String, String> entry : sampleMap.entrySet()) {
                if (!first) {
                    literal.append(',');
                }
                first = false;
                literal.append('\'').append(escapePythonString(entry.getKey())).append('\'')
                        .append(':')
                        .append('\'').append(escapePythonString(entry.getValue())).append('\'');
            }
            literal.append('}');
            bw.write(literal.toString());
        }
        return true;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}

/** Escapes a value for use inside a single-quoted Python string literal. */
private static String escapePythonString(String raw) {
    return raw.replace("\\", "\\\\")
            .replace("'", "\\'")
            .replace("\r", "\\r")
            .replace("\n", "\\n");
}