In this article we look at thread pools in both Python and Java. We will cover their basic usage, but we will not go into the details of how they are implemented.
1. Python concurrent.futures
We will go straight to an example taken from a project I worked on.
a. create Job.py for jobs in your ThreadPool
import traceback
import json
import re
import JIRAapi
from Logger import logging
import xlwt
import SVNapi
import githubapi
from concurrent import futures
# Job: fills a worksheet with per-file ticket information using a thread pool.
# It lists a repository directory through githubapi, submits one processJava
# task per path ending in ".java", and writes cells with sheet.write(row, column, data).
# NOTE(review): this listing has lost its indentation, so the block nesting must be
# restored before it can run; the comments below follow statement order only.
class Job:
# Store the project identifiers and the target sheet, and create the shared state.
def __init__(self,project,projectSection,sheet):
# Next row number to hand out; processJava reads and increments it under counter_lock.
self.i = 1
# NOTE(review): no "import threading" appears in the import list above this class;
# confirm it exists elsewhere, otherwise this line raises NameError.
self.counter_lock = threading.Lock()
self.project = project
self.projectSection = projectSection
self.sheet = sheet
# Futures returned by pool.submit(); later passed to futures.as_completed().
self.futureset = set()
self.pool = futures.ThreadPoolExecutor()
# Matches "<KEY>-<1 to 7 digits>" for the project keys listed below; processJava
# uses it to search a commit comment for a ticket id.
self.pattern = re.compile(r'(EE|AYT|AS|ADO|AMP|JET|SEC|XAF|APS|ADR|AO|SAF|BIQ|BTS|CAL|CRM|CAP|CDP|CMSD|CR|CCL|CSS|CCD|CLD|CM|COKM|COSD|CO|PROJOPS|CORCA|COSR|UI|CMP|SL|CTM|CRF|XPI|CCT|DCF|ENGSERV|DEVIT|DBPERF|DAP|EGL|ECT|EDU|BUILD|DBREVIEW|ENGPROC|ESS|EAP|EN|FEDD|GRR|GE|TGM|HCPEXT|HMP|HMG|INT|IMP|IO|JAT|KB|KM|LANG|LRN|LESC|MOB|MTR|NCC|NGP|OBX|ONB|OWFP|PARTNEROPS|PTCH|PAY|PLT|PTK|PMT|PMR|PMU|TRVW|PDZ|PE|REC|PROV|AUT|RTO|RCM|RMK|RMKIT|RP|RPG|RPI|SFRE|RMDA|RPT|RNR|COPS|SAASOPS|SAS|MSS|SM|SRSD|SR|SHPT|SMT|SMB|CUB|SCO|SFT|SCM|STE|SML|SP|TCR|TFT|TD|TLS|UXN|VRP|VONE|API|WFP|WFA|WST|CALC)-[0-9]{1,7}')
# Process one section: list "<projectSection>/src/main/java/com/successfactors/<section>",
# submit a task per .java file, wait for the tasks, then write the header row (row 0).
def processSection(self,section):
url = "{}/src/main/java/com/successfactors/{}".format(self.projectSection,section)
packageList = githubapi.githubContent(self.project,url)
# iteration() submits the tasks and fills self.futureset.
self.iteration(packageList)
try:
# as_completed() yields each future once it has finished; a task exception is re-raised here.
for future in futures.as_completed(self.futureset):
err = future.exception()
if err is not None:
raise err
# NOTE(review): exit() raises SystemExit, which the handler below does not catch,
# so the header writes that follow may never run. Its original nesting (inside or
# after the loop) cannot be told from this flattened listing; confirm intent.
exit()
except KeyboardInterrupt:
print("stopped by hand")
# Header row: column titles for the cells written by processJava.
self.writeExcel(0,1,"CLASSPATH")
self.writeExcel(0,2,"COMMENT")
self.writeExcel(0,3,"TICKET")
self.writeExcel(0,4,"PROJECTNAME")
self.writeExcel(0,5,"PROJECTID")
self.writeExcel(0,6,"COMPONENTSNAME")
self.writeExcel(0,7,"COMPONENTSID")
# Same as processSection, but starts from "<projectSection>/src/main/java/com/successfactors"
# without a section suffix.
def processMyself(self):
url = "{}/src/main/java/com/successfactors".format(self.projectSection)
packageList = githubapi.githubContent(self.project,url)
self.iteration(packageList)
try:
for future in futures.as_completed(self.futureset):
err = future.exception()
if err is not None:
raise err
# NOTE(review): same concern as in processSection; SystemExit is not caught below.
exit()
except KeyboardInterrupt:
print("stopped by hand")
self.writeExcel(0,1,"CLASSPATH")
self.writeExcel(0,2,"COMMENT")
self.writeExcel(0,3,"TICKET")
self.writeExcel(0,4,"PROJECTNAME")
self.writeExcel(0,5,"PROJECTID")
self.writeExcel(0,6,"COMPONENTSNAME")
self.writeExcel(0,7,"COMPONENTSID")
# Walk a listing recursively and submit processJava for every path ending in ".java".
def iteration(self,packageList):
# A dict is not iterated; presumably githubapi returns a dict for a non-directory
# or error response (verify against githubapi).
if isinstance(packageList,dict):
return
for package in packageList:
path = package["path"]
if(path.endswith(".java")):
logging.info("[Start INFO]"+path + " in process")
# Schedule the file on the pool and remember its future for the wait loop.
future = self.pool.submit(self.processJava,path)
self.futureset.add(future)
continue
# Presumably reached only for non-.java paths: list the path and recurse into it.
self.iteration(githubapi.githubContent(self.project,path))
# Write one cell by delegating to sheet.write(row, column, data).
def writeExcel(self,row,column,data):
self.sheet.write(row,column,data)
# Task body executed on the pool for one .java path: reserves a row number, looks
# for a ticket id in the file's history, and writes the findings to that row.
# Columns written: 1 path, 2 comment, 3 ticket (or "NULL"), 4 project name
# (or "INVALID ID"), 5 project id, 6 component names, 7 component ids.
def processJava(self,path):
try:
# Reserve a row number n while holding the lock.
if self.counter_lock.acquire():
n = self.i
self.i = self.i + 1
self.counter_lock.release()
historyList = githubapi.githubHistory(self.project,path)
# Visit the history list from its last element to its first.
for x in range(len(historyList)-1, -1, -1):
history = historyList[x]
commitInfo = history["commit"]["message"]
# Messages that start with "Create modules" and contain "based on" are resolved
# through SVNapi.getTicket (presumably repository-migration commits; confirm).
if "based on" in commitInfo and commitInfo.startswith("Create modules"):
match = SVNapi.getTicket(path,self.project)
if match:
# Presumably an re match object: .string is the text that was searched.
commitInfo = match.string
self.writeExcel(n, 3 , match.group())
jiraInfo = JIRAapi.getProjectandComponetByKEY(match.group())
if jiraInfo:
self.writeExcel(n, 4 , jiraInfo["project"]["name"])
self.writeExcel(n, 5 , jiraInfo["project"]["id"])
# Component names and ids are concatenated with no separator.
componentNamestr = ""
componentIDstr = ""
for component in jiraInfo["components"]:
componentNamestr = componentNamestr + component["name"]
componentIDstr = componentIDstr + component["id"]
self.writeExcel(n, 6 , componentNamestr)
self.writeExcel(n, 7 , componentIDstr)
else:
logging.warning("[WARNING INFO] " + path + " -------- COMMENT:" + commitInfo)
self.writeExcel(n, 4 ,"INVALID ID")
break
# NOTE(review): which "if" this else belongs to cannot be determined from the
# flattened listing. It searches the text from SVNapi.getFirstHistory with self.pattern.
else:
commitInfo = SVNapi.getFirstHistory(path,self.project)
match = re.search(self.pattern,commitInfo)
if match:
self.writeExcel(n, 3 , match.group())
jiraInfo = JIRAapi.getProjectandComponetByKEY(match.group())
if jiraInfo:
self.writeExcel(n, 4 , jiraInfo["project"]["name"])
self.writeExcel(n, 5 , jiraInfo["project"]["id"])
componentNamestr = ""
componentIDstr = ""
for component in jiraInfo["components"]:
componentNamestr = componentNamestr + component["name"]
componentIDstr = componentIDstr + component["id"]
self.writeExcel(n, 6 , componentNamestr)
self.writeExcel(n, 7 , componentIDstr)
else:
logging.warning("[WARNING INFO] " + path + " -------- COMMENT:" + commitInfo)
self.writeExcel(n, 4 ,"INVALID ID")
break
else:
continue
# Record the path and the comment that was examined.
# NOTE(review): commitInfo and match are only assigned inside the loop; with an
# empty history these lines would raise NameError, which the handler below logs.
self.writeExcel(n, 1 , path)
self.writeExcel(n, 2, commitInfo)
logging.info("[Finish INFO] " + path + " -------- COMMENT:" + commitInfo)
if not match:
logging.info("[NO MATCH] No jira ticket match on path" + path)
self.writeExcel(n, 3 , "NULL")
# Python 2 syntax; Python 3 requires "except Exception as e".
except Exception, e:
# Log the path with the full traceback; nothing is re-raised, so the future
# finishes without an exception.
logging.error("[[[ERROR!!!!!!!!!!!!!!!!!!!!!!!! :" + path + "]]]" + "\n" + traceback.format_exc())
The logic of this code is somewhat complex, and we will not go through it in detail. Note, however, that we create a threading.Lock() in the constructor of this class. The lock ensures that, while the thread pool is running, different threads cannot modify the same variable at the same time.
b. the analysis of this code :
self.counter_lock = threading.Lock()
self.futureset = set()
self.pool = futures.ThreadPoolExecutor()
These three attributes are the most important ones when working with the futures package. The variable futureset stores the Future object that submit() returns for each task handed to the pool.
c. examples of how it return :
When you need to run a threading pool
1. create a threadpool
self.pool = futures.ThreadPoolExecutor()
2. Write the function that the thread pool should execute
def task(self, param1, param2): # Param1 and Param2 are defined by your function
3. Submit the function and keep the Future that submit() returns
future = self.pool.submit(self.task,param1, param2)
self.futureset.add(future)
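Note that the variable future is a Future object: once the task has finished, the function’s return value can be read with future.result(), and any exception it raised with future.exception().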
- get the sync task done
try:
for future in futures.as_completed(self.futureset):
err = future.exception()
if err is not None:
raise err
exit()
except KeyboardInterrupt:
print("stopped by hand")
When we iterate over futures.as_completed(self.futureset),
each future is yielded as soon as its task has finished, so the loop blocks until every task in the thread pool is done and gives us each completed future in turn.
- more info
For more info on python future package
This document will help:
http://xiaorui.cc/2014/11/15/%E4%BD%BF%E7%94%A8python%E7%9A%84%E4%B8%8A%E5%B1%82%E5%B0%81%E8%A3%85%E5%B9%B6%E5%8F%91%E5%BA%93concurrent-futures%E5%AE%9E%E7%8E%B0%E5%BC%82%E6%AD%A5/
Java threadpool
In my program, I only used newCachedThreadPool.
Creates a thread pool that creates new threads as needed, but will reuse previously constructed threads when they are available. These pools will typically improve the performance of programs that execute many short-lived asynchronous tasks. Calls to execute will reuse previously constructed threads if available. If no existing thread is available, a new thread will be created and added to the pool. Threads that have not been used for sixty seconds are terminated and removed from the cache. Thus, a pool that remains idle for long enough will not consume any resources. Note that pools with similar properties but different details (for example, timeout parameters) may be created using ThreadPoolExecutor constructors.
This is probably the most commonly used kind of pool.
usage
- Let’s take a look at a method from production code
/**
 * Builds a JIRA suggestion (project, component, team owner, related issues and
 * per-project ticket counts) for a source file from the ticket numbers found in
 * its commit messages.
 *
 * <p>Every commit message that matches {@code pattern} yields one
 * {@code JiraGetTask}, which is run on a cached thread pool; the results are
 * then counted per project and per component, and the most frequent project and
 * its most frequent component become the suggestion.
 *
 * @param location slash-separated location; segment 2 is used as the project
 *                 name and the text starting at segment 3 as the path inside it
 * @return the populated {@code JiraInfoVO}; when no issue could be retrieved the
 *         suggested project, component and team owner are empty strings
 */
public JiraInfoVO getJiraInfoMutl(String location) {
    JiraInfoVO jiraInfoVO = new JiraInfoVO();
    String[] segments = location.split("/");
    String projectName = segments[2];
    String projectPath = location.substring(location.indexOf(segments[3]));
    GitCommitGraphBean gitCommitBean = GithubGraphqlAPI.getClassHistoryComments(projectName, projectPath);

    JsonObject countMapping = new JsonObject();
    List<RelatedIssue> relatedIssues = new ArrayList<RelatedIssue>();
    HashMap<String, String> componentMap = new HashMap<String, String>();
    List<IssueSearchBean> issueSearchBeans = new ArrayList<IssueSearchBean>();
    List<Future<IssueSearchBean>> pendingIssues = new ArrayList<Future<IssueSearchBean>>();
    List<String> relatedTicketNumbers = new ArrayList<String>();

    ExecutorService cachedThreadPool = Executors.newCachedThreadPool();
    try {
        List<GitCommitGraphBean.DataEntity.RepositoryEntity.RefEntity.TargetEntity.HistoryEntity.EdgesEntity> edges =
                gitCommitBean.getData().getRepository().getRef().getTarget().getHistory().getEdges();
        // Walk the history from the last edge to the first, one lookup task per ticket number found.
        for (int i = edges.size() - 1; i >= 0; i--) {
            String message = edges.get(i).getNode().getMessage();
            Matcher m = pattern.matcher(message);
            if (m.find()) {
                String ticketNumber = m.group();
                pendingIssues.add(cachedThreadPool.submit(new JiraGetTask(ticketNumber)));
                logger.info("Find ticketNumber" + ticketNumber);
            }
        }
        logger.info("Blockingqueue size" + pendingIssues.size());
        // Future.get() is what actually blocks until each lookup has finished.
        for (Future<IssueSearchBean> pending : pendingIssues) {
            issueSearchBeans.add(pending.get());
        }
    } catch (InterruptedException e) {
        // Preserve the interrupt for callers instead of swallowing it.
        Thread.currentThread().interrupt();
        logger.info("Interrupted while waiting for JIRA lookups for " + location);
    } catch (Exception e) {
        // Best effort: continue with the issues collected so far, but do not hide the failure.
        // NOTE(review): only logger.info(String) is visible in this file; use an error level if the logger has one.
        logger.info("Failed to collect JIRA issues for " + location + ": " + e);
    } finally {
        // shutdown() only stops new submissions; it must run even when the loop above fails.
        cachedThreadPool.shutdown();
    }

    logger.info("issue search size: " + issueSearchBeans.size());
    for (IssueSearchBean issueBean : issueSearchBeans) {
        // Create related issue (same cap as before: added while the list holds at most 5 entries).
        if (relatedIssues.size() <= 5 && !relatedTicketNumbers.contains(issueBean.getKey())) {
            relatedTicketNumbers.add(issueBean.getKey());
            RelatedIssue relatedIssue = new RelatedIssue();
            relatedIssue.setKey(issueBean.getKey());
            relatedIssue.setSummary(issueBean.getFields().getSummary());
            relatedIssues.add(relatedIssue);
        }

        String project = issueBean.getFields().getProject().getName();
        logger.info("Find " + project + " in " + issueBean.getFields().getSummary());

        // Get or create the {"counting": n, "components": {...}} entry for this project.
        JsonObject projectCounts;
        if (countMapping.has(project)) {
            projectCounts = countMapping.getAsJsonObject(project);
        } else {
            projectCounts = new JsonObject();
            projectCounts.addProperty("counting", 0);
            projectCounts.add("components", new JsonObject());
            countMapping.add(project, projectCounts);
        }
        incrementCount(projectCounts, "counting");

        JsonObject components = projectCounts.getAsJsonObject("components");
        for (IssueSearchBean.FieldsEntity.ComponentsEntity componentsEntity : issueBean.getFields().getComponents()) {
            String componentName = componentsEntity.getName() + "";
            // Record the id for every component, including those of a project seen only once,
            // so the owner lookup below can always resolve the suggested component.
            componentMap.put(componentName, componentsEntity.getId());
            incrementCount(components, componentName);
        }
    }

    String suggestProject = "";
    String suggestComponent = "";
    List<TicketTypeVO> ticketTypeVOS = new ArrayList<TicketTypeVO>();
    int max = -1;
    for (Map.Entry<String, JsonElement> typeEntry : countMapping.entrySet()) {
        JsonObject typeCounts = typeEntry.getValue().getAsJsonObject();
        int counting = typeCounts.get("counting").getAsInt();

        TicketTypeVO ticketTypeVO = new TicketTypeVO();
        ticketTypeVO.setCount(counting + "");
        ticketTypeVO.setName(typeEntry.getKey());

        ArrayList<TicketComponentVO> ticketComponentVOS = new ArrayList<TicketComponentVO>();
        for (Map.Entry<String, JsonElement> componentEntry : typeCounts.getAsJsonObject("components").entrySet()) {
            TicketComponentVO ticketComponentVO = new TicketComponentVO();
            ticketComponentVO.setComponentName(componentEntry.getKey());
            ticketComponentVO.setCount(componentEntry.getValue().getAsInt() + "");
            ticketComponentVOS.add(ticketComponentVO);
        }
        ticketTypeVO.setTicketComponentVOS(ticketComponentVOS);
        ticketTypeVOS.add(ticketTypeVO);

        // Strict '>' keeps the first project on a tie.
        if (counting > max) {
            max = counting;
            suggestProject = typeEntry.getKey();
        }
    }

    // With no issues at all there is no entry for suggestProject; skip instead of dereferencing null.
    if (countMapping.has(suggestProject)) {
        max = -1;
        for (Map.Entry<String, JsonElement> componentEntry
                : countMapping.getAsJsonObject(suggestProject).getAsJsonObject("components").entrySet()) {
            int counting = componentEntry.getValue().getAsInt();
            if (counting > max) {
                max = counting;
                suggestComponent = componentEntry.getKey();
            }
        }
    }

    // Only ask JIRA for the owner when a component id is actually known.
    String leader = "";
    String componentId = componentMap.get(suggestComponent);
    if (componentId != null) {
        ComponentBean componentBean = JiraRESTAPI.getTeamOwnerByComponentId(componentId);
        leader = componentBean.getLead().getDisplayName();
    }

    logger.info("Get countMap" + new Gson().toJson(countMapping));
    jiraInfoVO.setHistory(ticketTypeVOS);
    jiraInfoVO.setComponent(suggestComponent);
    jiraInfoVO.setProject(suggestProject);
    jiraInfoVO.setRelatedIssue(relatedIssues);
    jiraInfoVO.setTeamOwner(leader);
    return jiraInfoVO;
}

/** Adds one to the integer stored under {@code key}, treating a missing key as zero. */
private void incrementCount(JsonObject counts, String key) {
    int current = counts.has(key) ? counts.get(key).getAsInt() : 0;
    counts.addProperty(key, current + 1);
}
- Analysis of the code
First we create a thread pool
ExecutorService cachedThreadPool = Executors.newCachedThreadPool();
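and then we create a list to hold the Future objects, adding one Future for each submitted task
List<Future<IssueSearchBean>> blockingQueue = new ArrayList<Future<IssueSearchBean>>();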
Future<IssueSearchBean> future = cachedThreadPool.submit(jiraGetTask);
blockingQueue.add(future);
After submitting all the tasks we need to run, we call:
cachedThreadPool.shutdown();
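This command tells the pool to stop accepting new tasks; tasks that were already submitted still run to completion. Note that shutdown() itself does not wait for them.
The waiting happens when we read each result, because Future.get() blocks until its task is done. The result value is obtained with the command: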
blockingQueue.get(i).get()