python多核运行程序怎么关闭_在多核上运行程序

我正在用 Python 运行一个程序,使用线程来并行执行任务。这个任务是简单的字符串匹配:我要把大量的短字符串与数据库中的长字符串进行匹配。为了并行化,我把短字符串列表拆分成若干个子列表(子列表的数量与核心数量相等),并让它们分别在不同的核心上运行。但是,当我在 5 个或 10 个核心上运行任务时,耗时反而约为只用一个核心时的两倍。原因是什么?我该如何解决?

编辑:我的代码如下。

import sys

import os

import csv

import re

import threading

from Queue import Queue

from time import sleep

from threading import Lock

# Work queue: main() fills it with peptide chunks and, at shutdown, with one
# None sentinel per worker thread.
q_in = Queue()

# Result queue: each worker puts the list returned by job() here.
q_out = Queue()

# Only referenced by the commented-out progress printing in job().
lock = Lock()

def ceil(nu):
    """Return the smallest integer greater than or equal to ``nu``.

    Args:
        nu: A real number (int or float).

    Returns:
        int: ``nu`` rounded up to the nearest integer.
    """
    whole = int(nu)  # int() truncates toward zero
    # Truncation already rounds negative values up, so only a positive
    # fractional part needs the extra step.
    if whole == nu or nu < 0:
        return whole
    return whole + 1

def opencsv(csvv):
    """Read the "Peptide" column of a CSV file and strip mass annotations.

    Every substring of the form ``(+<digits>.<digits>)`` (for example
    ``(+15.99)``) is removed from each peptide string.

    Args:
        csvv: Path of a CSV file whose header row contains a "Peptide" column.

    Returns:
        list: The cleaned peptide strings, in file order.

    Raises:
        KeyError: If a row has no "Peptide" field.
    """
    # Compiled once instead of being re-resolved for every row.
    annotation = re.compile(r"\((\+\d+\.\d+)\)")
    with open(csvv) as csvfile:
        return [
            annotation.sub("", str(row["Peptide"]))
            for row in csv.DictReader(csvfile)
        ]

def openfasta(fast):
    """Parse a FASTA-style file into a mapping of header line -> sequence.

    A line starting with ">" opens a new record and is used, stripped and
    including the ">", as the dictionary key. Every following non-header
    line is stripped and appended to that record's sequence. Blank lines
    are ignored. A repeated header restarts its sequence from empty.

    Args:
        fast: Path of the file to read.

    Returns:
        dict: Header line -> concatenated sequence string.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    pieces = {}  # header -> list of sequence fragments, joined once at the end
    cur = None
    with open(fast, "r") as fastafile:
        for line in fastafile:
            l = line.strip()
            if not l:
                # Blank lines (e.g. a trailing newline) carry no data.
                continue
            if l.startswith(">"):
                cur = l
                pieces[cur] = []
            elif cur is None:
                raise ValueError(
                    "sequence data found before the first '>' header line"
                )
            else:
                pieces[cur].append(l)
    return {header: "".join(parts) for header, parts in pieces.items()}

def match(text, pattern):
    """Find case-insensitive occurrences of ``pattern`` in ``text``.

    The text is scanned once from left to right. While a candidate
    occurrence is being compared, a single mismatching character is
    tolerated; a second mismatch abandons the candidate and the comparison
    restarts from the current text position. After a hit, scanning resumes
    right behind it, so reported hits never overlap.

    NOTE(review): the scan does not backtrack, so an occurrence that begins
    inside an abandoned or one-mismatch candidate can be missed (for
    example ``match("AAAB", "AAB")`` reports ``[0]``, not the exact hit at
    index 1). Confirm whether an exhaustive search is wanted.

    Args:
        text: The string to search in.
        pattern: The string to search for.

    Returns:
        list: Start indices of the hits in ``text``; empty when ``pattern``
        is empty.
    """
    text = text.upper()
    pattern = pattern.upper()
    pattern_len = len(pattern)
    if pattern_len == 0:
        # Nothing to compare against; indexing pattern[0] would fail.
        return []

    text_len = len(text)
    ans = []
    cur = 0  # index into pattern of the next character to compare
    mis = 0  # mismatches seen in the current candidate
    i = 0
    while i < text_len:
        if text[i] != pattern[cur]:
            mis += 1
            if mis > 1:
                # Abandon the candidate and retry this same text position
                # as the start of a new one.
                mis = 0
                cur = 0
                continue
        cur += 1
        i += 1
        if cur == pattern_len:
            ans.append(i - pattern_len)
            cur = 0
            mis = 0
    return ans

def job(pepts, outfile, genes):
    """Locate each peptide in the gene sequences and keep the unique hits.

    For every peptide, match() is run against every gene sequence. A
    peptide is reported only when the total number of hits across all genes
    is exactly one.

    Args:
        pepts: Iterable of peptide strings.
        outfile: Unused here; kept so existing callers keep working.
        genes: Mapping of gene header -> sequence string.

    Returns:
        list: One "peptide<TAB>position<TAB>gene header" line, newline
        terminated, per uniquely matched peptide.
    """
    towrite = []
    for pept in pepts:
        # The hit counter is per peptide, so one peptide's hit does not
        # count against the next one.
        hits = 0
        position = None
        gene = None
        for name in genes:
            m = match(genes[name], pept)
            if not m:
                continue
            position = m[0]
            gene = name
            hits += len(m)
            if hits > 1:
                # Ambiguous peptide: no need to look at further genes.
                break
        if hits == 1:
            towrite.append("\t".join([pept, str(position), str(gene)]) + "\n")
    return towrite

def worker(outfile, genes):
    """Thread body: process peptide chunks from ``q_in`` until a sentinel.

    Each chunk taken from ``q_in`` is passed to job() and the returned list
    is put on ``q_out``. A ``None`` item ends the loop. A progress
    percentage is printed after every item taken from the queue.

    Args:
        outfile: Passed through to job().
        genes: Mapping of gene header -> sequence string, passed to job().
    """
    # Queue size when this thread starts; used as the progress denominator.
    total = q_in.qsize()
    while True:
        item = q_in.get()
        if total:
            progress = 1 - float(q_in.qsize()) / float(total)
        else:
            # Queue was already empty at start-up; avoid dividing by zero.
            progress = 1.0
        print("\r{0:.2f}%".format(100 * progress))
        if item is None:
            break  # sentinel: stop this thread
        try:
            q_out.put(job(item, outfile, genes))
        finally:
            # Always mark the item done so q_in.join() cannot hang when
            # job() raises.
            q_in.task_done()

def main(args):
    """Match peptides from a CSV file against a FASTA file using threads.

    Args:
        args: Argument vector: ``args[1]`` peptide CSV path, ``args[2]``
            FASTA path, ``args[3]`` output path, ``args[4]`` number of
            worker threads.

    Returns:
        int: Always 0.

    Raises:
        ValueError: If the thread count is not a positive integer.
    """
    num_worker_threads = int(args[4])
    if num_worker_threads < 1:
        raise ValueError("number of worker threads must be at least 1")

    pept = opencsv(args[1])
    total = len(pept)
    # Aim for roughly 100 chunks per worker thread.
    chunk_size = ceil(float(total) / float(num_worker_threads * 100))
    pepties = []
    if total:
        pepties = [
            pept[start:start + chunk_size]
            for start in range(0, total, chunk_size)
        ]
    for chunk in pepties:
        print(len(chunk))
    print(total)
    print("Csv file loaded...")

    genes = openfasta(args[2])
    out = args[3]
    print("Fasta file loaded...")

    # NOTE(review): the matching is CPU-bound pure Python, so on a standard
    # CPython build these threads share the GIL and do not run in parallel.
    # Consider multiprocessing if a speed-up across cores is the goal.
    threads = []
    with open(out, "w") as outfile:
        for pepts in pepties:
            q_in.put(pepts)
        for _ in range(num_worker_threads):
            t = threading.Thread(target=worker, args=(outfile, genes))
            t.start()
            threads.append(t)

        q_in.join()  # wait until every chunk has been processed

        # One sentinel per worker stops the threads.
        for _ in range(num_worker_threads):
            q_in.put(None)
        for t in threads:
            t.join()

        # All workers have finished, so q_out is no longer being filled and
        # can be drained safely into the output file.
        while not q_out.empty():
            outfile.writelines(q_out.get())
    return 0

if __name__ == "__main__":
    # Expected argv: script name, peptide CSV path, FASTA path, output path,
    # number of worker threads.
    sys.exit(main(sys.argv))

需要强调的是:这里是把短序列(肽段)匹配到长序列(基因)上。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值