#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Date : 2017-03-06 22:46:45
# @Author : XYZ (superxyz@vip.qq.com)
# @Link : www.bubblefertilizer.com
# @Version : 1.0
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna
from Bio.SeqRecord import SeqRecord
def add_barcode(path, barcode_seq):
    """Prepend a barcode sequence to every read of a FASTQ file.

    Args:
        path: Path of the FASTQ file to read.
        barcode_seq: Barcode bases, as a string, placed in front of each read.

    Returns:
        A list with one new SeqRecord per input read. Each record keeps the
        original id and description, its sequence is the barcode followed by
        the read, and its 'phred_quality' list is extended at the front with
        a score of 39 for every barcode base.
    """
    barcode = Seq(barcode_seq, generic_dna)
    # One quality score per barcode base. The per-letter annotations must be
    # exactly as long as the sequence, so the count is derived from the
    # barcode instead of being fixed at six entries.
    barcode_quality = [39] * len(barcode)
    records_with_barcode = []
    for record in SeqIO.parse(path, 'fastq'):
        record_with_barcode = SeqRecord(
            barcode + record.seq,
            id=record.id,
            description=record.description,
        )
        record_with_barcode.letter_annotations['phred_quality'] = (
            barcode_quality + record.letter_annotations["phred_quality"]
        )
        records_with_barcode.append(record_with_barcode)
    return records_with_barcode
# FASTQ files to merge, each paired with the barcode to prepend to its reads.
file_and_barcode = [
    ('D:/Shared_Folder/P101SC16122213-01-B1-3/result/00.RawData/CK1/CK1.extendedFrags.fastq', 'ACTTGA'),
    ('D:/Shared_Folder/P101SC16122213-01-B1-3/result/00.RawData/CK2/CK2.extendedFrags.fastq', 'GGCTAC'),
    ('D:/Shared_Folder/P101SC16122213-01-B1-3/result/00.RawData/CK3/CK3.extendedFrags.fastq', 'GATCAG'),
    ('D:/Shared_Folder/P101SC16122213-01-B1-3/result/00.RawData/OM1/OM1.extendedFrags.fastq', 'CAGATC'),
    ('D:/Shared_Folder/P101SC16122213-01-B1-3/result/00.RawData/OM2/OM2.extendedFrags.fastq', 'GCCAAT'),
    ('D:/Shared_Folder/P101SC16122213-01-B1-3/result/00.RawData/OM3/OM3.extendedFrags.fastq', 'TAGCTT'),
]

# Collect the barcoded reads of every file into one flat list.
allrecords = []
for fastq_path, barcode in file_and_barcode:
    print('file_path:', fastq_path, 'barcode_sequnce:', barcode)
    # extend (not append) keeps the list flat: append would nest each
    # per-file list as a single element.
    allrecords.extend(add_barcode(fastq_path, barcode))

# Write all barcoded reads to a single FASTQ file and report how many.
count = SeqIO.write(allrecords, "allrecords_with_barcode.fastq", "fastq")
print("Saved %i reads" % count)
# Reference: http://biopython.org/DIST/docs/api/Bio.SeqIO-module.html