新建xpclr
原有环境下conda install xpclr -c bioconda失败
创建新环境
使用ubuntu:(conda环境)
conda create -n xpclr_env python=3.8
conda activate xpclr_env
conda install -c conda-forge scikit-allel
#xpclr
conda install -c bioconda xpclr
#vcftools
conda install -c bioconda vcftools
报错1:无写入权限:
(xpclr_env) zky@zky:/mnt/c/Users/Administrator$ xpclr --out xpclr_chr1A --format vcf --input chr1A.vcf --samplesA B1.txt --samplesB B2.txt --map chr1A.map --chr 1A --size 50000 --step 25000
Traceback (most recent call last):
File "/home/zky/anaconda3/envs/xpclr_env/bin/xpclr", line 196, in <module>
main()
File "/home/zky/anaconda3/envs/xpclr_env/bin/xpclr", line 87, in main
assert os.access(outdir, os.W_OK), \
AssertionError: No permission to write in the specified directory:
新建目录写入解决
————————————————————————
py脚本:
需要再linux系统下运行
python xpclr_analysis.py
关键问题在于参数的调整
可以尝试使用LD半衰距离或半衰距离的一半
import os
import random
import subprocess
import sys
# 配置参数
# INPUT_VCF = "bcf_output1.vcf"
INPUT_VCF = "clean.vcf"
# OUTPUT_DIR = "/mnt/e/pyProjects/zc/zc_xpclr/xpclr_analysis"
OUTPUT_DIR = "xpclr_analysis"
SAMPLE_RATIO = 0.5
CHROMOSOMES = ['1A', '1B', '1D', '2A', '2B', '2D', '3A', '3B', '3D', '4A', '4B', '4D',
'5A', '5B', '5D', '6A', '6B', '6D', '7A', '7B', '7D']
WINDOW_SIZE = 50000 # 增大窗口大小
STEP_SIZE = 25000 # 减小步长
MAX_SNPS = 500 # 增加最大 SNP 数
MIN_SNPS = 10 # 调整最小 SNP 数
def run_command(command):
try:
result = subprocess.run(command, check=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
text=True)
print(f"执行命令: {command}")
print(result.stdout)
print(result.stderr)
return result.stdout
except subprocess.CalledProcessError as e:
print(f"错误: 执行命令 '{command}' 时出错")
print(f"错误信息: {e}")
print(f"STDOUT: {e.stdout}")
print(f"STDERR: {e.stderr}")
return None
def check_vcf(input_vcf):
print("检查VCF文件")
# 检查文件头
header = run_command(f"grep '^#' {input_vcf} | tail -n 1")
if not header:
print("错误: VCF文件可能没有正确的头部")
sys.exit(1)
# 检查数据行
data = run_command(f"grep -v '^#' {input_vcf} | head -n 5")
if not data:
print("错误: VCF文件可能没有数据行")
sys.exit(1)
print("VCF文件检查通过")
def split_chromosomes(input_vcf, output_dir, chromosomes):
print("步骤1: 拆分染色体")
for chr in chromosomes:
output_file = os.path.join(output_dir, f"{chr}.vcf")
command = f"grep -E '^#|^{chr}\t' {input_vcf} > {output_file}"
run_command(command)
# 检查输出文件
snp_count = int(run_command(f"grep -v '^#' {output_file} | wc -l").strip())
print(f"染色体 {chr} 包含 {snp_count} 个 SNP")
def prepare_map_files(output_dir, chromosomes):
print("步骤2: 准备地图文件")
for chr in chromosomes:
vcf_file = os.path.join(output_dir, f"{chr}.vcf")
freq_file = os.path.join(output_dir, f"{chr}.freq")
map_file = os.path.join(output_dir, f"{chr}.map")
command = f"grep -v '^#' {vcf_file} | awk '{{split($8,a,\";\"); for(i in a) {{if(a[i] ~ /^AF=/) {{split(a[i],b,\"=\"); print $1\"\t\"$2\"\t\"b[2]}}}}}}' > {freq_file}"
run_command(command)
snp_count = 0
with open(map_file, 'w') as mapfile:
with open(freq_file, 'r') as frqfile:
for line in frqfile:
chrom, pos, af = line.strip().split()
mapfile.write(f"{chrom}\t{pos}\t{float(pos) / 1000000}\n")
snp_count += 1
print(f"染色体 {chr} 的地图文件包含 {snp_count} 个 SNP")
def run_xpclr(sample_a_file, sample_b_file, output_dir, chromosomes, window_size, step_size, max_snps, min_snps):
print("步骤3: 运行XPCLR分析")
for chr in chromosomes:
input_file = os.path.join(output_dir, f"{chr}.vcf")
map_file = os.path.join(output_dir, f"{chr}.map")
output_file = os.path.join(output_dir, f"xpclr_{chr}")
# 检查输入文件
command = (f"xpclr --out {output_file} "
f"--format vcf "
f"--input {input_file} "
f"--samplesA {sample_a_file} "
f"--samplesB {sample_b_file} "
f"--map {map_file} "
f"--chr {chr} "
f"--size {window_size} "
f"--step {step_size} "
f"--maxsnps {max_snps} "
f"--minsnps {min_snps}")
result = run_command(command)
if result is None:
print(f"警告: XPCLR 分析在染色体 {chr} 上失败")
else:
print(f"染色体 {chr} 的 XPCLR 分析完成")
def main():
os.makedirs(OUTPUT_DIR, exist_ok=True)
check_vcf(INPUT_VCF)
split_chromosomes(INPUT_VCF, OUTPUT_DIR, CHROMOSOMES)
prepare_map_files(OUTPUT_DIR, CHROMOSOMES)
run_xpclr('/mnt/e/pyProjects/zc/zc_xpclr/B1.txt', '/mnt/e/pyProjects/zc/zc_xpclr/B2.txt', OUTPUT_DIR, CHROMOSOMES, WINDOW_SIZE, STEP_SIZE, MAX_SNPS, MIN_SNPS)
print("XPCLR 分析完成!")
if __name__ == "__main__":
main()


1255

被折叠的 条评论
为什么被折叠?



