RNA代码

feature_selection目录下的代码

在终端输入

python -u ffs_final.py "./data/Final_sample_dataset_v1.csv" "./data/Pairwise_featcorr_0.8.pkl" 3 5 "all" "./output"

显示报错
 

因为数据量太大,原脚本无法直接运行,需要修改代码。

将 ffs_final.py 修改为如下代码后重新运行:

import csv
import itertools
import math
import os
import pickle
import sys
from itertools import combinations
from multiprocessing import Pool

import pandas as pd
from scipy.stats import pearsonr
from sklearn import linear_model

# Positional command-line arguments; every value arrives as a string.
data = sys.argv[1]        # path of the input table
two_combos = sys.argv[2]  # path of the pickled collection of feature pairs
nomit = sys.argv[3]       # index of the first column treated as a feature
n_feat = sys.argv[4]      # number of features per combination
pass_feat = sys.argv[5]   # "all", or path of a file with one feature per line
outpath = sys.argv[6]     # prefix for the output log file

n_feat, nomit = int(n_feat), int(nomit)

# The table is parsed as tab-separated with the first row as header.
df = pd.read_csv(data, sep="\t", header=0)

if pass_feat == "all":
    # Use every column from position ``nomit`` up to, but excluding, the
    # last column.
    feats_final = list(df.columns)[nomit:-1]
else:
    # One feature name per line; surrounding whitespace is stripped.
    with open(pass_feat) as feats:
        feats_final = [line.strip() for line in feats]

print("Total no. of features = {}".format(len(feats_final)))

# NOTE: unpickling can execute arbitrary code, so only load trusted files.
with open(two_combos, "rb") as f:
    # A set gives constant-time membership tests for feature pairs.
    pair_corrs = set(pickle.load(f))

def regression_model(data, combination, y):
    """Fit a linear regression on the selected columns and score it.

    Args:
        data: Table that can be indexed with a list of column names (the
            script passes a pandas DataFrame).
        combination: Iterable of column names used as predictors.
        y: Target values to regress on.

    Returns:
        A tuple ``(combination, pearson_corr, coef)`` where ``pearson_corr``
        is the Pearson correlation between ``y`` and the model's predictions
        on the same rows it was fitted on, and ``coef`` is the fitted
        model's ``coef_`` attribute.
    """
    predictors = data[list(combination)]
    regressor = linear_model.LinearRegression()
    regressor.fit(predictors, y)
    fitted = regressor.predict(predictors)
    correlation, _pvalue = pearsonr(y, fitted)
    return combination, correlation, regressor.coef_

def find_pass_combos(combination, pair_corrs, data, y):
    """Run the regression for ``combination`` only if all its pairs pass.

    Each 2-element sub-combination is looked up in ``pair_corrs`` as an
    ordered tuple, in the order the features appear in ``combination``.

    Args:
        combination: Tuple of feature names to evaluate.
        pair_corrs: Container of accepted feature pairs (tuples); a set
            keeps each lookup constant-time.
        data: Table passed through to ``regression_model``.
        y: Target values passed through to ``regression_model``.

    Returns:
        The result of ``regression_model(data, combination, y)`` when every
        pair is present in ``pair_corrs``; otherwise ``None``.
    """
    # all() stops at the first rejected pair and builds no intermediate
    # list or set, which matters because this runs once per combination.
    if all(pair in pair_corrs for pair in combinations(combination, 2)):
        return regression_model(data, combination, y)
    return None

# Regression target: the "pKd" column of the input table.
y = df["pKd"]

def batch_combinations(features, batch_size):
    """Yield consecutive slices of ``features`` of length ``batch_size``.

    Args:
        features: Sliceable sequence with a length (list, tuple, str, ...).
        batch_size: Maximum number of items per yielded slice.

    Yields:
        ``features[start:start + batch_size]`` for each successive start
        offset; the final slice may be shorter than ``batch_size``.
    """
    total = len(features)
    starts = range(0, total, batch_size)
    for start in starts:
        stop = start + batch_size
        yield features[start:stop]

# Process combinations in batches. The combinations are pulled lazily from
# the itertools generator, so at most ``batch_size`` of them are held in
# memory at once (materialising all of them as a set/list does not scale to
# large feature counts).
batch_size = 1000  # Adjust this according to your memory capacity

# Treat ``outpath`` as a directory. Plain string concatenation glued the file
# name onto the last path component when no trailing separator was given
# (e.g. "./output" -> "./outputAptamers_best_...").
if outpath:
    os.makedirs(outpath, exist_ok=True)

# Drop repeated feature names (first occurrence wins) so that the stream
# yields each combination once, as the earlier set-based version did.
# Combinations that repeat a single feature are meaningless for the
# regression and are no longer generated.
unique_feats = list(dict.fromkeys(feats_final))

for n in range(n_feat, n_feat + 1):
    print("No. of possible {} feature combinations: {}".format(
        n, math.comb(len(unique_feats), n)))

    log_path = os.path.join(
        outpath, "Aptamers_best_" + str(n) + "_feature_combos.log")
    combo_stream = combinations(unique_feats, n)

    while True:
        # Take the next batch from the generator; an empty batch means the
        # stream is exhausted.
        feat_batch = list(itertools.islice(combo_stream, batch_size))
        if not feat_batch:
            break

        results = []
        for combination in feat_batch:
            result = find_pass_combos(combination, pair_corrs, df, y)
            if result is not None:
                results.append(result)

        # Append after every batch so finished work survives an interrupted
        # run.
        with open(log_path, 'a') as f:
            for feat_combo, corr, coef in results:
                f.write('\t'.join(feat_combo) + "\t" + str(corr) + "\t" + str(coef) + "\n")

        print("Processed {} combinations".format(len(feat_batch)))

    print(str(n) + " feature combinations done.")
 

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值