python函数传参的一个问题

风语者666

已于 2022-08-18 14:55:10 修改

阅读量653

点赞数

分类专栏： python 生信 文章标签： python pandas 开发语言

于 2022-08-18 14:51:38 首次发布

本文链接：https://blog.csdn.net/u014210048/article/details/126405198

版权

python 同时被 2 个专栏收录

32 篇文章 2 订阅

订阅专栏

生信

31 篇文章 3 订阅

订阅专栏

import sys,os,re
import numpy as np
import pandas as pd
import copy
from collections import Counter
import datetime

# Use the values of the first data row to fill in / override the DataFrame's column labels.
# df.iloc[0] is already the first row of the DataFrame, i.e. the second row of the Excel sheet.
def conver_data_frame(data_frame):
    """Promote the first data row into the header and drop that row.

    For every column, a non-missing value in the first row replaces the
    existing column label; a missing value (per ``pd.isna``) keeps the
    existing label.

    Args:
        data_frame: DataFrame whose first row carries header text. It is
            left untouched; the work is done on a copy.

    Returns:
        A new DataFrame with the updated column labels and without the
        first row. The remaining rows keep their original index labels.
        A frame with no rows is returned as an unchanged copy.
    """
    # Nothing to promote when there is no first row (iloc[0] would raise).
    if len(data_frame) == 0:
        return data_frame.copy()

    first_row = data_frame.iloc[0]
    head = [
        label if pd.isna(value) else value
        for label, value in zip(data_frame.columns, first_row)
    ]

    # Copy before relabelling so the caller's DataFrame keeps its own columns.
    result = data_frame.iloc[1:].copy()
    result.columns = head
    return result

class Vividict(dict):  # dict that can be nested to any depth, see https://www.cnblogs.com/flightless/p/12734807.html
    """Dictionary whose missing keys are created on first access.

    Looking up an absent key stores a new, empty instance of the same
    class under that key and returns it, so chained assignments such as
    ``d['a']['b']['c'] = 1`` work without creating the levels by hand.
    """

    def __missing__(self, key):
        # Called by dict.__getitem__ only when `key` is absent.
        child = type(self)()
        self[key] = child
        return child

def match_str(Lst, *string1):
    """Flag the entries of ``Lst`` that match every given pattern.

    Args:
        Lst: Iterable of cell values, e.g. one DataFrame column.
        *string1: Regular-expression patterns. An entry is flagged only
            when ``re.search`` finds all of them in it. Pass the patterns
            as separate arguments; a caller that already holds them in a
            tuple must unpack it with ``*``.

    Returns:
        A list of bool with one element per entry of ``Lst``. Entries
        that are missing according to ``pd.isnull`` are always False.
    """
    Lst_new = []
    for content in Lst:
        if pd.isnull(content):
            Lst_new.append(False)
            continue
        # Search the string form: re.search raises TypeError on non-string
        # cells such as numbers or timestamps.
        text = str(content)
        # Every pattern must be found for the entry to count as a match.
        Lst_new.append(all(re.search(pattern, text) for pattern in string1))
    return Lst_new

def merge(*LSTs):
    """Combine several boolean match lists with an element-wise OR.

    Example:
        [True, False, True, False]
        [True, False, False, True]

        are merged into

        [True, False, True, True]

    Args:
        *LSTs: Lists of truthy/falsy flags, expected to have the same
            length as the first list.

    Returns:
        A list of bool as long as the first list; position ``i`` is True
        when any input list has a truthy value at ``i``. Returns an empty
        list when called without arguments.
    """
    # Without any input there is no first list to take the length from.
    if not LSTs:
        return []

    Lst_new = [False for _ in LSTs[0]]
    for flags in LSTs:
        for idx, value in enumerate(flags):
            if value:
                Lst_new[idx] = True
    return Lst_new

# Raw string: the backslashes of the Windows path must stay literal
# (sequences like '\省' are invalid escapes in a normal string literal).
in_file = r'E:\省中医\临时存放\送检统计-报告审核用.xlsx'
df = pd.read_excel(in_file, sheet_name='肿瘤产品线')

df2 = conver_data_frame(df)  # complete the header with the sheet's second row

# Relative output paths used further down are resolved against this directory.
os.chdir('E:/XXX/01.实验室/沟通文件/KRAS-NTRK')

测试一下：

# One boolean mask per searched column: True where the cell matches both 'KRAS' and 'G12C'.
Lst_KRAS_1, Lst_KRAS_2, Lst_KRAS_3, Lst_KRAS_4 = (
    match_str(df2[column], 'KRAS', 'G12C')
    for column in ('结果', '突变基因', '靶药相关基因突变', '报告概要解读')
)

# OR the per-column masks together.
Lst_KRAS = merge(Lst_KRAS_1, Lst_KRAS_2, Lst_KRAS_3, Lst_KRAS_4)

# Keep every row (patient) flagged in at least one column and export it.
df_KRAS = df2[Lst_KRAS]
df_KRAS.to_excel('KRAS_G12C.xlsx', index=False)

将上面那个过程写成函数，并输出到指定文件：

def find_samples(df2,out_file, *string1):
    """Write the rows matching all patterns in any searched column to Excel.

    Args:
        df2: DataFrame containing the four searched columns and the
            exported columns listed below.
        out_file: Path of the Excel file to write.
        *string1: Patterns forwarded to ``match_str``; a row is kept when
            a single searched column matches all of them.
    """
    searched_columns = ('结果', '突变基因', '靶药相关基因突变', '报告概要解读')

    # The * is essential here: it unpacks the tuple so match_str receives
    # each pattern as its own argument instead of one tuple.
    masks = [match_str(df2[column], *string1) for column in searched_columns]

    # A row is a hit when any of the searched columns matched.
    hit_rows = merge(*masks)

    exported_columns = ['样本编号','姓名','门诊/住院号','性别','年龄','受检者联系电话','院区','送检科室','送检医生']
    matched = df2[hit_rows]
    matched.loc[:, exported_columns].to_excel(out_file, index=False)

find_samples(df2,'KRAS_G12C.xlsx','KRAS','G12C') # export the patients in whom KRAS G12C was detected
find_samples(df2,'NTRK.xlsx','NTRK','融合') # export the patients in whom an NTRK fusion ('融合') was detected

需要说明的是：

match_str(Lst, *string1) 中的 *string1 表示可变数量的位置参数：调用时传入的多个字符串会被收集成一个元组 string1

find_samples(df2,out_file, *string1) 这个函数在调用match_str的时候，不能写：

match_str(df2['结果'],string1)

而必须写：

match_str(df2['结果'],*string1)

相当于： match_str(df2['结果'],'KRAS','G12C')

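换句话说，如果这里只写 string1，整个元组 ('KRAS', 'G12C') 会被当作一个参数传入，match_str 内部的 string1 就变成了 (('KRAS', 'G12C'),)；循环取出的 string_tmp 是元组而不是字符串，re.search 因此报错：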

TypeError                                 Traceback (most recent call last)
<ipython-input-68-c8d98d3b7e36> in <module>
----> 1 find_samples(df2,'KRAS_G12C.xlsx','KRAS','G12C') #找出检出了KRAS G12C的人
      2 find_samples(df2,'NTRK.xlsx','NTRK','融合') #找出检出了NTRK融合的人

<ipython-input-67-9f872804e95e> in find_samples(df2, out_file, *string1)
      1 def find_samples(df2,out_file, *string1):
      2     #分别找各列里面包含 KRAS和G12C的
----> 3     Lst_KRAS_1 = match_str(df2['结果'],string1) #这里的*很重要，必须加上
      4     Lst_KRAS_2 = match_str(df2['突变基因'],string1) #这里的*很重要，必须加上
      5     Lst_KRAS_3 = match_str(df2['靶药相关基因突变'],string1) #这里的*很重要，必须加上

<ipython-input-18-69c566be8be2> in match_str(Lst, *string1)
      8         flag = 1
      9         for string_tmp in string1:
---> 10             if re.search(string_tmp, content):
     11                 pass
     12             else:

~\Anaconda3\lib\re.py in search(pattern, string, flags)
    181     """Scan through string looking for a match to the pattern, returning
    182     a Match object, or None if no match was found."""
--> 183     return _compile(pattern, flags).search(string)
    184 
    185 def sub(pattern, repl, string, count=0, flags=0):

~\Anaconda3\lib\re.py in _compile(pattern, flags)
    283         return pattern
    284     if not sre_compile.isstring(pattern):
--> 285         raise TypeError("first argument must be string or compiled pattern")
    286     p = sre_compile.compile(pattern, flags)
    287     if not (flags & DEBUG):

TypeError: first argument must be string or compiled pattern