Pandas处理Excel - 复杂多列到多行转换
用户需求图片
分析:
- 一行变多行,可以用explode实现;
- 要使用explode,需要先将多列变成一列;
- 注意有的列为空,需要做空值过滤;
1. 读取数据
import pandas as pd
file_path = "./course_datas/c39_explode_to_manyrows/读者提供的数据-输入.xlsx"
df = pd.read_excel(file_path)
df
| P/N | Description | Supplier | Supplier PN | Supplier.1 | Supplier PN.1 | Supplier.2 | Supplier PN.2 |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | MURATA | GRM1555C1H101JA01D | YAGEO | CC0402JRNPO9BN101 | GRM1555C1H101JA01J | Murata Electronics North America |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | AVX Corporation | 04025A6R8CAT2A | KEMET | C0402C689C5GACTU | NaN | NaN |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | AVX Corporation | 04025A3R9CAT2A | NaN | NaN | NaN | NaN |
---|
2. 把多列合并到一列
merge_names = list(df.loc[:, "Supplier":].columns.values)
merge_names
['Supplier',
'Supplier PN',
'Supplier.1',
'Supplier PN.1',
'Supplier.2',
'Supplier PN.2']
def merge_cols(x):
"""
x是一个行Series,把它们按分隔符合并
"""
x = x[x.notna()]
y = x.values
result = []
for idx in range(0, len(y), 2):
result.append(f"{y[idx]}|{y[idx+1]}")
return "#".join(result)
df["merge"] = df.loc[:, "Supplier":].apply(merge_cols, axis=1)
df
| P/N | Description | Supplier | Supplier PN | Supplier.1 | Supplier PN.1 | Supplier.2 | Supplier PN.2 | merge |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | MURATA | GRM1555C1H101JA01D | YAGEO | CC0402JRNPO9BN101 | GRM1555C1H101JA01J | Murata Electronics North America | MURATA|GRM1555C1H101JA01D#YAGEO|CC0402JRNPO9BN... |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | AVX Corporation | 04025A6R8CAT2A | KEMET | C0402C689C5GACTU | NaN | NaN | AVX Corporation|04025A6R8CAT2A#KEMET|C0402C689... |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | AVX Corporation | 04025A3R9CAT2A | NaN | NaN | NaN | NaN | AVX Corporation|04025A3R9CAT2A |
---|
df.drop(merge_names, axis=1, inplace=True)
df
| P/N | Description | merge |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | MURATA|GRM1555C1H101JA01D#YAGEO|CC0402JRNPO9BN... |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | AVX Corporation|04025A6R8CAT2A#KEMET|C0402C689... |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | AVX Corporation|04025A3R9CAT2A |
---|
3. 使用explode把一列变多行
df["merge"] = df["merge"].str.split("#")
df
| P/N | Description | merge |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | [MURATA|GRM1555C1H101JA01D, YAGEO|CC0402JRNPO9... |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | [AVX Corporation|04025A6R8CAT2A, KEMET|C0402C6... |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | [AVX Corporation|04025A3R9CAT2A] |
---|
df_explode = df.explode("merge")
df_explode
| P/N | Description | merge |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | MURATA|GRM1555C1H101JA01D |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | YAGEO|CC0402JRNPO9BN101 |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | GRM1555C1H101JA01J|Murata Electronics North Am... |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | AVX Corporation|04025A6R8CAT2A |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | KEMET|C0402C689C5GACTU |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | AVX Corporation|04025A3R9CAT2A |
---|
4. 将一列还原成结果的多列
df_explode["Supplier"]=df_explode["merge"].str.split("|").str[0]
df_explode["Supplier PN"]=df_explode["merge"].str.split("|").str[1]
df_explode
| P/N | Description | merge | Supplier | Supplier PN |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | MURATA|GRM1555C1H101JA01D | MURATA | GRM1555C1H101JA01D |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | YAGEO|CC0402JRNPO9BN101 | YAGEO | CC0402JRNPO9BN101 |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | GRM1555C1H101JA01J|Murata Electronics North Am... | GRM1555C1H101JA01J | Murata Electronics North America |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | AVX Corporation|04025A6R8CAT2A | AVX Corporation | 04025A6R8CAT2A |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | KEMET|C0402C689C5GACTU | KEMET | C0402C689C5GACTU |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | AVX Corporation|04025A3R9CAT2A | AVX Corporation | 04025A3R9CAT2A |
---|
df_explode.drop("merge", axis=1, inplace=True)
df_explode
| P/N | Description | Supplier | Supplier PN |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | MURATA | GRM1555C1H101JA01D |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | YAGEO | CC0402JRNPO9BN101 |
---|
0 | 302-462-326 | CAP CER 0402 100pF 5% 50V | GRM1555C1H101JA01J | Murata Electronics North America |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | AVX Corporation | 04025A6R8CAT2A |
---|
1 | 302-462-012 | CAP CER 0402 6.8pF 0.25pF 50V | KEMET | C0402C689C5GACTU |
---|
2 | 302-462-009 | CAP CER 0402 3.9pF 0.25pF 50V | AVX Corporation | 04025A3R9CAT2A |
---|
5. 输出到结果Excel
df_explode.to_excel("./course_datas/c39_explode_to_manyrows/读者提供的数据-输出.xlsx", index=False)