python pandas 应用举例

Python Pandas 函数应用实例

最新推荐文章于 2025-10-22 22:03:49 发布

kittykittyis

最新推荐文章于 2025-10-22 22:03:49 发布

阅读量1.4k

点赞数 33

CC 4.0 BY-SA版权

文章标签： python

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

本文链接：https://blog.csdn.net/u011650255/article/details/145465394

## loc、str.replace、sort_values、concat 方法举例 ##

import pandas as pd

import numpy as np

import os

import re

# Clear the console; 'cls' is a Windows-only shell command.
os.system('cls')

# Load the raw data sheet from the Excel workbook.
data_path = r'C:\Users\Administrator\Desktop\fifth1.xlsx'
df = pd.read_excel(data_path, sheet_name='yuanshishuju')

# Only rows whose ID is at most 564 are masked. The filter depends solely on
# the 'ID' column, which none of the replacements below modify, so it is
# computed once and reused.
rows_to_mask = df['ID'] <= 564

# Pass 1: replace each listed two-letter group with '**'.
pattern1 = r'ee|ea|oo|ll|th|sh|mm|nn|ar|er|an|ar|ur|ow|ou'
df.loc[rows_to_mask, 'phrases'] = (
    df.loc[rows_to_mask, 'phrases'].str.replace(pattern1, '**', regex=True)
)

# Pass 2: replace each listed three-letter group with '***'.
# NOTE(review): after pass 1 this pass cannot match anything, because pass 1
# has already consumed the 'th' of 'the', the 'ou' of 'our' and the 'er' of
# 'ere'. If three-letter groups are meant to take priority, this pass has to
# run before pass 1 -- confirm the intended output before reordering, since
# that changes how words such as 'there' end up masked.
pattern2 = r'the|our|ere'
df.loc[rows_to_mask, 'phrases'] = (
    df.loc[rows_to_mask, 'phrases'].str.replace(pattern2, '***', regex=True)
)

# Pass 3: replace every remaining a/e/o/i/u/y with a single '*'.
pattern = r'a|e|o|i|u|y'
df.loc[rows_to_mask, 'phrases'] = (
    df.loc[rows_to_mask, 'phrases'].str.replace(pattern, '*', regex=True)
)

# Renumber the rows 0..n-1 and discard the previous index.
df = df.reset_index(drop=True)

# Shuffle by attaching one uniform random number per row and sorting on it.
rnd = len(df)
df['randnum'] = np.random.rand(rnd)
dfsorted = df.sort_values(by='randnum', ascending=False)

# Take the first 30 shuffled phrases and the following 30 (rows 31-60).
# .iloc makes the slices explicitly positional; the previous [1:31] / [31:61]
# slices were shifted by one and never used the top-ranked row.
dftop30 = dfsorted['phrases'].iloc[0:30].reset_index(drop=True)
dftop3160 = dfsorted['phrases'].iloc[30:60].reset_index(drop=True)

# Helper frame holding the running number 1..30 for each output sheet.
temp_df = pd.DataFrame({'序号': range(1, 31)})

# Put the running number next to the phrases; both sides carry a 0-based
# index, so the column-wise concat lines the rows up one to one.
dftop30 = pd.concat([temp_df, dftop30], axis=1)
dftop3160 = pd.concat([temp_df, dftop3160], axis=1)

# Write each selection to its own Excel file, without the index column.
dftop30.to_excel('c:/1.xlsx', index=False)
dftop3160.to_excel('c:/2.xlsx', index=False)

##apply,datetime,timedelta,relativedelta,自定义函数的应用举例##

import pandas as pd

import numpy as np

import os

import platform

import re

import time

from datetime import datetime

from dateutil.relativedelta import relativedelta

# Workbook read by the examples in this section.
data = 'c:/202410.xlsx'
df = pd.read_excel(
    data,
    sheet_name='Sheet1',
)

# Start mark for the elapsed-time measurement taken with time.time().
start_time = time.time()

def xmlx(x):
    """Classify a name into a masked label.

    Args:
        x: The name to classify; expected to be a string.

    Returns:
        '张**' when ``x`` is a string containing the character '佐',
        otherwise '***'. Non-string values (for example a missing cell
        represented as NaN or None) also yield '***' instead of raising.
    """
    if isinstance(x, str) and '佐' in x:
        return '张**'
    return '***'

# Label every name with the named function xmlx.
df['姓名类型'] = df['姓名'].apply(xmlx)

df

# Same idea with an inline lambda that distinguishes one more case:
# '佐' is checked first, then '刘', and everything else falls through.
df['姓名类型1'] = df['姓名'].apply(
    lambda x: (
        '张**' if '佐' in x
        else '李**' if '刘' in x
        else '***'
    )
)

df

def xmtj(x, y, z):
    """Count the values of one column within groups, most frequent first.

    Args:
        x: DataFrame to summarise.
        y: Column label passed to ``groupby``.
        z: Column label whose values are counted inside each group.

    Returns:
        The result of ``value_counts`` on the grouped column ``z``, sorted
        by count in descending order.
    """
    counts = x.groupby(y)[z].value_counts()
    return counts.sort_values(ascending=False)

# Group by the name column and count that same column within each group.
xmtjs = xmtj(x=df, y='姓名', z='姓名')

xmtjs

def sfz(x):
    """Map an ID-card number to a district label by its leading region code.

    Only the first six characters are compared. A substring test anywhere in
    the number would also fire on digits further along (for instance a
    birthdate of 1941-12-22 contains '411222'), and would disagree with the
    ``startswith``-based variants used elsewhere in this file.

    Args:
        x: The ID-card number. It is converted with ``str`` first, so
            integers and missing values are accepted as well.

    Returns:
        '陕州区', '湖滨区' or '灵宝市' when the number starts with
        '411222', '411202' or '411282' respectively; otherwise '其他'.
    """
    region_by_code = {
        '411222': '陕州区',
        '411202': '湖滨区',
        '411282': '灵宝市',
    }
    return region_by_code.get(str(x)[:6], '其他')

# Variant 0: derive the district label row by row with the sfz function.
df['身份证户籍地0'] = df['处理后身份证'].apply(sfz)

df

import numpy as np

# Variant 1: vectorised lookup with np.select.
# One boolean mask per region code, paired by position with `choices`.
# na=False makes missing ID values count as "no match"; without it the masks
# contain NaN and are no longer boolean arrays, which np.select rejects.
conditions = [
    df['处理后身份证'].str.startswith('411222', na=False),
    df['处理后身份证'].str.startswith('411202', na=False),
    df['处理后身份证'].str.startswith('411282', na=False),
]
choices = ['陕州区', '湖滨区', '灵宝市']

# Rows matching none of the masks receive the default '其他'.
df['身份证户籍地1'] = np.select(conditions, choices, default='其他')

df

import numpy as np

# Variant 2: same construction with a different label for the first code.
ct = [
    df['处理后身份证'].str.startswith('411222', na=False),
    df['处理后身份证'].str.startswith('411202', na=False),
    df['处理后身份证'].str.startswith('411282', na=False),
]
cc = ['陕州区**', '湖滨区', '灵宝市']
df['身份证户籍地2'] = np.select(ct, cc, default='其他')

# Seconds elapsed since start_time was recorded.
end_time = time.time()
etime = end_time - start_time

df

print(f'用时\n{etime}')

def split_id_card(id_card):
    """Split an 18-character ID-card number into three 6-character parts.

    Args:
        id_card: The ID-card number; it is converted with ``str`` before
            its length is checked.

    Returns:
        A 3-tuple ``(first six, middle six, last six)`` characters. When the
        string form is not exactly 18 characters long, the tuple
        ``('错误', '错误', '错误')`` is returned instead.
    """
    id_card = str(id_card)

    # Anything that is not exactly 18 characters is reported as an error
    # triple rather than raising, so a column-wide apply keeps going.
    if len(id_card) != 18:
        return ('错误', '错误', '错误')

    return id_card[:6], id_card[6:12], id_card[12:]

# Apply split_id_card to every ID value and spread the resulting 3-tuples
# over three new columns. The tuples are collected into one DataFrame in a
# single step instead of building a separate pd.Series for every row; the
# frame reuses df's index so the assignment lines up row for row.
id_part_columns = ['身份证号_前6位', '身份证号_中6位', '身份证号_后6位']
id_parts = pd.DataFrame(
    df['处理后身份证'].apply(split_id_card).tolist(),
    index=df.index,
    columns=id_part_columns,
)
df[id_part_columns] = id_parts

df

def xmcf(x):

x=str(x)

xing=x[:1]

ming=x[1:]

最低0.47元/天解锁文章

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。