Python数据分析之pandas函数库

1:读取+抽取

import pandas
# Load the CSV, then keep only the columns whose name ends with "ame".
data_info = pandas.read_csv("1.csv")
col_name = data_info.columns.tolist()
gram_columns = [name for name in col_name if name.endswith("ame")]
gram_df = data_info[gram_columns]

2:抽取单列的缺失值+计算单列平均值

import pandas as pd
# Count the missing entries in the "Age" column and compute the column mean.
# NOTE(review): `tiT` is not defined in this snippet; presumably a DataFrame
# loaded earlier with pandas.read_csv - confirm.
age = tiT["Age"]
age_is_null = pd.isnull(age)         # boolean mask, True where Age is missing
age_null_true = age[age_is_null]     # only the missing entries
age_null_count = len(age_null_true)  # number of missing ages
# Series.mean() skips NaN by default, so no manual filtering is required.
correct_mean_age = age.mean()

3:按类别筛选行+计算各类别平均值(结果存入dict)

passenger_class = [1, 2, 3]
# Maps each passenger class to the mean fare of the rows in that class.
fares_by_class = {}
for this_class in passenger_class:
    # Boolean-mask the frame down to the rows of the current class.
    pclass_rows = tiT[tiT["Pclass"] == this_class]
    # NOTE(review): the column key "Fares" is kept as written; verify it
    # matches the actual column name in the loaded data.
    pclass_fares = pclass_rows["Fares"]
    fare_for_class = pclass_fares.mean()
    fares_by_class[this_class] = fare_for_class

4:pivot_table(不指定aggfunc时默认计算均值,这是参数默认值而非函数重载)

# Mean of "Survived" for each distinct "Pclass" value.
#   index   - column whose values define the groups (one result row per value)
#   values  - column that is aggregated inside each group
#   aggfunc - aggregation to apply; pivot_table uses the mean when it is omitted
# The string "mean" is used so the snippet does not need a numpy import.
p_s = tiT.pivot_table(index="Pclass", values="Survived", aggfunc="mean")

5:dropna:删除含缺失值的行或列(subset指定只在哪些列中检查缺失值)

# Specifying axis=1 or axis='columns' will drop any columns that have null values.
drop_na_columns = titanic_survival.dropna(axis=1)
# axis=0 drops rows instead; subset limits the null check to the listed columns.
# NOTE(review): the second column name was garbled in the original text
# ("sex" followed by a curly quote); "Sex" is assumed to match the
# capitalised column names used elsewhere - confirm against the data.
new_titanic_survival = titanic_survival.dropna(axis=0, subset=["Age", "Sex"])
# print(new_titanic_survival)

6:定位loc

# .loc selects by label: frame.loc[row_label, column_label] returns one cell.
# The original text printed row_index_83_age without ever defining it; the
# lookup below is reconstructed from that variable's name.
row_index_83_age = titanic_survival.loc[83, "Age"]
# NOTE: despite the variable name, this reads the row labelled 766.
row_index_1000_pclass = titanic_survival.loc[766, "Pclass"]
print(row_index_83_age)
print(row_index_1000_pclass)

7:特定列排序+reset index

# Order the rows by "Age", largest first; each row keeps its original index label.
new_titanic_survival = titanic_survival.sort_values(by="Age", ascending=False)
print(new_titanic_survival.iloc[:10])
# Replace the now out-of-order labels with a fresh 0..n-1 index; drop=True
# discards the old labels instead of keeping them as a column.
titanic_reindexed = new_titanic_survival.reset_index(drop=True)
print("-------------")
# .loc slices by label and includes the end label, so this shows labels 0 through 10.
print(titanic_reindexed.loc[:10])

8:自定义函数(基本变换)+column/row格式(需要apply)

def not_null_count(column):
    """Return how many entries of *column* are null.

    Note that, despite the function's name, it is the null entries that
    are counted: the column is filtered with the ``pd.isnull`` mask and
    the length of what remains is returned.
    """
    missing_mask = pd.isnull(column)
    return len(column[missing_mask])
# DataFrame.apply passes each column to the function in turn (default axis=0),
# producing one count per column.
column_null_count = titanic_survival.apply(not_null_count)
print(column_null_count)
def is_minor(row):
    """Return True when the row's "Age" value is below 18, otherwise False."""
    # bool() keeps the result a plain True/False, as the if/else form produced.
    return bool(row["Age"] < 18)

# axis="columns" (the same as axis=1) hands each row to is_minor, giving one
# boolean per row.
minors = titanic_survival.apply(is_minor, axis="columns")
# print(minors)

def generate_age_label(row):
    """Classify a row by its "Age" value.

    Args:
        row: Any object supporting ``row["Age"]``, such as the row Series
            that ``DataFrame.apply(..., axis=1)`` passes in.

    Returns:
        "unknown" when the age is missing, "minor" when it is below 18,
        and "adult" otherwise.
    """
    age = row["Age"]
    # The missing-value test must come first: comparing NaN with 18 is
    # always False, which would otherwise label a missing age as "adult".
    if pd.isnull(age):
        return "unknown"
    if age < 18:
        return "minor"
    return "adult"
# Label every row; axis=1 passes rows (not columns) to the function.
age_labels = titanic_survival.apply(generate_age_label, axis=1)
print(age_labels)

# Append the labels to the original DataFrame as a new column, then build a
# pivot table of "Survived" per label (no aggfunc given, so the mean is used).
titanic_survival["age_labels"] = age_labels
age_group_survival = titanic_survival.pivot_table(
    values="Survived",
    index="age_labels",
)

9:series处理

import pandas
from pandas import Series
# Pull two columns out of the CSV as Series objects.
data_info = pandas.read_csv("1.csv")
series_ID = data_info['GlobalID']
series_File = data_info['FileType']

# Build a custom Series from the raw arrays: FileType supplies the values,
# GlobalID supplies the index labels.
File_ID = series_ID.values
File_type = series_File.values
series_custom = Series(data=File_type, index=File_ID)

temp = series_custom[5:10]

# Collect the index labels and sort them.
original_index = series_custom.index.tolist()
sorted_index = sorted(original_index)
# Reordering through reindex was left disabled in the original:
# sorted_by_index = series_custom.reindex(sorted_index)

# Built-in sorting: by index label, then by value.
sc2 = series_custom.sort_index()
sc3 = series_custom.sort_values()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值