Pandas 数据类型之 Series

Pandas 简介:

Pandas 是一个用于数据分析的 Python 库,
它基于 NumPy 构建,
用起来有一种用 Python 做 Excel/SQL/R 的感觉。

现在流行的机器学习框架(TensorFlow、PyTorch 等)的语法都与 NumPy 比较接近。

# encoding=utf-8

import numpy as np
import pandas as pd


def main():
    """
    Walk through the basics of ``pandas.Series`` by printing small examples.

    A Series is a one-dimensional labelled data structure. The demo covers
    construction from a list and from a dict, label-based and positional
    access, boolean indexing, assignment, arithmetic with index alignment,
    and missing-value checks. Everything is written to stdout and nothing
    is returned.

    The outputs quoted in the comments are illustrative; exact spacing and
    number formatting can differ between pandas versions.
    """
    # A Series can be built from a list or from a dictionary.
    s = pd.Series(["chenkai", "man", 178.00, 65, -1])
    print(s, "\n", type(s))
    # 1. pandas uses 0..n-1 as the default index, but a custom index can be
    #    supplied as well.
    #
    #    0    chenkai
    #    1        man
    #    2      178.0
    #    3         65
    #    4         -1
    #    dtype: object
    #    <class 'pandas.core.series.Series'>

    # The number of index labels must match the number of values, otherwise
    # the constructor raises an error.
    s = pd.Series(
        ["chenkai", "man", 178.00, 65, -1],
        index=["one", "two", "three", "four", "five"],
    )
    print(s, "\n", type(s))
    #    one      chenkai
    #    two          man
    #    three      178.0
    #    four          65
    #    five          -1
    #    dtype: object
    #    <class 'pandas.core.series.Series'>

    # 2. Build a Series from a dictionary; a Series is essentially a set of
    #    key/value pairs.
    cities = {
        "beijing": 50000,
        "shanghai": 40000,
        "shenzhen": 40000,
        "hangzhou": 30000,
        "wuhan": 20000,
        "suzhou": None,
    }
    # Newer pandas releases keep the dict's insertion order while older ones
    # sorted the keys. Sorting explicitly keeps the positional and label
    # slices below deterministic and consistent with the quoted outputs.
    apts = pd.Series(cities).sort_index()
    print(apts, "\n", type(apts))
    #    beijing     50000.0
    #    hangzhou    30000.0
    #    shanghai    40000.0
    #    shenzhen    40000.0
    #    suzhou          NaN
    #    wuhan       20000.0
    #    dtype: float64
    #    <class 'pandas.core.series.Series'>

    # 3. Reading data: selecting one label yields a scalar, selecting several
    #    labels yields a Series.
    wuhan_price = apts["wuhan"]
    print(wuhan_price, "\n", type(wuhan_price))  # 20000.0  <class 'numpy.float64'>
    # Select several values at once with a list of labels.
    many_apts = apts[["beijing", "shanghai", "wuhan"]]
    print(many_apts, "\n", type(many_apts))
    #    beijing     50000.0
    #    shanghai    40000.0
    #    wuhan       20000.0
    #    dtype: float64
    #    <class 'pandas.core.series.Series'>

    # Positional slicing excludes the end position; .iloc makes it explicit
    # that positions, not labels, are meant.
    print("=" * 50, apts.iloc[1:3])
    #    hangzhou    30000.0
    #    shanghai    40000.0
    #    dtype: float64

    # Label-based slicing includes the end label.
    print(apts["hangzhou":"wuhan"])
    #    hangzhou    30000.0
    #    shanghai    40000.0
    #    shenzhen    40000.0
    #    suzhou          NaN
    #    wuhan       20000.0
    #    dtype: float64

    # 4. NumPy-style boolean indexing works on a Series too.
    below_30000 = apts < 30000
    print(below_30000, "\n", type(below_30000))
    #    beijing     False
    #    hangzhou    False
    #    shanghai    False
    #    shenzhen    False
    #    suzhou      False
    #    wuhan        True
    #    dtype: bool
    #    <class 'pandas.core.series.Series'>
    print(apts[below_30000], "\n", type(apts[below_30000]))
    # The two steps above can be combined into one expression:
    print(apts[apts < 30000], "\n", type(apts[apts < 30000]))
    #    wuhan    20000.0
    #    dtype: float64
    #    <class 'pandas.core.series.Series'>

    # 5. Assigning to Series elements.
    apts["wuhan"] = 25000
    print(apts["wuhan"], "\n", type(apts["wuhan"]))
    #    25000.0
    #    <class 'numpy.float64'>

    # Filter with a boolean mask first, then assign one value to every match.
    apts[apts < 30000] = 15000
    print(apts, "\n", type(apts))
    #    beijing     50000.0
    #    hangzhou    30000.0
    #    shanghai    40000.0
    #    shenzhen    40000.0
    #    suzhou          NaN
    #    wuhan       15000.0
    #    dtype: float64
    #    <class 'pandas.core.series.Series'>

    # 6. Arithmetic on a Series: + - * / and ** (e.g. apts ** 2) all work
    #    element-wise.
    apts = apts / 2
    print(apts, "\n", type(apts))
    #    beijing     25000.0
    #    hangzhou    15000.0
    #    shanghai    20000.0
    #    shenzhen    20000.0
    #    suzhou          NaN
    #    wuhan        7500.0
    #    dtype: float64
    #    <class 'pandas.core.series.Series'>

    # Build a second Series to demonstrate addition.
    car_counts = {
        "beijing": 200000,
        "shanghai": 200000,
        "shenzhen": 200000,
        "wuhan": 100000,
        "tianjin": 150000,
    }
    cars = pd.Series(car_counts)
    print(apts + cars, "\n", type(apts + cars))
    # Only labels present on both sides are added; every other label ends up
    # as NaN in the result.
    #    beijing     225000.0
    #    hangzhou         NaN
    #    shanghai    220000.0
    #    shenzhen    220000.0
    #    suzhou           NaN
    #    tianjin          NaN
    #    wuhan       107500.0
    #    dtype: float64
    #    <class 'pandas.core.series.Series'>

    # 7. Missing data.
    # `in` checks whether a label exists in the index.
    print("wuhan" in apts)  # True
    print("wuhan" in cars)  # True
    print("tianjin" in apts)  # False
    # notnull() reports, per label, whether the value is present.
    has_value = apts.notnull()
    print(has_value, "\n", type(has_value))
    #    beijing      True
    #    hangzhou     True
    #    shanghai     True
    #    shenzhen     True
    #    suzhou      False
    #    wuhan        True
    #    dtype: bool
    #    <class 'pandas.core.series.Series'>

    # isnull() is the complementary check.
    print(apts.isnull())
    # Both results are boolean masks, so they can index the Series directly;
    # comparing them with `== True` is redundant.
    print(apts[apts.isnull()])
    #    suzhou   NaN
    #    dtype: float64

    print(apts[apts.notnull()])
    #    beijing     25000.0
    #    hangzhou    15000.0
    #    shanghai    20000.0
    #    shenzhen    20000.0
    #    wuhan        7500.0
    #    dtype: float64


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值