数据可视化9.19号课后作业

运行前需要先安装 numpy 库(例如在命令行执行 `pip install numpy`),安装方法与其他第三方库相同,这里不再详述。

随机数生成函数

# coding=utf-8
import numpy as np

# Tour of the legacy np.random samplers. The draws happen in the same order as
# before, so the global random stream is consumed identically.

# rand(rows, cols): uniform samples over [0, 1) arranged as a 5x3 array.
arr_rand = np.random.rand(5, 3)
print('以下输出5行3列的随机二维数组', arr_rand, sep='\n')

# uniform(low, high, size): five floats from the half-open interval [0, 10).
arr_uniform = np.random.uniform(low=0, high=10, size=5)
print('以下输出0到10之间的5个随机数', arr_uniform, sep='\n')

# randint(low, high, size): five integers; the upper bound 10 is exclusive.
arr_uni_int = np.random.randint(low=0, high=10, size=5)
print('以下输出0到10之间的5个随机整数', arr_uni_int, sep='\n')

# normal(loc, scale, size): Gaussian samples with mean 1 and std 0.2, shape 2x3.
arr_normal = np.random.normal(loc=1, scale=0.2, size=(2, 3))
print('以下输出2行3列均值为1,标准差为0.2的随机数组', arr_normal, sep='\n')

numpy创建数组-常用函数

# coding=utf-8
import numpy as np

# Tour of the common NumPy array constructors.

# The np.int / np.float aliases were removed in NumPy 1.24; the builtin
# int / float types select the same default integer / float dtypes.
arr_zeros = np.zeros([2, 4], dtype=int)
print('np.zeros()->')
print(arr_zeros)

# zeros_like copies the shape and dtype of its argument (here a 2x3 int list).
b = [[1, 2, 3], [4, 5, 6]]
arr_zeros_like = np.zeros_like(b)
print('np.zeros_like()->')
print(arr_zeros_like)

# full(shape, fill_value): a 2x4 array where every element is 3.
arr_full = np.full((2, 4), 3)
print('np.full()->')
print(arr_full)

# eye(n): n x n identity matrix (float by default).
arr_eye = np.eye(4)
print('arr_eye->')
print(arr_eye)

# diag(sequence): square matrix with the sequence on the main diagonal.
arr_diag = np.diag([1, 2, 3, 4])
print('np.diag()->')
print(arr_diag)

# linspace(start, stop, num): 4 evenly spaced samples, both endpoints included.
arr_lin = np.linspace(1, 12, 4)
print('np.linspace()->')
print(arr_lin)

arr_one = np.ones([3, 3], dtype=float)
print('np.ones()->')
print(arr_one)

# ones_like mirrors the shape and dtype of its argument, like zeros_like.
b = [1, 2, 3, 4]
arr_ones_like = np.ones_like(b)
print('np.ones_like()->')  # label previously misspelled as 'no.ones_like()->'
print(arr_ones_like)

array和asarray

# coding=utf-8
import numpy as np

# Contrast np.array (copies its input by default) with np.asarray (hands back
# the very same ndarray when no conversion is needed).
data = np.ones(3)
arr_ar = np.array(data)
arr_as = np.asarray(data)

# Before any mutation both show the same values.
print(arr_ar, arr_as, sep='\n')

# Writing through `data` shows up in arr_as (shared memory) but not in arr_ar.
data[1] = 2
print(arr_ar, arr_as, sep='\n')

# Writing to the copy leaves `data` alone ...
arr_ar[2] = 3
print(data)
# ... while writing to the asarray result changes `data` as well.
arr_as[2] = 3
print(data)

本文仅限于交流学习使用。
归纳总结
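np.array 和 np.asarray 的作用:将输入数据(列表、元组、ndarray 等)转换为 ndarray 对象。
区别:当参数是列表等普通序列时,两个函数的结果相同,都会新建一个数组;当参数本身已经是 ndarray 类型时,np.array 默认会新建一个 ndarray 对象作为参数的副本,而 np.asarray 在不需要转换 dtype 的情况下不会新建,而是直接返回原数组,因此与参数共享同一块内存。重点就是这个共享内存:修改其中一个,另一个也会随之改变。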


10.19日更新

pandas-获取增加删除对象

# coding=utf-8
import pandas as pd

# Build a frame from two Series of unequal length, read a column, add a column.
s1 = pd.Series([101, 102, 103, 104])
s2 = pd.Series(['Alex', 'Peter', 'Lisa'])

# Columns are aligned on the index; s2 has no label 3, so that "name" cell is NaN.
df = pd.DataFrame(dict(id=s1, name=s2))
print(df.loc[:, 'name'])

# Assigning a Series to a new key appends a column, again aligned on the index.
df['birth'] = pd.Series(['199801', '200111', '200004'])
print(df)
# coding=utf-8
import pandas as pd
import numpy as np

# 3x4 frame holding 0..11, with rows labelled a-c.
df2 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=['one', 'two', 'three', 'four'],
)
print(df2)

# .loc with a row label returns that row as a Series.
print(df2.loc['a'])

# Assigning through .loc with an unseen label appends a new row.
df2.loc['d'] = [5, 5, 5, 5]
print(df2)

# coding=utf-8
import pandas as pd
import numpy as np

# 4x3 frame of uniform random values; the list supplies the row labels and the
# columns keep their default integer labels.
df = pd.DataFrame(np.random.rand(4, 3), index=['one', 'two', 'three', 'four'])
print(df)

# Element-wise comparison of row "one" yields a boolean Series.
print(df.loc['one'].ge(0.5))

pandas-删除对象

# coding=utf-8
import pandas as pd
import numpy as np

df2 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=['one', 'two', 'three', 'four'],
)
print(df2)

# drop() returns a new frame by default: first without row "a", then without
# column "one".
print(df2.drop(index='a'))
print(df2.drop(columns='one'))

# The original frame is unchanged by the two calls above.
print(df2)
# coding=utf-8
import pandas as pd
import numpy as np

df2 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=['one', 'two', 'three', 'four'],
)
print(df2)

# With inplace=True the row is removed from df2 itself and nothing is returned.
df2.drop(index='b', inplace=True)
print(df2)

pandas-重置索引

# coding=utf-8
import pandas as pd
import numpy as np

df = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=['one', 'two', 'three', 'four'],
)
print(df)

# reset_index() returns a new frame whose old labels move into an "index"
# column; df itself still has its a/b/c labels at this point.
df1 = df.reset_index()
print(df1, df, sep='\n')

# The in-place variant rewrites df directly.
df.reset_index(inplace=True)
print(df)

# coding=utf-8
import pandas as pd
import numpy as np

df = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=['one', 'two', 'three', 'four'],
)
print(df)

# Add a helper column, then show the frame re-indexed by it. set_index returns
# a new frame, so df keeps "newindex" as an ordinary column.
df['newindex'] = list('XYZ')
print(df.set_index('newindex'))

pandas-fillna和dropna

# coding=utf-8
import pandas as pd
import numpy as np

# Series of decreasing length: aligning them in one frame leaves NaN holes.
d1 = pd.Series([101, 102, 103, 104])
d2 = pd.Series(['Alex', 'Perter', 'Lias'])
d3 = pd.Series([2000, 3000])
# An empty Series without a dtype warns on older pandas and changes dtype
# across versions; pin it so "other" is always an all-NaN float column.
d4 = pd.Series([], dtype='float64')
df = pd.DataFrame({'id': d1, 'name': d2, 'salary': d3, 'other': d4})
print(df)

# Default: drop every row containing at least one NaN (here: all rows).
print(df.dropna())
# axis=1: drop every column containing at least one NaN (only "id" survives).
print(df.dropna(axis=1))
# how='all': drop only the columns that are entirely NaN ("other").
print(df.dropna(axis=1, how='all'))
# coding=utf-8
import pandas as pd
import numpy as np

d1 = pd.Series([101, 102, 103, 104])
d2 = pd.Series(['Alex', 'Perter', 'Lias'])
d3 = pd.Series([2000, 3000])
# Explicit dtype keeps the empty column a float NaN column on every pandas version.
d4 = pd.Series([], dtype='float64')
df = pd.DataFrame({'id': d1, 'name': d2, 'salary': d3, 'other': d4})

# Replace every missing cell with one constant.
print(df.fillna('missing'))

# Fill each numeric column with its own mean. numeric_only=True is required on
# pandas >= 2.0, where mean() over the string column "name" raises TypeError.
print(df.fillna(df.mean(numeric_only=True)))

# Forward fill: carry the last valid value downward, at most one step here.
# fillna(method=...) is deprecated, so use ffill() directly.
print(df.ffill(limit=1))
# 'pad' was an alias of 'ffill'; unlimited forward fill.
print(df.ffill())

# Forward-fill only the salary column and store the result back in df.
df['salary'] = df['salary'].ffill()
print(df)
print(df.isnull())
# coding=utf-8
import pandas as pd
import numpy as np

d1 = pd.Series([101, 102, 103, 104])
d2 = pd.Series(['Alex', 'Perter', 'Lias'])
d3 = pd.Series([2000, 3000])
# Explicit dtype keeps the empty column a float NaN column on every pandas version.
d4 = pd.Series([], dtype='float64')
df = pd.DataFrame({'id': d1, 'name': d2, 'salary': d3, 'other': d4})
print(df)

# A dict gives a separate fill value per column; unlisted columns are untouched.
df1 = df.fillna({'salary': 5000, 'other': '0'})
print(df1)

# Fill the missing salaries with the column mean. The old line read an
# undefined df2 and stored the None returned by fillna(inplace=True);
# work on a copy of df and assign the filled column back instead.
df2 = df.copy()
df2['salary'] = df2['salary'].fillna(df2['salary'].mean())
print(df2)


pandas-运算与分组统计

# coding=utf-8
import pandas as pd
import numpy as np

s1 = pd.Series(np.array([1, 2, 3, 4]))
s2 = pd.Series(np.array([5, 6, 7, 8]))
df = pd.DataFrame(dict(a=s1, b=s2))
print(df)

# Descriptive statistics of column "a", printed one per line in this order:
# mean, sum, variance, standard deviation, maximum.
for stat in ('mean', 'sum', 'var', 'std', 'max'):
    print(getattr(df['a'], stat)())


# coding=utf-8
import pandas as pd
import numpy as np

# Box-office records: movie title, takings, and screening date.
s1 = pd.Series(np.array(['流浪星球', '新喜剧之王', '绿皮书', '流浪星球', '流浪星球', '新喜剧之王']))
s2 = pd.Series(np.array([120000, 86000, 92000, 143000, 98000, 68000]))
s3 = pd.Series(pd.to_datetime(['20181203', '20181203', '20181205', '20181206', '20180207', '20181207']))
df = pd.DataFrame({"电影名": s1, "票房": s2, '日期': s3})
print(df)

# Per-title maximum of every remaining column (takings and date).
group = df.groupby(by=['电影名'])
print(group.max())

# Per-title total takings. Dates cannot be summed and pandas >= 2.0 raises
# TypeError instead of silently dropping the column, so restrict the
# aggregation to numeric columns.
print(df.groupby(['电影名']).sum(numeric_only=True))

# Median takings for each (title, date) pair.
print(df.groupby(['电影名', '日期']).median())

# All rows ordered by takings, highest first.
print(df.sort_values(by='票房', ascending=False))
# coding=utf-8
import pandas as pd
import numpy as np

# Rebuild the box-office frame: one Series per column, then combine and show it.
s1 = pd.Series(
    np.array(['流浪星球', '新喜剧之王', '绿皮书', '流浪星球', '流浪星球', '新喜剧之王'])
)
s2 = pd.Series(np.array([120000, 86000, 92000, 143000, 98000, 68000]))
s3 = pd.Series(
    pd.to_datetime(
        ['20181203', '20181203', '20181205', '20181206', '20180207', '20181207']
    )
)
df = pd.DataFrame({"电影名": s1, "票房": s2, '日期': s3})
print(df)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值