唐宇迪数据分析学习笔记

唐宇迪数据分析学习笔记

第1天 Numpy

import numpy
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",",dtype='str') #函数genfromtxt是打开txt文件,分隔符是逗号
print(type(world_alcohol))   #ndarray是numpy最核心的结构,不是list,是矩阵
print(world_alcohol)
print(help(numpy.genfromtxt))  #查看函数的参数解释,可以在numpy.genfromtxt()里面定义参数
_____________
<class 'numpy.ndarray'>
[['Year' 'WHO region' 'Country' 'Beverage Types' 'Display Value']
 ['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
#The numpy.array() function can take a list or list of lists as input. When we input a list, we get a one-dimensional array as a result:
vector = numpy.array([5, 10, 15, 20])                          #一维数组:一个中括号
#When we input a list of lists, we get a matrix as a result:  #二维数组:list of list
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
print (vector)
print (matrix)
______________
[ 5 10 15 20]
[[ 5 10 15]
 [20 25 30]
 [35 40 45]]
#We can use the ndarray.shape property to figure out how many elements are in the array
vector = numpy.array([1, 2, 3, 4])      #np.array里面的元素必须是同样的数据类型
print(vector.shape)
#For matrices, the shape property contains a tuple with 2 elements.
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(matrix.shape)
________________
(4,)
(2, 3)
#Each value in a NumPy array has to have the same data type
#NumPy will automatically figure out an appropriate data type when reading in data or converting lists to arrays. 
#You can check the data type of a NumPy array using the dtype property.
number = numpy.array([1,2,3,4])       #全都是int
print(number)
print(number.dtype)

num = numpy.array([1.0,2,3,4])        #全都是float
print(num)
print(num.dtype)

numbers = numpy.array([1, 2, 3, '4.0'])   #全都是string
print(numbers) 
print(numbers.dtype)
________________________
[1 2 3 4]
int64
[1. 2. 3. 4.]
float64
['1' '2' '3' '4.0']
<U21
#When NumPy can't convert a value to a numeric data type like float or integer, it uses a special nan value that stands for Not a Number
#nan is the missing data
#1.98600000e+03 is actually 1.986 * 10 ^ 3
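world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",")  #不指定dtype时默认按float读取,无法转换成数字的字符串会变成nan
world_alcohol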
_______________
array([[             nan,              nan,              nan,
                     nan,              nan],
       [  1.98600000e+03,              nan,              nan,
                     nan,   0.00000000e+00],
       [  1.98600000e+03,              nan,              nan,
                     nan,   5.00000000e-01],
       ..., 
       [  1.98700000e+03,              nan,              nan,
                     nan,   7.50000000e-01],
       [  1.98900000e+03,              nan,              nan,
                     nan,   1.50000000e+00],
       [  1.98500000e+03,              nan,              nan,
                     nan,   3.10000000e-01]])
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype="str", skip_header=1)  #跳过头行
print(world_alcohol)
________________
[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
uruguay_other_1986 = world_alcohol[1,4]   #第2行(Uruguay),第5列(Display Value)
third_country = world_alcohol[2,2]        #第3行,第3列(Country)
print (uruguay_other_1986)
print (third_country)
____________________
0.5
Cte d'Ivoire
vector = numpy.array([5, 10, 15, 20])
print(vector[0:3])  
______________
[ 5 10 15]
matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
print(matrix)
print(matrix[:,1])   #所有行,第2列
__________________
[[ 5 10 15]
 [20 25 30]
 [35 40 45]]
[10 25 40]
matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
print(matrix[:,0:2])   #两列切片
________________
[[ 5 10]
 [20 25]
 [35 40]]
matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
print(matrix[1:3,0:2])
________________
[[20 25]
 [35 40]]
import numpy
#The == operator compares the value on the right (15) with each element in the vector
# Where the values are equal the result is True; otherwise it is False. NumPy returns a Boolean array of the same shape
vector = numpy.array([5, 10, 15, 20])  #对每个元素进行遍历比较,不相等的话返回false,返回bool值
vector == 15   #“==”会进行判断
______________
array([False, False,  True, False])
vector = numpy.array([5, 10, 15, 20])
print(vector)
_____________
[ 5 10 15 20]
matrix = numpy.array([
                    [5, 10, 15], 
                    [20, 25, 30],
                    [35, 40, 45]
                 ])
matrix == 25
______________
array([[False, False, False],
       [False,  True, False],
       [False, False, False]])
#Compares vector to the value 10, which generates a new Boolean vector [False, True, False, False]. The result can be assigned to a variable (ab below) and used as an index
vector = numpy.array([5, 10, 15, 20])
vector == 10
# ab = (vector == 10)
# print (ab)
# print(vector[ab])
_________________
array([False,  True, False, False])
ab =(vector == 10)
print(ab)
__________
[False  True False False]
print(vector[ab])    #传入bool值,只会返回下标为true的值,返回一个数组.如果没有true,返回空数组
____________
[10]
matrix = numpy.array([
                [5, 10, 15], 
                [20, 25, 30],
                [35, 40, 45]
             ])
second_column_25 = (matrix[:,1] == 25)     #判断第2列,所有行里面,是否等于25
print(second_column_25)
print(matrix[second_column_25, :])      #将等于25的那一行,所有列作为matrix的索引,打印出来       
_______________
[False  True False]
[[20 25 30]]

链接:https://pan.baidu.com/s/16KviRbUoV1j4MZKWopz7FA 提取码: 7vrk

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值