1. Array Creation
import numpy as np
import math
# A NumPy array prints like a list (or a list of lists) and can be built
# directly from a plain Python list.
a = np.array([1, 2, 3])
print(a)
# The ndim attribute holds the number of dimensions (axes) of the array.
print(a.ndim)
[1 2 3]
1
# Passing a list of lists to np.array() builds a multi-dimensional array,
# for instance a matrix.
b = np.array([[1, 2, 3], [4, 5, 6]])
b
# The shape attribute gives the length of each dimension.
b.shape
# The dtype attribute gives the type of the items stored in the array.
a.dtype
# Floats are accepted in NumPy arrays as well.
c = np.array([2.2, 5, 1.1])
c.dtype.name
array([[1, 2, 3],
[4, 5, 6]])
(2, 3)
dtype('int64')
'float64'
# Arrays pre-filled with zeros or with ones; the argument is the desired shape.
d = np.zeros(shape=(2, 3))
print(d)
e = np.ones(shape=(2, 3))
print(e)
# An array of the given dimensions filled with random numbers.
np.random.rand(2, 3)
# np.arange(): return evenly spaced values within a given interval.
# Here: every even number from ten (inclusive) to fifty (exclusive).
f = np.arange(10, 50, 2)
f
# numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0)
# returns evenly spaced numbers over a specified interval.
# The third argument is not the difference between two consecutive numbers
# but the total number of items to generate, so this yields 15 floats.
np.linspace(0, 2, num=15)
[[0. 0. 0.]
[0. 0. 0.]]
[[1. 1. 1.]
[1. 1. 1.]]
array([[0.14119525, 0.07564666, 0.19510919],
[0.96025668, 0.03462012, 0.54072759]])
array([10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42,
44, 46, 48])
array([0. , 0.14285714, 0.28571429, 0.42857143, 0.57142857,
0.71428571, 0.85714286, 1. , 1.14285714, 1.28571429,
1.42857143, 1.57142857, 1.71428571, 1.85714286, 2. ])
2. Array Operations
# Arithmetic operators act element by element on arrays of the same shape.
a = np.array([10, 20, 30, 40])
b = np.array([1, 2, 3, 4])
# Element-wise difference.
c = a - b
print(c)
# Element-wise product.
d = a * b
print(d)
# Convert a handful of Fahrenheit readings to Celsius: C = (F - 32) * 5 / 9.
# The offset must be 32 (the freezing point of water in Fahrenheit); an
# offset of 31 shifts every converted value by 5/9 of a degree.
farenheit = np.array([0, -10, -5, -15, 0])
celcius = (farenheit - 32) * (5 / 9)
celcius
# Comparing an array with a scalar returns a boolean array with one entry
# for each element of the original.
celcius > -20
# Arithmetic and comparison can be combined: check which values are even.
celcius % 2 == 0
A = np.array([[1, 1], [0, 1]])
B = np.array([[2, 0], [3, 4]])
# The asterisk multiplies element by element.
print(A * B)
# The "@" sign (or the dot function) computes the matrix product instead,
# i.e. a matrix-level operation.
print(A @ B)
[ 9 18 27 36]
[ 10  40  90 160]
array([-17.22222222, -22.77777778, -20. , -25.55555556,
-17.22222222])
array([ True, False, False, False, True])
array([False, False, True, False, False])
[[2 0]
[0 4]]
[[5 4]
[3 4]]
# The shape attribute gives the shape of a matrix.
A.shape
# An array built only from integers gets an integer dtype ...
array1 = np.array([[1, 2, 3], [4, 5, 6]])
print(array1.dtype)
# ... while one built from floats gets a floating-point dtype.
array2 = np.array([[7.1, 8.2, 9.1], [10.4, 11.2, 12.3]])
print(array2.dtype)
(2, 2)
int64
float64
The 64 in this example refers to the number of bits reserved to represent each number, which determines the size (or precision) of the numbers that can be represented.
# Add the integer array and the float array element by element, then
# inspect the result and its dtype.
array3 = array1 + array2
print(array3)
print(array3.dtype)
# Aggregations over all elements: sum, maximum, minimum, and mean.
print(array3.sum())
print(array3.max())
print(array3.min())
print(array3.mean())
[[ 8.1 10.2 12.1]
[14.4 16.2 18.3]]
float64
79.3
18.3
8.1
13.216666666666667
# The integers 1 through 15, laid out as 3 rows by 5 columns.
b = np.arange(1, 16).reshape((3, 5))
print(b)
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]]
An example
from PIL import Image
from IPython.display import display
# Open the image file and display it.
im = Image.open("chris.tiff")
display(im)
# Convert the image into a NumPy array, then inspect its shape and contents.
array = np.array(im)
print(array.shape)
array
(200, 200)
array([[118, 117, 118, …, 103, 107, 110],
[113, 113, 113, …, 100, 103, 106],
[108, 108, 107, …, 95, 98, 102],
…,
[177, 181, 182, …, 193, 198, 192],
[178, 182, 183, …, 193, 201, 189],
[178, 182, 184, …, 193, 201, 187]], dtype=uint8)
The uint means that they are unsigned integers (so no negative numbers) and the 8 means 8 bits (one byte) per value. This means that each value can take 2*2*2*2*2*2*2*2 = 256 different values (well, actually the largest is 255, because we start at zero). For black and white images black is stored as 0 and white is stored as 255. So if we just wanted to invert this image we could use the numpy array to do so.
# Build an array of the same shape as the image array, filled with 255.
mask = np.full(shape=array.shape, fill_value=255)
mask
# Subtract the mask from the image array.
modified_array = array - mask
# Multiply by -1 to convert all of the negative values to positive values.
modified_array = modified_array * -1
# Cast back to uint8 so the values carry the datatype the image expects.
modified_array = modified_array.astype(np.uint8)
modified_array
display(Image.fromarray(modified_array))
array([[255, 255, 255, …, 255, 255, 255],
[255, 255, 255, …, 255, 255, 255],
[255, 255, 255, …, 255, 255, 255],
…,
[255, 255, 255, …, 255, 255, 255],
[255, 255, 255, …, 255, 255, 255],
[255, 255, 255, …, 255, 255, 255]])
array([[137, 138, 137, …, 152, 148, 145],
[142, 142, 142, …, 155, 152, 149],
[147, 147, 148, …, 160, 157, 153],
…,
[ 78, 74, 73, …, 62, 57, 63],
[ 77, 73, 72, …, 62, 54, 66],
[ 77, 73, 71, …, 62, 54, 68]], dtype=uint8)
# Lay the same values out as 100 rows by 400 columns and display the result.
reshaped = modified_array.reshape((100, 400))
print(reshaped.shape)
display(Image.fromarray(reshaped))
(100, 400)
3. Indexing, Slicing and Iterating
Indexing
# One-dimensional array: a single index selects one element.
a = np.array([1, 3, 5, 7])
a[2]
# Two-dimensional array: index with [row, column].
a = np.array([[1, 2], [3, 4], [5, 6]])
a
a[1, 1]
# Several elements can be picked one at a time and collected into a new array ...
np.array([a[0, 0], a[1, 1], a[2, 1]])
# ... or fetched in one step by passing a list of row indices and a list of
# column indices.
print(a[[0, 1, 2], [0, 1, 1]])
5
array([[1, 2],
[3, 4],
[5, 6]])
4
array([1, 4, 6])
[1 4 6]
Boolean Indexing
# Comparing the array with a scalar gives a boolean array.
print(a > 5)
# Using that boolean array as an index keeps only the elements where it is True.
print(a[a > 5])
[[False False]
[False False]
[False True]]
[6]
Slicing
a = np.array([0, 1, 2, 3, 4, 5])
# Get the elements from index 0 up to, but not including, index 3.
print(a[:3])
# Get the elements at index 2 and index 3.
print(a[2:4])
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
a
# All elements of the first (index 0) and second (index 1) rows.
a[:2]
# The first two rows again, restricted to the second and third columns.
a[:2, 1:3]
# Assigning through a slice writes into the original array's data ...
sub_array = a[:2, 1:3]
print("sub array index [0,0] value before change:", sub_array[0, 0])
sub_array[0, 0] = 50
print("sub array index [0,0] value after change:", sub_array[0, 0])
# ... so the original array now holds 50 at [0, 1] as well.
print("original array index [0,1] value after change:", a[0, 1])
[0 1 2]
[2 3]
array([[ 1, 2, 3, 4],
[5, 6, 7, 8],
[ 9, 10, 11, 12]])
array([[1, 2, 3, 4],
[5, 6, 7, 8]])
array([[2, 3],
[6, 7]])
sub array index [0,0] value before change: 2
sub array index [0,0] value after change: 50
original array index [0,1] value after change: 50
4. Trying Numpy with Datasets
# To load a dataset in NumPy, use the genfromtxt() function; here the fields
# are separated by ";" and the first line of the file is skipped.
wines = np.genfromtxt("datasets/winequality-red.csv", delimiter=";", skip_header=1)
# Indexing is [row, column]: first with a single integer for the column,
# then with the slice 0:1 for comparison.
print("one integer 0 for slicing: ", wines[:, 0])
print("0 to 1 for slicing: \n", wines[:, 0:1])
# The first three columns.
wines[:, :3]
# Non-consecutive columns, chosen with a list of indices.
wines[:, [0, 2, 4]]
# Negative indices count from the back: the mean of the last column.
wines[:, -1].mean()
## another dataset
# Load the admissions dataset with named fields. dtype=None asks genfromtxt
# to determine the type of each column individually, and `names` labels the
# columns. genfromtxt replaces the spaces in field names with underscores by
# default, which is why lookups use e.g. 'Chance_of_Admit' and 'GRE_Score'.
graduate_admission = np.genfromtxt(
    'datasets/Admission_Predict.csv',
    dtype=None,
    delimiter=',',
    skip_header=1,
    names=('Serial No', 'GRE Score', 'TOEFL Score', 'University Rating',
           'SOP', 'LOR', 'CGPA', 'Research', 'Chance of Admit'),
)
# Each displayed expression is its own statement; fusing two statements onto
# one line is a syntax error.
graduate_admission
graduate_admission.shape
graduate_admission['CGPA'][0:5]
# Rescale CGPA in place: divide by 10, then multiply by 4.
graduate_admission['CGPA'] = graduate_admission['CGPA'] / 10 * 4
graduate_admission['CGPA'][0:20]  # let's get 20 values
# Boolean masking: build a boolean mask of the rows whose Research field is 1,
# pass it to the array indexing operator, and count the rows that remain.
len(graduate_admission[graduate_admission["Research"] == 1])
# Use boolean masks on the chance of admission to pull out only the students
# we are interested in, select one of their scores, and print its mean value:
# first the GRE score for the > 0.8 and < 0.4 groups ...
print(graduate_admission[graduate_admission["Chance_of_Admit"] > 0.8]["GRE_Score"].mean())
print(graduate_admission[graduate_admission["Chance_of_Admit"] < 0.4]["GRE_Score"].mean())
# ... then the full records of the > 0.8 group ...
graduate_admission[graduate_admission["Chance_of_Admit"] > 0.8]
# ... and finally the CGPA for the same two groups.
print(graduate_admission[graduate_admission["Chance_of_Admit"] > 0.8]["CGPA"].mean())
print(graduate_admission[graduate_admission["Chance_of_Admit"] < 0.4]["CGPA"].mean())