Python 核心编程
1. 整型 int
- 没有长度限制
score = 95
type(score)
int
num = 888888888888888888888888888888888888888888888888888888888888888888888888888888
type(num)
int
num
888888888888888888888888888888888888888888888888888888888888888888888888888888
a = 4
b = 3
a + b
7
a - b
1
a * b
12
a / b
1.3333333333333333
a ** b
64
a ** (1 / b)
1.5874010519681994
import math
math.exp(3)
20.085536923187668
math.log(a)
1.3862943611198906
str(a)
‘4’
c = int('5')
c
5
a > b
True
b > a
False
a == b
False
a >= b
True
a <= b
False
a != b
True
2. 浮点数 float
- 浮点数是不精准存储
score = 82.11
type(score)
float
price = 5.55555555555555555555555555555555555555555555555555555555555
price
5.555555555555555
type(price)
float
distance = 1.5e-4
distance
0.00015
a = 1.5
b = 2.7
a + b
4.2
a - b
-1.2000000000000002
a * b
4.050000000000001
a / b
0.5555555555555555
import math
a = 3.5
a ** 2
12.25
a ** (1/2)
1.8708286933869707
a = 5.6
math.exp(a)
270.42640742615254
math.log(a)
1.7227665977411035
math.log2(a)
2.4854268271702415
str(a)
‘5.6’
c = float('2.3132')
c
2.3132
a = 3.23452
math.ceil(a)
4
math.floor(a)
3
int(a)
3
round(a,2)
3.23
a = 1-0.55
a
0.44999999999999996
a = 3.5
b = 6.43
a < b
True
a = 1 - 0.55
a == 0.45
False
# 浮点数的相等不能直接比较
math.fabs(a - 0.45) < 1e-6
True
3. 布尔类型 bool
- True 就是1 , False 就是0
True
True
False
False
result = True
3>2
True
1==2
False
True+1
2
2 ** True
2
1 / False
ZeroDivisionError Traceback (most recent call last)
Cell In[85], line 1
----> 1 1 / False
ZeroDivisionError: division by zero
True and True
True
True and False
False
True or False
True
not False
True
not True
False
4. 字符串 str
- 单引号/双引号/三单引号/三双引号
s1 = '人工智能'
s2 = "人工智能"
type(s1),type(s2)
(str, str)
s3='''这是一个多行字符串
我可以自由换行
'''
s3
‘这是一个多行字符串\n我可以自由换行\n’
s4="""三双引号
一样可以灵活换行"""
s4
‘三双引号\n一样可以灵活换行’
len(s4)
13
s4.__len__()
13
s="qwertyuiopezxcfvgbnhmpdsfkpisjfoid"
s[0]
‘q’
s[10]
‘e’
s[-1]
‘d’
s[-2]
‘i’
s[:3]
‘qwe’
s[3:5]
‘rt’
s[1:-1]
‘wertyuiopezxcfvgbnhmpdsfkpisjfoi’
len(s)
34
#[start:stop:step]
s[1:-4:3]
‘wtiecghdks’
for ele in s[5:-6:4]:
print(ele)
y
p
c
b
p
k
s = " erwreqw"
s.strip()
‘erwreqw’
s = " \t\n AI \t \n"
s
' \t\n AI \t \n'
s.strip()
‘AI’
s = "人工 \t智能"
s.strip()
‘人工 \t智能’
s.replace("\t","").replace(" ","")
‘人工智能’
s = "1,2,3,4,5,6 "
ls = s.strip().split(",")
ls
[‘1’, ‘2’, ‘3’, ‘4’, ‘5’, ‘6’]
s = ",".join(ls)
s
‘1,2,3,4,5,6’
s = "Hello World"
s.lower()
‘hello world’
s.upper()
‘HELLO WORLD’
s1 = "AI "
s2 = "人工智能 "
s1+s2
'AI 人工智能 '
s = 3*s1+ 5* s2
s
'AI AI AI 人工智能 人工智能 人工智能 人工智能 人工智能 '
s.count("AI")
3
'人工' in s
True
'智能' not in s
False
5. 列表 list
- 元素有顺序,元素可以是任意类型,元素可以重复
ls = [1,2,3,4]
type(ls)
list
ls = list("abc")
ls
[‘a’, ‘b’, ‘c’]
[1,2] == [2,1]
False
ls = [1,True,"asd",[1,2,3,4]]
ls
[1, True, ‘asd’, [1, 2, 3, 4]]
len(ls)
4
ls.__len__()
4
ls[0]
1
ls[:2]
[1, True]
ls[::2]
[1, ‘asd’]
ls[::-1]
[[1, 2, 3, 4], ‘asd’, True, 1]
for ele in ls:
print(ele)
1
True
asd
[1, 2, 3, 4]
ls=[]
len(ls)
0
ls.insert(0,"test")
ls
[‘test’]
ls.insert(0,"1")
ls
[‘1’, ‘test’]
ls.insert(100,"2")
ls
[‘1’, ‘test’, ‘2’]
ls.insert(-100,123)
ls
[123, ‘1’, ‘test’, ‘2’]
ls.append([1,2,3])
ls
[123, ‘1’, ‘test’, ‘2’, [1, 2, 3]]
ls.extend("mech")
ls
[123, ‘1’, ‘test’, ‘2’, [1, 2, 3], ‘m’, ‘e’, ‘c’, ‘h’]
ls.remove("1")
ls.remove([1,2,3])
ls
[123, ‘test’, ‘2’, ‘m’, ‘e’, ‘c’, ‘h’]
ls.remove(321)
ValueError Traceback (most recent call last)
Cell In[217], line 1
----> 1 ls.remove(321)
ValueError: list.remove(x): x not in list
ls.pop()
‘h’
ls
[123, ‘test’, ‘2’, ‘m’, ‘e’, ‘c’]
ls.pop(10)
IndexError Traceback (most recent call last)
Cell In[220], line 1
----> 1 ls.pop(10)
IndexError: pop index out of range
ls.pop(-3)
‘m’
ls
[123, ‘test’, ‘2’, ‘e’, ‘c’]
ls[0] = 321
ls
[321, ‘test’, ‘2’, ‘e’, ‘c’]
ls[-3:] = [1.1,2.2]
ls
[321, ‘test’, 1.1, 2.2]
ls.count(1)
0
321 in ls
True
6. 元组 tuple
- 元素有顺序,可以是任意类型,不可修改
t1 = ()
type(t1)
tuple
t1
()
t2 = ("qwe")
t2
‘qwe’
type(t2)
str
t3 = ("12",)
t3
(‘12’,)
type(t3)
tuple
t4=(1,2,True,"ADSAS")
t4
(1, 2, True, ‘ADSAS’)
t5 = tuple("abd")
t5
(‘a’, ‘b’, ‘d’)
t6=tuple([1,2,3,4,5,6])
t6
(1, 2, 3, 4, 5, 6)
# 元素不能修改
t6[3]=0
TypeError Traceback (most recent call last)
Cell In[248], line 2
1 # 元素不能修改
----> 2 t6[3]=0
TypeError: ‘tuple’ object does not support item assignment
t1 = tuple("abc")
t1
(‘a’, ‘b’, ‘c’)
len(t1)
3
t1.__len__()
3
t1[0]
‘a’
t1[-1]
‘c’
t1[1::-1]
(‘b’, ‘a’)
# 省略括号和解构赋值
t1 = (1,4.3)
t2 = 1,4.3
t1==t2
True
p1,p2 = t2
p1,p2
(1, 4.3)
p1
1
p2
4.3
p1,p2,p3 = t2
ValueError Traceback (most recent call last)
Cell In[265], line 1
----> 1 p1,p2,p3 = t2
ValueError: not enough values to unpack (expected 3, got 2)
# 快速交换多个变量的值
A = 4
B = 5
A,B=B,A
A
5
B
4
ls = (1,2,3,True,1,3,4,2,12)
ls.count(1)
3
ls.count(True)
3
True in ls
True
False in ls
False
# tuple 元素不能修改吗?
t2 = (1,2,3)
t2[1]=3
TypeError Traceback (most recent call last)
Cell In[279], line 1
----> 1 t2[1]=3
TypeError: ‘tuple’ object does not support item assignment
t3=(1,2,[4,5,6])
# 列表是可变元素,值变了,但地址没变
t3[-1].append("ABD")
t3
(1, 2, [4, 5, 6, ‘ABD’])
7. 集合 set
- 元素的无序性/确定性/唯一性
s1 = {}
s1
{}
type(s1)
dict
t2 = set()
t2
set()
type(t2)
set
t3={1}
t3
{1}
t4={1,2,3,4,5,6,7}
t4
{1, 2, 3, 4, 5, 6, 7}
t5=set("abscs")
t5
{‘a’, ‘b’, ‘c’, ‘s’}
t6=set([1,2,3,4])
t6
{1, 2, 3, 4}
t7 = {1,1,1,1,2,3,4}
t7
{1, 2, 3, 4}
# 列表是可变的,不能作为集合的元素
t8 = {[1,2,3],True,"absd"}
TypeError Traceback (most recent call last)
Cell In[301], line 2
1 # 列表是可变的,不能作为集合的元素
----> 2 t8 = {[1,2,3],True,“absd”}
TypeError: unhashable type: ‘list’
s = set("asdfdwsf")
len(s)
5
s.__len__()
5
s.add(True)
s
{True, ‘a’, ‘d’, ‘f’, ‘s’, ‘w’}
s.add('s')
s
{True, ‘a’, ‘d’, ‘f’, ‘s’, ‘w’}
s.remove('a')
s
{True, ‘d’, ‘f’, ‘s’, ‘w’}
s.remove('ffff')
KeyError Traceback (most recent call last)
Cell In[313], line 1
----> 1 s.remove(‘ffff’)
KeyError: ‘ffff’
# 随机删除
s.pop()
‘d’
s
{‘s’, ‘w’}
a = {1,2,3,4,5,6,7}
b = {4,5,6,7,8,9}
a.intersection(b)
{4, 5, 6, 7}
b.intersection(a)
{4, 5, 6, 7}
a.union(b)
{1, 2, 3, 4, 5, 6, 7, 8, 9}
b.union(a)
{1, 2, 3, 4, 5, 6, 7, 8, 9}
1 in a
True
10 in b
False
8. 字典 dict
- 元素成对出现,按插入顺序保存(Python 3.7 起),key不可变,不重复,value无要求
d1 = {}
d1
{}
type(d1)
dict
len(d1)
0
d2 = {"name":"Tom"}
d2
{‘name’: ‘Tom’}
d3 = {'name': 'Tom','age':12}
d3
{‘name’: ‘Tom’, ‘age’: 12}
len(d3)
2
# 可变类型不可以做key
# 可变类型: list/set/dict
# 不可变类型: int/float/bool/str/tuple*
d4 = {[1,2,3]:12}
TypeError Traceback (most recent call last)
Cell In[339], line 2
1 # 可变类型不可以做key
----> 2 d4 = {[1,2,3]:12}
TypeError: unhashable type: ‘list’
d5 = dict(a=1,b=2,c=3)
d5
{‘a’: 1, ‘b’: 2, ‘c’: 3}
len(d5)
3
d5.__len__()
3
d6 = dict(name='Tom',age=22,school='北京大学')
d6
{‘name’: ‘Tom’, ‘age’: 22, ‘school’: ‘北京大学’}
d6["score"]=87
d6
{‘name’: ‘Tom’, ‘age’: 22, ‘school’: ‘北京大学’, ‘score’: 87}
# 有则改之,无则追加
d2["score"] = 98
d2
{‘name’: ‘Tom’, ‘score’: 98}
d2["score"]
98
# 读取不存在的key会报错
d6["asd"]
KeyError Traceback (most recent call last)
Cell In[364], line 2
1 # 读取不存在的key会报错
----> 2 d6["asd"]
KeyError: 'asd'
# 使用get读取更加安全
d6.get("abc",0)
0
d6
{‘name’: ‘Tom’, ‘age’: 22, ‘school’: ‘北京大学’, ‘score’: 87}
d6.pop('name')
‘Tom’
d6
{‘age’: 22, ‘school’: ‘北京大学’, ‘score’: 87}
d6.pop('age1')
KeyError Traceback (most recent call last)
Cell In[371], line 1
----> 1 d6.pop('age1')
KeyError: 'age1'
d6.keys()
dict_keys([‘age’, ‘school’, ‘score’])
d6.values()
dict_values([22, ‘北京大学’, 87])
d6.items()
dict_items([(‘age’, 22), (‘school’, ‘北京大学’), (‘score’, 87)])
# 默认遍历key
for ele in d2:
print(ele)
name
score
# 遍历key
for key in d6.keys():
print(key)
age
school
score
# 遍历value
for value in d6.values():
print(value)
22
北京大学
87
# 遍历键值对
for key,value in d6.items():
print(key,value)
age 22
school 北京大学
score 87
'age' in d6
True
'age1' in d6
False
9. 分支结构和三元表达
if(1<2):
print("1<2")
1<2
age = 25
# Two-way branch: 18 and above counts as an adult.
if age >= 18:
    print("成年人")
else:
    print("未成年")
成年人
score = 89
# Multi-way branch: the conditions are tested top-down and only the first
# matching one runs, so each threshold implicitly excludes the ones above it.
if score >= 90:
    print("优秀")
elif score >= 75:
    print("良好")
elif score >= 60:
    print("及格")
else:
    print("不及格")
良好
# 三元表达
age = 3
True if age >= 18 else False
False
"成年人" if age >=18 else "未成年"
‘未成年’
"成年人" if age >=18 else age
3
10. 循环和遍历
num = 1
# Infinite loop terminated explicitly with `break`: increments and prints
# num until it reaches 10 (prints 2 through 10).
while True:
    if num < 10:
        num += 1
        print(num)
    else:
        break
2
3
4
5
6
7
8
9
10
for _ in range(10):
print("Hello,Python!!!")
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
Hello,Python!!!
ls = [1,2,3,4,5,"Hello",False]
for ele in ls:
print(ele)
1
2
3
4
5
Hello
False
s = "asdfg"
for ele in s:
print(ele)
a
s
d
f
g
11. 目录和路径
import os
# 路径是否存在(文件夹或文件)
os.path.exists(path="./abc")
True
# 路径拼接
os.path.join(".","abc","ddd")
'.\\abc\\ddd'
# 创建目录
save_path = "./abc/saved_weight"
# Create the directory (and any missing parents). exist_ok=True makes the
# call a no-op when the directory is already there, so no separate
# existence check is needed.
os.makedirs(save_path, exist_ok=True)
# Delete a file, but only if it exists.
if os.path.exists("ddd.ddd"):
    # os.remove requires the path of the file to delete.
    os.remove("ddd.ddd")
# 删除目录
# NOTE(review): this path differs from "./abc/saved_weight" used when the
# directory was created -- confirm which spelling is intended.
save_path = "./abc/saves_weights"
if os.path.exists(save_path):
    print("存在")
    # os.removedirs removes the leaf directory and then prunes every parent
    # directory that has become empty; use os.rmdir to remove only the leaf.
    os.removedirs(name=save_path)
# 遍历一个目录
root = "./"
for ele in os.listdir(root):
    # os.listdir returns bare names; join them with root so the type checks
    # look at the right entry even when root is not the working directory.
    full_path = os.path.join(root, ele)
    if os.path.isfile(full_path):
        print(ele,"文件")
    elif os.path.isdir(full_path):
        print(ele,"目录")
.ipynb_checkpoints 目录
12.csv 文件
1234.txt 文件
abc 目录
P1_数据类型.ipynb 文件
P2_逻辑结构及文件操作.ipynb 文件
poem.txt 文件
saved_weight 目录
12. 文件操作
# 打开、增删改查、关闭
f = open(file="./1234.txt",mode="r",encoding="utf8")
f.read()
‘锄禾日当午\n汗滴禾下土’
f.read()
‘’
f.close()
# 更加优雅的写法
with open(file="./1234.txt",mode="r",encoding="utf8") as f:
    # Iterating the file object yields one line at a time and stops only at
    # end-of-file, so a blank line in the middle does not end the loop early.
    for line in f:
        print(line.strip())
锄禾日当午
汗滴禾下土
# Write: mode "w" creates the file if it is missing and truncates it if it exists.
with open(file="poem.txt",mode="w",encoding="utf8") as f:
    f.write("举头望明月\n低头思故乡\n")
# Append: mode "a" keeps the existing content and writes at the end.
with open(file="poem.txt",mode="a",encoding="utf8") as f:
    f.write("举头望明月\n低头思故乡\n")
13. 函数
- 位置参数
- 默认参数
- 可变参数
- 匿名函数
# Define a function
def fun():
    """Do nothing and return None; a minimal function definition."""
    pass
# 调用
fun()
callable(fun)
True
# 位置参数
def add(a,b):
    """Return a + b; both arguments are passed by position."""
    return a+b
add(1,2)
3
# 默认参数
def area(r,pi=3.1415926):
    """Return the area of a circle of radius r.

    pi is a default parameter: callers may omit it, or override it to
    choose a different precision.
    """
    return r*r*pi
area(r=3,pi=3.14)
28.26
# 可变参数
def add_num(*args):
    """Return the sum of all positional arguments (0 when none are given).

    *args collects any number of positional arguments into a tuple.
    """
    return sum(args)
add_num()
0
add_num(1)
1
add_num(1,2,3,4,5)
15
# 匿名函数
fn = lambda x: x ** 2
fn(3)
9
# 冒号前面写参数,冒号后面写函数体,然后传参进去
(lambda x,y:x+y)(1,2)
3
14. 类
- 封装
- 继承
- 多态
class Car(object):
    """A car with a color, a brand and a price."""

    def __init__(self,color="Black",brand="BYD",price=50):
        """Store the custom attributes on the instance."""
        self.color = color
        self.brand = brand
        self.price = price

    def info(self):
        """Custom method: print color, brand and price on one line."""
        print(self.color,self.brand,self.price)

    def __repr__(self):
        """Override the representation inherited from object."""
        return 'Car'
car1 = Car(color="Blue")
car1
Car
print(car1.color)
Blue
car1.info()
Blue BYD 50
# 继承
car1.__repr__()
‘Car’
o = object()
o.__repr__()
‘<object object at 0x0000020EBEBE4ED0>’
# 多态
car1.__repr__()
‘Car’
15. 异常处理
- 异常基类 Exception
- 捕获异常 try except else finally
- 抛出异常 raise
def divide(a, b):
    """Return a / b; raises ZeroDivisionError when b is 0."""
    return a / b
divide(1,2)
0.5
divide(2,0)
ZeroDivisionError Traceback (most recent call last)
Cell In[13], line 1
----> 1 divide(2,0)
Cell In[11], line 2, in divide(a, b)
1 def divide(a, b):
----> 2 return a / b
ZeroDivisionError: division by zero
"""
异常处理
- 守株待兔的操作
- 如果没有发生异常:不做任何额外处理
- 如果发生了异常,引导程序做出合理化的处理
"""
a = 3
b = 0
try:
    result = divide(a,b)
    print(result)
except Exception as e:
    # Runs only when the try body raised; prints the error message.
    print(e)
else:
    # Runs only when the try body finished without raising.
    print("没有发生错误")
finally:
    # Always runs, whether or not an exception occurred.
    print("不管是否发生错误,我都会执行")
# The exception was handled, so execution continues past the try statement.
print("这里依然可以执行")
division by zero
不管是否发生错误,我都会执行
这里依然可以执行
# 抛出异常
a = 2
b = 3
def divide(a, b):
    """Return a / b for integer operands with a non-zero divisor.

    Raises:
        Exception: if either operand is not an int, or if b is 0.
    """
    if isinstance(a, int) and isinstance(b, int) and b != 0:
        return a / b
    # Invalid input is reported by raising rather than by returning a value.
    raise Exception("参数错误")
divide(2,1)
2.0
divide(1,0.3)
Exception Traceback (most recent call last)
Cell In[23], line 1
----> 1 divide(1,0.3)
Cell In[21], line 10, in divide(a, b)
8 return a / b
9 else:
---> 10 raise Exception("参数错误")
Exception: 参数错误
16. 包和模块
- 包和模块是代码组织的一种方式,包就是一个文件夹,模块就是一个源码文件
- 避免重复造轮子,利用前人写好的包和模块
- 托管平台:pip 和 conda 管理工具
import numpy as np
np.__version__
‘1.23.5’
np.e
2.718281828459045
import os
os.path.exists("./")
True
from matplotlib import pyplot as plt
- 定义自己的模块
- 在项目文件夹中新建一个文件夹名为 utils ,在里面新建一个 math.py文件,然后编辑函数
from utils import math
math.add(3,23)
26
math.sub(43,1)
42
17. 随机数
- 概率论中随机试验产生的结果
- 数据科学中随机数很重要
import random
# 均匀分布
# 按照均匀分布生成一个随机数
random.uniform(0,100)
4.646385375119754
random.randint(0,100)
67
# 高斯分布
random.gauss(mu=0,sigma=1)
-2.9510713969400872
# 洗牌操作
x = [1,2,3,4,5,6,7,8,9,0]
print(x)
random.shuffle(x)
print(x)
[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
[0, 6, 1, 4, 5, 7, 3, 9, 8, 2]
# 随机抽取
x = [1,2,3,4,5,6,7,8,9,0]
result = random.choice(x)
print(result)
9
ls = random.sample(x,3)
print(ls)
[5, 9, 3]
# 固定随机种子,方便复现:种子固定后,每次运行生成的随机序列都相同
random.seed(0)
x = [1,2,3,4,5,6,7,8,9,0]
result = random.sample(x,2)
print(result)
[7, 0]
18. 生成器
- 当数据集很大时,我们很难一次性将所有数据加载到内存中,而是按需加载,这时候就要用到生成器
- 模型训练时,数据经常会打包成生成器
- yield 和 return
- 列表生成器
- 打包数据集
# return 返回并跳出函数
def get_data():
    """Return the first element of the list.

    `return` leaves the function immediately, so the loop never reaches
    the second element.
    """
    ls = [0,1,2,3,4,5,6,7,8,9]
    for ele in ls:
        return ele
get_data()
0
# yield 构建生成器
def get_data():
    """Yield the elements 0..9 one at a time.

    `yield` makes this a generator function: calling it returns a generator,
    and each iteration resumes the loop where it last paused.
    """
    ls = [0,1,2,3,4,5,6,7,8,9]
    for ele in ls:
        yield ele
gen = get_data()
for ele in gen:
print(ele)
0
1
2
3
4
5
6
7
8
9
ls = list(range(10))
ls
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# 列表解析式 : for循环 + if判断
ls1 = [ele for ele in ls if ele % 2 == 1]
ls1
[1, 3, 5, 7, 9]
# 这样也可以构建生成器
gen1 = (ele for ele in ls if ele % 2 == 1)
for ele in gen1:
print (ele)
1
3
5
7
9
# 通过生成器来读取数据集
def get_dataset():
    """Yield the lines of dataset.csv one at a time, skipping the first line.

    Lines are yielded as read, including their trailing newline. The file is
    opened when the generator is first advanced and closed when it is
    exhausted.
    """
    with open(file="dataset.csv",mode="r",encoding="utf8") as f:
        # Discard the first line (the header row); the default avoids
        # StopIteration on an empty file.
        next(f, None)
        yield from f
gen = get_dataset()
# 生成器是一次性的,从头读到尾之后,再想重新读就又要新的生成器了
next(gen)
‘1.70,70,23,89\n’
for ele in gen:
print(ele)
1.67,71,23,79
1.75,72,22,84
1.74,73,23,86
1.79,74,21,56
19. 高阶函数
- 把函数当做参数,自动化的实现底层遍历
- 数据科学中高阶函数很有用,可以极大提升效率
- map 映射
- reduce 聚合
- filter 过滤
- sorted 排序
- map
ls = [0,1,2,3,4,5,6,7,8,9]
def add(ele):
    """Return ele + 0.5; applied to each element by map()."""
    return ele + 0.5
ls1 = list(map(add,ls))
ls1
[0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]
- reduce
from functools import reduce
sum(ls)
45
def add(a,b):
    """Return a + b; used as the two-argument combiner for reduce()."""
    return a+b
reduce(add,ls)
45
reduce(lambda x, y: x * y, ls)
0
- filter
list(filter(lambda x : True if x % 2 == 0 else False, ls))
[0, 2, 4, 6, 8]
- sorted
sorted(ls, reverse = True)
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
ls = [0,1,2,3,4,5,6,7,8,9]
ls.sort(reverse = True)
ls
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
ls2 = [(2,4),(1,6),(5,1),(3,8)]
sorted(ls2, key=lambda x:x[0], reverse=True)
[(5, 1), (3, 8), (2, 4), (1, 6)]
sorted(ls2, key=lambda x:x[1],reverse=False)
[(5, 1), (2, 4), (1, 6), (3, 8)]