八、python数据结构

芈月坦途

已于 2022-06-15 22:20:34 修改

阅读量88

点赞数

分类专栏： PYTHON 文章标签： python 数据结构

于 2022-06-15 22:19:42 首次发布

本文链接：https://blog.csdn.net/ON_THE_WAY2/article/details/125290965

版权

PYTHON 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

本文深入探讨了Python的四种主要数据结构：列表、元组、字典和集合。详细介绍了它们的基本概念、操作方法及应用场景，包括增删改查、成员关系判断、遍历、合并与统计等。此外，还对比了列表与元组的可变性，字典与集合的无序性以及它们在数据处理中的独特优势。

摘要由CSDN通过智能技术生成

1. list

1.1 基本概念
	# 概念：有序项目集合，可以存放任何数据类型对象，是可变数据类型
	# 列表的生成：
	lst = []
	lst = list(iterable)  # iterable 为任意可迭代对象（如字符串、元组、range）

	# list是可变数据类型；字符串为不可变数据类型
	# >>> id(list5)
	# 140397280838984
	# >>> list5[2]
	# 1
	# >>> list5[2] = 3
	# >>> id(list5)
	# 140397280838984
	# 字符串为不可变数据类型
	# >>> str1 = "abc"
	# >>> str1[1]
	# 'b'
	# >>> str1[1] = 'g'
	# Traceback (most recent call last):
	#   File "<stdin>", line 1, in <module>
	# TypeError: 'str' object does not support item assignment
	# >>>

1.2 列表基本操作
1.2.1 增
	append() 在列表末尾增加一个元素，无返回值
	# .append()在列表末尾增加一个元素
	lst = ['x', 'i', 'love','ff']
	lst.append('mm')  # 在列表末尾添加元素
	print(lst)  # 结果：['x', 'i', 'love', 'ff', 'mm']
	print(lst.append('mm'))  # 无返回值，即返回值为None
	print(lst)  # 结果：['x', 'i', 'love', 'ff', 'mm', 'mm']
	
	# .insert()在指定下标添加元素
	lst.insert(2,"yy")  # 在指定位置添加元素，即在下标为2的地方添加yy
	print(lst)  # 结果：['x', 'i', 'yy', 'love', 'ff', 'mm', 'mm']
	
	# .extend(iterable对象) 参数需要传入可迭代对象如字符串，列表等可以被for循环获取
	lst.extend('gfd')  # 添加元素必须是可迭代对象，将元素拆开加入列表
	print(lst)  # 结果：['x', 'i', 'yy', 'love', 'ff', 'mm', 'mm', 'g', 'f', 'd']
	
1.2.2 引用传递和copy的区别
	# 引用传递
	list4 = list5 = ['f', 'g']
	list4.append('G')
	print(list4)  # 结果：['f', 'g', 'G']
	print(list5)  # 结果：['f', 'g', 'G']
	print(id(list4),id(list5))  # 1903688659136 1903688659136
	
	# copy
	list6 = list5.copy()
	print(id(list5))  # 1903688659136
	print(id(list6))  # 1903688659072
	list6.append('uu')
	print(list6)  # ['f', 'g', 'G', 'uu']
	print(list5)  # ['f', 'g', 'G']
	lst3 = [1, 2, 3]
	lst4 = lst3.copy()
	print(id(lst4))  # 1903688659264
	lst5 = lst3[::]  # 切片相当于copy复制，内存地址不一样
	print(id(lst5))  # 1903688659648
	print(id(lst3))  # 1903688659200

1.2.3 删
	# .pop() 指定下标删除，默认删除最后一个元素，并将其作为返回值返回
	list1 = ['avb', 'gb', None, True, False, 'abcd']
	print(list1.pop())  # 默认删除最后一个元素，abcd
	print(list1.pop(3))  # 指定删除下标为3的元素，True
	
	# .remove()删除遇到的第一个元素
	lst3 = [7, 8, 9, 10, 10, 7]
	lst3.remove(7)  # 删除遇到的第一个元素
	print(lst3)  # 结果：[8, 9, 10, 10, 7]
	
	# del 关键字删除引用
	list1 = ['a', 'b', 12, 'gf']
	del list1[2:3]  # 删除切片[2:3]对应的元素（即下标为2的元素12）；不再被引用的对象由python内存管理机制回收
	print(list1)  # 结果:['a', 'b', 'gf']
	
	# .clear()清除列表
	list1.clear()
	print(list1)  # []

1.2.4 改（切片赋值）
	# 修改列表：对切片赋值时，切片为空则在start位置插入元素，切片不为空则替换切片处的元素
	lst3 = [8, 9, 10, 10, 7]
	lst3[1:2] = 'abc'  # 在切片处替换元素
	print(lst3)  # [8, 'a', 'b', 'c', 10, 10, 7]
	lst3[3:3] = 'ghj'  # 在start位置插入元素
	print(lst3)  # [8, 'a', 'b', 'g', 'h', 'j', 'c', 10, 10, 7]

1.2.5 成员判断
	# 成员判断 in  not in
	lst = [1,23,'aa']
	print('aa' in lst)  # True

1.2.6 查（切片）与改（反转、拼接、排序）
	# slice（start，end，step）切片相同时可以模式化操作
	lst2 = ['i', 'love', 'work', 'and', 'you', '?']
	s1 = slice(0, 3, 1)  # 以步长为1，从0开始切至3，左闭右开
	print(s1)  # 返回slice(0, 3, 1)对象
	print(lst2[s1])  # ['i', 'love', 'work']
	
	list1 = [1.2, 3, 4, 5, 6]
	print(list1[::-1])  # [6, 5, 4, 3, 1.2]  # 切片不改变原来的列表
	print(list1)  # [1.2, 3, 4, 5, 6]
	print(list1.reverse())  # None
	print(list1)  # [6, 5, 4, 3, 1.2]  # 调用列表的属性方法，原列表改变
	
	# 列表拼接用 +；sort() 排序默认为升序
	# 排序时列表中的数据类型要一致（元素之间必须可以相互比较，否则抛出TypeError）
	list1 = [1,2,3]
	list2 = [4,5,6]
	list3 = list1 + list2
	print('默认排序：', list3)  # 默认排序： [1, 2, 3, 4, 5, 6]
	list3.sort(reverse=True)
	print('降序：   ', list3)  # 降序：    [6, 5, 4, 3, 2, 1]
	list3.sort(reverse=False)
	print('升序：   ', list3)  # 升序：    [1, 2, 3, 4, 5, 6]
	
	# 字符串排序根据首字母的unicode编码来排序，一样则按第二个字母来排序
	lst = ['你好','back','hello','世界']
	lst.sort(reverse=True)  # 默认为False，即升序；True为降序
	print(lst)  # ['你好', '世界', 'hello', 'back']
	# ord(c, /)  Return the Unicode code point for a one-character string.
	print(ord("b"))  # 98
	print(ord('你'))  # 20320
	print(ord('世'))  # 19990
	print(ord('h'))  # 104

1.2.7 统计
	# len(lst)计算长度
	# .count()统计个数
	# .index() 函数用于从列表中找出某个值第一个匹配项的索引位置。
	print("*"*10)
	lst = [1,2,3,4,2,4,1,1,1,5,7]
	print(len(lst))  # 11
	print(lst.count(1))  # 4
	print(lst.index(2))  # 1

1.2.8 遍历
	# enumerate()返回一个enumerate对象（迭代器），每次迭代产出 (下标, 元素) 元组，把下标和元素组成映射
	lst1 = ['a', 'v', 'f', 'g']
	for i, j in enumerate(lst1):
	    print("{} --> {}".format(i, j))
	# 结果：
	# 0 --> a
	# 1 --> v
	# 2 --> f
	# 3 --> g

2. tuple

2.1 list和tuple联系
	# 相同点
	list和tuple都是序列，都可以包含任何类型的对象；序列的两个主要特点是支持索引操作符和切片操作符
	# 不同点
	list是可变数据类型；tuple是不可变数据类型，不能修改、删除其元素，但当tuple中嵌套list时，该list本身的内容仍可以被修改；元组是处理有序项目的数据结构，属于不可变数据类型，可以包含任何类型的对象

2.2 定义
	a = ()
	print(a)  #()
	print(a,type(a))  # () <class 'tuple'>
	
	a = tuple()
	print(a,type(a))  # () <class 'tuple'>
	a = (1)
	print(a,type(a))  # 1 <class 'int'>
	
	a = (1,)
	print(a,type(a))  # (1,) <class 'tuple'>

2.3 基本操作
2.3.1 索引
	my_tuple = (1,3.14,'cali',False,print,[1,2],(1,2),None)
	print(my_tuple[5])  # [1, 2]
	print(my_tuple[-5])   # False

2.3.2 切片
	my_tuple = (1,3.14,'cali',False,print,[1,2],(1,2),None)
	print(my_tuple[5])  # [1, 2]
	print(my_tuple[-5])  # False
	print(my_tuple[:7:2]) # (1, 'cali', <built-in function print>, (1, 2))
	print(my_tuple[::])  # (1, 3.14, 'cali', False, <built-in function print>, [1, 2], (1, 2), None)
	print(my_tuple[-4:-1])  # (<built-in function print>, [1, 2], (1, 2))
	print(my_tuple[-1:-4])  # ()
	print(my_tuple[-1:-4:-1])  # (None, (1, 2), [1, 2])
	print(slice(0,3),my_tuple[slice(0,3)])  # slice(0, 3, None) (1, 3.14, 'cali')

2.3.3 统计
	my_tuple = (1,3.14,'cali',False,print,[1,2],(1,2),None)
	print(len(my_tuple))  # 8
	my_tuple.count(1)  # 1
	my_tuple.index("orange")  # 查看orange的首个索引值，无则报ValueError
	Traceback (most recent call last):
	  File "<input>", line 1, in <module>
	ValueError: tuple.index(x): x not in tuple

2.3.4 遍历
	m = (1, 3.14, 'cali', False, print, [1, 2], (1, 2), None)
	for i in enumerate(m):
	    print(i)
	# (0, 1)
	# (1, 3.14)
	# (2, 'cali')
	# (3, False)
	# (4, <built-in function print>)
	# (5, [1, 2])
	# (6, (1, 2))
	# (7, None)

3. dictionary

3.1 定义
	字典是python中的一个键值映射的数据结构，是可变对象；自Python 3.7起字典保持键的插入顺序（更早的版本不保证顺序）；key必须唯一，天生去重；字典的key必须是可hash对象（不可变数据类型）；
	
	# 定义字典
	singer_null = {}
	print(singer_null,type(singer_null))
	
	singer_null = dict()
	print(singer_null,type(singer_null))
	
	# 字典key必须是hash对象 如：字符串，数字，元组，布尔值，None；value可以是任意值
	# list不是hash对象 不可以做为key
	singer_song = {1: 12, '张韶涵': True, ['汪峰']: 'roke'}  # 报错 TypeError: unhashable type: 'list'
	print(singer_song)
	# tuple作为key
	singer_song = {1: 12, '张韶涵': True, ('汪峰',): 'roke'}
	print(singer_song)
	# bool值作为key
	singer_song = {False: 12, '张韶涵': True, ('汪峰',): 'roke'}
	print(singer_song)
	# 两个相同的key，相当于重复定义
	singer_song = {1: 12, '张韶涵': True, '汪峰': 'roke','汪峰':'在春天里'}
	print(singer_song)
	
	
	lst = [(1,'a'),(2,'b'),(3,'c')]
	print(dict(lst))  # {1: 'a', 2: 'b', 3: 'c'}

3.2 查看
	dict[key]：当key不存在时，抛出KeyError
	dict.get(key, default)：当key不存在时返回default默认内容（未指定default时返回None）
	singer_song = {False: 12, '张韶涵': True, ('汪峰',): 'roke'}
	print(singer_song[False])  # 12
	print(singer_song.get(False, "没有找到"))  # 12
	print(singer_song.get('张韶涵1', "没有找到"))  # 没有找到
	 
3.3 新增
	# dict[key] = value
	# 当key不存在时表示新增加一个键值对，当key存在时，表示修改对应键的value值
	# 增加修改字典
	singer_song = {False: 12, '张韶涵': True, ('汪峰',): 'roke'}
	singer_song['张韶涵1'] = '隐形的翅膀'
	print(singer_song)  # 	{False: 12, '张韶涵': True, ('汪峰',): 'roke', '张韶涵1': '隐形的翅膀'}
	singer_song['张韶涵'] = '阿刁-new'
	print(singer_song)  # 	{False: 12, '张韶涵': '阿刁-new', ('汪峰',): 'roke', '张韶涵1': '隐形的翅膀'}
	
3.3.1 合并字典
	# dict1.update(dict2) 更新dict1
	singer_song1 = {False: 12, '张韶涵': True, ('汪峰',): 'roke', 1: 3}
	singer_song2 = {False: 14, '张韶涵': True, ('汪峰',): 'roke', 5: 2}
	print(singer_song1.update(singer_song2))  # None
	print(singer_song1)  # {False: 14, '张韶涵': True, ('汪峰',): 'roke', 1: 3, 5: 2}
	
	# dict(dict1, **dict2)合并两个字典，但是dict2的key必须是string类型
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	singer_song2 = {'False': 14, '张韶涵': True, '汪峰': 'roke'}
	cc = dict(singer_song1, **singer_song2)  # **dict-->key=value
	print(cc)  # {'False': 14, '张韶涵': True, '汪峰': 'roke', 1: 2}
	
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	singer_song2 = {False: 14, '张韶涵': True, '汪峰': 'roke'}
	cc = dict(singer_song1, **singer_song2)
	print(cc)  # 报错：keywords must be strings，即第二个字典key必须是string类型
	
	# dict(d1, **d2) ：需要字典键必须为字符串类型，否则报错：TypeError: keywords must be strings，因为**本质为形参和实参对的传递，而参数变量名不可以数字开头
	lst = (1,2)
	print("num1 is {},num2 is {}".format(*lst))  # *lst --> 1,2
	print("num1 is {},num2 is {}".format(1,2))
	
	d1 = {"a":1,"b":2}
	print("a is {a},b is {b}".format(**d1))  # **d1 --> a=1,b=2
	print("a is {a},b is {b}".format(a=1,b=2))
	
	# 结果：
	# num1 is 1,num2 is 2
	# num1 is 1,num2 is 2
	# a is 1,b is 2
	# a is 1,b is 2

	dict1 = {'a':1,'b':2,3:'n',True:'false'}
	dict2 = {'h':4}
	dict(dict1,**dict2)  # {'a': 1, 'b': 2, 3: 'n', True: 'false', 'h': 4}
	dict2 = {('a',):4}
	dict(dict1,**dict2)
	Traceback (most recent call last):
	  File "<stdin>", line 1, in <module>
	TypeError: keyword arguments must be strings
3.4 删除
	# popitem()从字典中移除并返回一个 (键, 值) 对。 键值对会按 LIFO 的顺序被返回。(默认删除最后一个键值对并且以元组的方式将其返回)
	# popitem() 适用于对字典进行消耗性的迭代，这在集合算法中经常被使用。
	# 如果字典为空，调用 popitem() 将引发 KeyError。
	# dicts.pop(key) 删除指定键的键值对，并将其value返回
	singer_song = {False: 12, '张韶涵': True, ('汪峰',): 'roke'}
	print(singer_song.pop(False))  # 12
	print(singer_song)  # {'张韶涵': True, ('汪峰',): 'roke'}
	print(singer_song.popitem()) # (('汪峰',), 'roke')
	print(singer_song) # {'张韶涵': True}

3.5 遍历
	# 默认遍历输出key
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	for i in singer_song1:  # 默认遍历只输出key
	    print(i, end=' ')  # False 张韶涵 汪峰 1
	print('\n')
	
	# dict.keys()  # 遍历输出key
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	for key in singer_song1.keys():
	    print(key, end=' ')  # False 张韶涵 汪峰 1
	print('\n')

	# dict.values()
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	for value in singer_song1.values():  # 遍历输出value 
	    print(value, end=' ')  # 12 True roke 2
	print('\n')

	# dict.items()
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	for item in singer_song1.items():  # 遍历输出key和value
	    print(item, end=' ')  # ('False', 12) ('张韶涵', True) ('汪峰', 'roke') (1, 2)
	
3.6 成员关系
	# in   not in
	# 默认为判断dict的key是否存在 判断value是否存在时，用dict1.values()
	singer_song1 = {'False': 12, '张韶涵': True, '汪峰': 'roke', 1: 2}
	print("张韶涵1" in singer_song1)
	print(2 in singer_song1.values())

4. set

4.1 定义
	集合是无序、不重复的只含有key的字典；集合可以去重、求两组数据的交集、并集、差集; 集合元素必须是可hash对象
	
4.2 创建集合
	# set（iterable）
	s1 = set([1,2,3])
	
	s2 = {1,2,3}

4.3 添加
	# s.add(item)
	# s.update(someitems)

	s = {1,2,3}
	print(s)  # {1, 2, 3}
	print(s.add(4))  # None
	print(s)  # {1, 2, 3, 4}
	print(s.update([5,6,7]))  # None
	print(s)   # {1, 2, 3, 4, 5, 6, 7}
	print(s.update('asd'))  # None
	print(s)  # {1, 2, 3, 4, 5, 6, 7, 's', 'a', 'd'}

	
4.4 移除
	# s.remove(item)移除一项，item不存在时会抛出KeyError
	s = {1,2,3}
	s.remove(2)
	print(s)  # {1, 3}
	
	# s.discard(item)移除一项item不存在时则什么也不做
	s = {1,2,3}
	s.discard(4)
	print(s)  # {1, 2, 3}
4.5 集合操作
	# 并集
	# set1.union(set2)
	# set1 | set2
	
	# 交集
	# set1.intersection(set2)
	# set1 & set2
	
	# 差集
	# set1.difference(set2)
	# set1 - set2
	
	# 对称差集
	# 在set1中或者在set2中，不会同时出现在两个集合中
	# set1.symmetric_difference(set2)
	# set1 ^ set2
	
	# 实例
	set1 = set([1, 2, 3, 4,'abc'])
	set2 = {1, 2, 3, 'cde'}
	# | 并
	s1 = set1.union(set2)
	print(s1)
	print(set1 | set2)
	
	# & 交
	s2 = set1.intersection(set2)
	print(s2)
	print(set1 & set2)
	
	# - 差
	s3 = set1.difference(set2)
	print(s3)
	print(set1 - set2)
	
	# ^ 对称差集
	s4 = set1.symmetric_difference(set2)
	print(s4)
	print(set1 ^ set2)