常用函数用法汇总
1、zip函数
>>>a = [1,2,3]
>>> b = [4,5,6]
>>> c = [4,5,6,7,8]
>>> zipped = zip(a,b) # 打包为元组的列表(注意:Python 3 中 zip 返回迭代器,需用 list(zipped) 才能看到如下结果)
[(1, 4), (2, 5), (3, 6)]
>>> zip(a,c) # 元素个数与最短的列表一致
[(1, 4), (2, 5), (3, 6)]
>>> zip(*zipped) # 与 zip 相反,*zipped 可理解为解压,返回二维矩阵式
[(1, 2, 3), (4, 5, 6)]
a = ["adasfds", "dgtyreras", "tergdfgvdf"]
for tmp in zip(*a):
print(tmp)
('a', 'd', 't')
('d', 'g', 'e')
('a', 't', 'r')
('s', 'y', 'g')
('f', 'r', 'd')
('d', 'e', 'f')
('s', 'r', 'g')
2、map函数
>>> def square(x) : # 计算平方数
... return x ** 2
>>> map(square, [1,2,3,4,5]) # 计算列表各个元素的平方
[1, 4, 9, 16, 25]
>>> map(lambda x: x ** 2, [1, 2, 3, 4, 5]) # 使用 lambda 匿名函数
[1, 4, 9, 16, 25]
# 提供了两个列表,对相同位置的列表数据进行相加
>>> map(lambda x, y: x + y, [1, 3, 5, 7, 9], [2, 4, 6, 8, 10])
[3, 7, 11, 15, 19]
3、tensor.masked_fill
>>> input = Variable(torch.randn(5, 5))
>>> input
Variable containing:
2.0344 -0.5450 0.3365 -0.1888 -2.1803
1.5221 -0.3823 0.8414 0.7836 -0.8481
-0.0345 -0.8643 0.6476 -0.2713 1.5645
0.8788 -2.2142 0.4022 0.1997 0.1474
2.9109 0.6006 -0.6745 -1.7262 0.6977
[torch.FloatTensor of size 5x5]
>>> mask = Variable(torch.zeros(5, 5))
>>> mask
Variable containing:
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
0 0 0 0 0
[torch.FloatTensor of size 5x5]
>>> input.masked_fill(mask == 0, -1e9)
Variable containing:
-1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09
-1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09
-1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09
-1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09
-1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09 -1.0000e+09
[torch.FloatTensor of size 5x5]
4、torch.transpose
>>> a = torch.randn(4, 3, 5)
>>> print(a.shape)
torch.Size([4, 3, 5])
>>> d = a.transpose(-2, -1)
>>> print(d.shape)
torch.Size([4, 5, 3])
5、tensor.view
# view是将tensor张量摊平后按顺序重组为需要变换的张量
# transpose是交换指定的两个维度,不移动底层内存中的数据,只改变步长(stride),因此结果与view重组的顺序不同
>>> x = torch.randn(4, 4)
>>> x.size()
torch.Size([4, 4])
>>> y = x.view(16)
>>> y.size()
torch.Size([16])
>>> z = x.view(-1, 8) # the size -1 is inferred from other dimensions
>>> z.size()
torch.Size([2, 8])
>>> a = torch.randn(1, 2, 3, 4)
>>> a.size()
torch.Size([1, 2, 3, 4])
>>> b = a.transpose(1, 2) # Swaps 2nd and 3rd dimension
>>> b.size()
torch.Size([1, 3, 2, 4])
>>> c = a.view(1, 3, 2, 4) # Does not change tensor layout in memory
>>> c.size()
torch.Size([1, 3, 2, 4])
>>> torch.equal(b, c)
False
# b和c看着形状是一样的,但是内部结构是不同的,需注意
6、shell文本预处理命令
cat cooking.stackexchange.txt | sed -e "s/\([.\!?,'/()]\)/ \1 /g" | tr "[:upper:]" "[:lower:]" > cooking.preprocessed.txt
7、文件解压
# 使用gunzip进行解压, 获取cc.zh.300.bin文件
# 第一种
gunzip cc.zh.300.bin.gz
# 第二种
tar xvzf cooking.stackexchange.tar.gz
# 第三种
unzip data/enwik9.zip -d data
8、eval
# eval 将字符串作为 Python 表达式求值并返回结果,相当于去掉字符串外层引号,还原其本来的类型。
>>> a = "123"
>>> type(a)
<class 'str'>
>>> b = eval(a)
>>> b
123
>>> type(b)
<class 'int'>
9、reduce
# 堆叠
from functools import reduce
list = [[1], [2], [3], [4], [5], [6], [7]]
b = reduce(lambda x, y: x + y, list)
print(b)
[1, 2, 3, 4, 5, 6, 7]
10、split
str = "Sun wu kong"
res = str.split(" ")
print(res)
['Sun', 'wu', 'kong']
# split后边括号内是按照需要分割字符串内部的某个特征进行分割的,返回的是列表
11、join
str = ('Sun', 'wu', 'kong')
res = "&".join(str)
print(res)
Sun&wu&kong
# join函数是将自定义的标识把给定的目标连接起来,返回一个字符串类型(目标类型可以是字符串,列表,元组)
12、python自带静态服务器
python3 -m http.server 8000
13、expand
a = torch.tensor([4])
print(a)
tensor([4])
b = a.expand(3, 4)
print(b)
tensor([[4, 4, 4, 4],
[4, 4, 4, 4],
[4, 4, 4, 4]])
14、yield
# 含有 yield 的函数是生成器:调用时返回迭代器,每次迭代执行到 yield 处产出一个值并暂停,下次迭代从暂停处继续。
15、topk
torch.topk(input, k, dim=None, largest=True, sorted=True, out=None) -> (Tensor, LongTensor)
- input:一个tensor数据
- k:指明是得到前k个数据以及其index
- dim: 指定在哪个维度上排序, 默认是最后一个维度
- largest:如果为True,按照大到小排序; 如果为False,按照小到大排序
- sorted:返回的结果按照顺序返回
- out:可缺省,不要
import torch
# k=1
pred = torch.randn((4, 5))
print(pred)
values, indices = pred.topk(1, dim=1, largest=True, sorted=True)
print(indices)
# 用max得到的结果,设置keepdim为True,避免降维。因为topk函数返回的index不降维,shape和输入一致。
_, indices_max = pred.max(dim=1, keepdim=True)
print(indices_max == indices)
# pred
tensor([[-0.1480, -0.9819, -0.3364, 0.7912, -0.3263],
[-0.8013, -0.9083, 0.7973, 0.1458, -0.9156],
[-0.2334, -0.0142, -0.5493, 0.0673, 0.8185],
[-0.4075, -0.1097, 0.8193, -0.2352, -0.9273]])
# indices, shape为 【4,1】,
tensor([[3], # 索引3代表第一个样本最可能属于第四类别
        [2], # 索引2代表第二个样本最可能属于第三类别
[4],
[2]])
# indices_max等于indices
tensor([[True],
[True],
[True],
[True]])
# 当k=2
import torch
pred = torch.randn((4, 5))
print(pred)
values, indices = pred.topk(2, dim=1, largest=True, sorted=True) # k=2
print(indices)
# pred
tensor([[-0.2203, -0.7538, 1.8789, 0.4451, -0.2526],
[-0.0413, 0.6366, 1.1155, 0.3484, 0.0395],
[ 0.0365, 0.5158, 1.1067, -0.9276, -0.2124],
[ 0.6232, 0.9912, -0.8562, 0.0148, 1.6413]])
# indices
tensor([[2, 3],
[2, 1],
[2, 1],
[4, 1]])
16、删除空文件
# Linux 命令-- 删除当前文件夹下的空文件
find ./ -name "*" -type f -size 0c | xargs -n 1 rm -f
17、isinstance
# isinstance() 会认为子类是一种父类类型,考虑继承关系。
>>>a = 2
>>> isinstance (a,int)
True
>>> isinstance (a,str)
False
>>> isinstance (a,(str,int,list)) # 是元组中的一个返回 True
True
18、gitee命令
git config --global user.name "shimu"
git config --global user.email "641726547@qq.com"
mkdir ss
cd ss
git init
touch README.md
git add README.md
git commit -m "first commit"
git remote add origin https://gitee.com/shemu/ss.git
git push -u origin master