linux简单命令4:管道命令下

最新推荐文章于 2022-11-29 10:25:02 发布

不善言谈者

最新推荐文章于 2022-11-29 10:25:02 发布

阅读量391

点赞数

分类专栏： linux

本文链接：https://blog.csdn.net/bushanyantanzhe/article/details/105907301

版权

linux 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

split

按字节将大文件切分成若干小文件

命令	英文	含义
split -b 10k 文件	byte	将大文件切分成若干10KB的小文件

#准备文件,通过rz命令上传一份大文档5.1M的demo.txt
[root@hadoop01 20200503]# ll
总用量 0
[root@hadoop01 20200503]# rz
rz waiting to receive.
 开始 zmodem 传输。 按 Ctrl+C 取消。

  100%    5190 KB 5190 KB/s 00:00:01       0 Errors

[root@hadoop01 20200503]# ll -h
总用量 5.1M
-rw-r--r--. 1 root root 5.1M 11月 10 2018 demo.txt
[root@hadoop01 20200503]# 

#切割,将demo切割成1M大小的若干份小文件

[root@hadoop01 20200503]# split -b 1M demo.txt 
[root@hadoop01 20200503]# ll
总用量 10384
-rw-r--r--. 1 root root 5314652 11月 10 2018 demo.txt
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xaa
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xab
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xac
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xad
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xae
-rw-r--r--. 1 root root   71772 4月  17 18:44 xaf
[root@hadoop01 20200503]#

按行数将大文件切分成若干小文件

命令	英文	含义
split -l 1000 文件	lines	将大文件切分成若干1000行的小文件

#新建01目录,将demo移到该目录下
[root@hadoop01 20200503]# mv demo.txt  01/
[root@hadoop01 20200503]# ll
总用量 5196
drwxr-xr-x. 2 root root    4096 4月  17 18:47 01
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xaa
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xab
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xac
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xad
-rw-r--r--. 1 root root 1048576 4月  17 18:44 xae
-rw-r--r--. 1 root root   71772 4月  17 18:44 xaf
[root@hadoop01 20200503]# cd 01
[root@hadoop01 01]# ll
总用量 5192
-rw-r--r--. 1 root root 5314652 11月 10 2018 demo.txt
[root@hadoop01 01]# 

#查看demo的行数,共有53712行
[root@hadoop01 01]# wc -l demo.txt 
53712 demo.txt

#按照每个10000行切割
[root@hadoop01 01]# split  -l 10000 demo.txt 
[root@hadoop01 01]# ll
总用量 10396
-rw-r--r--. 1 root root 5314652 11月 10 2018 demo.txt
-rw-r--r--. 1 root root 1337588 4月  17 18:49 xaa
-rw-r--r--. 1 root root 1285109 4月  17 18:49 xab
-rw-r--r--. 1 root root 1003928 4月  17 18:49 xac
-rw-r--r--. 1 root root  786525 4月  17 18:49 xad
-rw-r--r--. 1 root root  695600 4月  17 18:49 xae
-rw-r--r--. 1 root root  205902 4月  17 18:49 xaf

#查看切割后小文件的行数
[root@hadoop01 01]# wc -l x*
  10000 xaa
  10000 xab
  10000 xac
  10000 xad
  10000 xae
   3712 xaf
  53712 总用量

8. awk

通过 awk 实现 模糊查询, 按需提取字段, 还可以进行判断和简单的运算等.

搜索含有某字符串的内容

命令	含义
awk '/搜索字符/' test1.txt	模糊查询

# 准备文档
[root@node01 export]# cat test1.txt 
zhangsan 68 99 26
lisi 98 66 96
wangwu 38 33 86
zhaoliu 78 44 36
maq 88 22 66
zhouba 98 44 46

# 搜索含有 wang 或 zhao 的内容
[root@hadoop01 02]# cat test1.txt | awk '/wang|zhao/'
wangwu 38 33 86
zhaoliu 78 44 36

指定分割符, 根据下标显示内容

命令	含义
awk -F ',' '{print $1, $2, $3}' 文件	操作指定文件, 根据逗号分割, 打印第一段第二段第三段内容

选项

选项	英文	含义
`-F ','`	field-separator	使用指定字符分割
`$ + 数字`		获取第几段内容
`$0`		获取当前行内容
`NF`	number of fields	表示当前行共有多少个字段
`$NF`		代表最后一个字段
`$(NF-1)`		代表倒数第二个字段
`NR`	number of records	代表处理的是第几行

#按照' '空格切割,打印第一和第三,注意引号问题
[root@hadoop01 02]# cat test1.txt | awk -F ' '{print$1,$3}
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{print$1,$3}'
zhangsan 99
lisi 66
wangwu 33
zhaoliu 44
maq 22
zhouba 44

指定输出分割符, 根据下标显示内容

命令	含义
awk -F ',' '{OFS="==="}{print $1, $2, $3}' 1.txt	操作1.txt文件, 根据逗号分割, 打印第一段第二段第三段内容, 输出时各段之间用 === 分隔

选项

选项	英文	含义
`OFS="字符"`	output field separator	向外输出时的段分割字符串

转义序列	含义
\b	退格
\f	换页
\n	换行
\r	回车
\t	制表符

#按照"+++++++++++++++++++++++++"分隔打印
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{OFS="+++++++++++++++++++++++++"}{print$1,$2,43,$4}'
zhangsan+++++++++++++++++++++++++68+++++++++++++++++++++++++43+++++++++++++++++++++++++26
lisi+++++++++++++++++++++++++98+++++++++++++++++++++++++43+++++++++++++++++++++++++96
wangwu+++++++++++++++++++++++++38+++++++++++++++++++++++++43+++++++++++++++++++++++++86
zhaoliu+++++++++++++++++++++++++78+++++++++++++++++++++++++43+++++++++++++++++++++++++36
maq+++++++++++++++++++++++++88+++++++++++++++++++++++++43+++++++++++++++++++++++++66
zhouba+++++++++++++++++++++++++98+++++++++++++++++++++++++43+++++++++++++++++++++++++46
# 使用制表符 tab 作为输出分隔符进行打印
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{OFS="\t"}{print$1,$2,43,$4}'                       
zhangsan        68      43      26
lisi    98      43      96
wangwu  38      43      86
zhaoliu 78      43      36
maq     88      43      66
zhouba  98      43      46

调用 awk 提供的函数

命令	含义
awk -F ',' '{print toupper($2)}' 1.txt	操作1.txt文件, 根据逗号分割, 将第二段内容转成大写后打印

常用函数如下:

函数名	含义	作用
toupper()	upper	字符转成大写
tolower()	lower	字符转成小写
length()	length	返回字符长度

#将第一段大写打印
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{OFS="+++++++++++++++++++++++++"}{print toupper($1,$2)}'      
awk: 致命错误: 2 是 toupper 的无效参数个数
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{OFS="+++++++++++++++++++++++++"}{print toupper($1)}'   
ZHANGSAN
LISI
WANGWU
ZHAOLIU
MAQ
ZHOUBA

if语句查询符合条件的数字

命令	含义
awk -F ',' '{if($4>60) print $1, $4 }' test1.txt	如果第四段数字大于60,就显示 $1, $4
awk -F ',' '{if($4>60) print $1, $4, "大于60"; else print $1, $4, "小于等于60"}' test1.txt	如果第四段数字大于60,就显示 $1, $4 和 "大于60", 否则显示 $1, $4 和 "小于等于60"

选项

参数	含义
if($0 ~ "aa") print $0	如果这一行包含 "aa", 就打印这一行内容
if($1 ~ "aa") print $0	如果第一段包含 "aa", 就打印这一行内容
if($1 == "lisi") print $0	如果第一段等于 "lisi", 就打印这一行内容

# 打印$4大于60的数据,注意括号
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{if($4>60} print $1,$4,"大于60"'
awk: {if($4>60} print $1,$4,"大于60"
awk:          ^ syntax error
awk: {if($4>60} print $1,$4,"大于60"
awk:            ^ syntax error
awk: 命令行:1: {if($4>60} print $1,$4,"大于60"
awk: 命令行:1:                                  ^ 未预期的新行或字符串结束
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{if($4>60} print $1,$4,"大于60"}'
awk: {if($4>60} print $1,$4,"大于60"}
awk:          ^ syntax error
awk: {if($4>60} print $1,$4,"大于60"}
awk:            ^ syntax error
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{if($4>60 print $1,$4,"大于60"}' 
awk: {if($4>60 print $1,$4,"大于60"}
awk:           ^ syntax error
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{if($4>60) print $1,$4,"大于60"}'
lisi 96 大于60
wangwu 86 大于60
maq 66 大于60

# 打印大于60,和小于等于60的数据
[root@hadoop01 02]# cat test1.txt | awk -F ' ' '{if($4>60) print $1,$4,"大于60";else print $1,$4,"小于等于60"}'
zhangsan 26 小于等于60
lisi 96 大于60
wangwu 86 大于60
zhaoliu 36 小于等于60
maq 66 大于60
zhouba 46 小于等于60

求指定段平均数

命令	含义
awk 'BEGIN{初始化操作}{每行都执行} END{结束时操作}' 文件名	BEGIN{ 这里面放的是执行前的语句 } {这里面放的是处理每一行时要执行的语句} END {这里面放的是处理完所有的行后要执行的语句 }


# 查看总和, 基本结构为 awk -F ' ' 'BEGIN{}{}END{}'
[root@hadoop01 02]# cat test1.txt | awk -F ' ' 'BEGIN{}{}END{}'                                                
[root@hadoop01 02]# cat test1.txt | awk -F ' ' 'BEGIN{}{total=total+$4}END{print total}'
356

# 查看总和, 总行数
[root@hadoop01 02]# cat test1.txt | awk -F ' ' 'BEGIN{}{total=total+$4}END{print total,NR}'
356 6

# 查看总和, 总行数, 平均分
[root@hadoop01 02]# cat test1.txt | awk -F ' ' 'BEGIN{}{total=total+$4}END{print total,NR,(total/NR)}'
356 6 59.3333

9. sed

通过 sed 可以实现过滤和替换的功能.

实现查询功能

命令	含义
sed 可选项 目标文件	对目标文件进行过滤查询或替换

可选参数

可选项	英文	含义
p	print	打印
$		代表最后一行
`-n`		仅显示处理后的结果
`-e`	expression	根据表达式进行处理

可选项	含义
=	打印当前行号

# 查看所有内容
[root@hadoop01 02]# cat test3.txt 
aaa java root
bbb hello
ccc rt
ddd root nologin
eee rtt
fff ROOT nologin
ggg rttt


# 查看第3~5行内容
[root@hadoop01 02]# cat test3.txt | sed -n -e '3,5p'
ccc rt
ddd root nologin
eee rtt

#查看3到最后一行$的内容
[root@hadoop01 02]# cat test3.txt | sed -n -e '3,$p'
ccc rt
ddd root nologin
eee rtt
fff ROOT nologin
ggg rttt

#查看3到最后一行$的内容,并显示行号
[root@hadoop01 02]# cat -n test3.txt | sed -n -e '3,$p'
     3  ccc rt
     4  ddd root nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt
     
     #展示行号
     [root@hadoop01 02]# cat test3.txt | sed -n -e '3,$=' 
3
4
5
6
7
#行号加内容
[root@hadoop01 02]# cat test3.txt | sed -n -e '3,$=' -e '3,$p'
3
ccc rt
4
ddd root nologin
5
eee rtt
6
fff ROOT nologin
7
ggg rttt
   
#找到包含root的内容   
[root@hadoop01 02]# cat test3.txt | sed -n -e '/root/p'
aaa java root
ddd root nologin


[root@hadoop01 02]# cat test3.txt | awk '/root/'
aaa java root
ddd root nologin


[root@hadoop01 02]# cat test3.txt | grep root
aaa java root
ddd root nologin

可选项	英文	含义
I	ignore	忽略大小写

#展示含有root(不区分大小写)的内容和行号
[root@hadoop01 02]# cat -n test3.txt | sed -n -e '/root/Ip'
     1  aaa java root
     4  ddd root nologin
     6  fff ROOT nologin
     
     [root@hadoop01 02]# cat -n test3.txt | awk '/[rR][oO][oO][tT]/'  
     1  aaa java root
     4  ddd root nologin
     6  fff ROOT nologin
     
     [root@hadoop01 02]# cat -n test3.txt | grep -i root
     1  aaa java root
     4  ddd root nologin
     6  fff ROOT nologin

可选项	英文	含义
`-r`	regexp-extended	使用扩展正则表达式(支持 +、?、| 等元字符)

# 匹配一个或多个字母`r`后面紧跟`t`的行(r+t)，并显示行号；若要匹配`r`后面跟多个`t`，应写成 rt+
[root@hadoop01 02]# cat -n test3.txt | sed -n -e '/r+t/Ip'      
[root@hadoop01 02]# cat -n test3.txt | sed -n -e -r '/r+t/Ip'
sed：-e 表达式 #1，字符 1：未知的命令：“-”
[root@hadoop01 02]# cat -n test3.txt | sed -n -r -e '/r+t/Ip'
     3  ccc rt
     5  eee rtt
     7  ggg rttt
[root@hadoop01 02]# cat -n test3.txt | sed -n -r -e '/r+t/p' 
     3  ccc rt
     5  eee rtt
     7  ggg rttt

实现删除功能

可选项	英文	含义
`d`	delete	删除指定内容

# 删除前3行数据,并显示行号
[root@hadoop01 02]# cat -n test3.txt | sed -e '1,3d' 
     4  ddd root nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt
     
   

[root@hadoop01 02]# nl test3.txt | sed -e '1,3d'      
     4  ddd root nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt

实现修改功能

练习1: 在01.txt的第二行下面添加aaaaa,并显示行号

参数	英文	含义
i	insert	目标前面插入内容
a	append	目标后面追加内容

答案：

#在第三行下面追加 sjhshjfdhdadhakdajdha,并显示行号
[root@hadoop01 02]# nl test3.txt | sed -e '3a sjhshjfdhdadhakdajdha'
     1  aaa java root
     2  bbb hello
     3  ccc rt
sjhshjfdhdadhakdajdha
     4  ddd root nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt
     
 #在第一行前加sjhshjfdhdadhakdajdha
 [root@hadoop01 02]# nl test3.txt | sed -e '1i sjhshjfdhdadhakdajdha'
sjhshjfdhdadhakdajdha
     1  aaa java root
     2  bbb hello
     3  ccc rt
     4  ddd root nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt

实现替换功能

命令	英文	含义
s/oldString/newString/	substitute	替换

#将root替换成bushanyantan
     
[root@hadoop01 02]# nl test3.txt | sed -e 's/root/bushanyantan/'    
     1  aaa java bushanyantan
     2  bbb hello
     3  ccc rt
     4  ddd bushanyantan nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt

选项	英文	含义
`2c 新字符串`	change	使用新字符串替换选中的行

#把第二行替换成bushanyantan
[root@hadoop01 02]# nl test3.txt | sed -e '2c bushanyantan'     
     1  aaa java root
bushanyantan
     3  ccc rt
     4  ddd root nologin
     5  eee rtt
     6  fff ROOT nologin
     7  ggg rttt

我们之前的替换对原文件没有影响

对原文件进行操作

参数	英文	含义
-i	in-place	替换原有文件内容

答案：

# 备份原始文件内容
[root@hadoop01 02]# cp test3.txt  test4.txt
[root@hadoop01 02]# ll
总用量 12
-rw-r--r--. 1 root root 94 4月  17 18:52 test1.txt
-rw-r--r--. 1 root root 82 4月  17 19:31 test3.txt
-rw-r--r--. 1 root root 82 4月  17 20:15 test4.txt


# 替换
[root@hadoop01 02]# sed -i -e '2c bushanyantan' test3.txt 
[root@hadoop01 02]# cat test3.txt 
aaa java root
bushanyantan
ccc rt
ddd root nologin
eee rtt
fff ROOT nologin
ggg rttt

综合练习

符号	含义
^	表示开始	^aaa 表示以 aaa 开始
$	表示结尾	bbb$ 表示以 bbb 结尾
.*	表示任意	^.* 表示以任意字符开始

获取ip地址

# 查看网卡信息
[root@hadoop01 02]# ifconfig eth0
eth0      Link encap:Ethernet  HWaddr 00:0C:29:36:8F:F0  
          inet addr:192.168.100.201  Bcast:192.168.100.255  Mask:255.255.255.0
          inet6 addr: fe80::20c:29ff:fe36:8ff0/64 Scope:Link
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:472555 errors:0 dropped:0 overruns:0 frame:0
          TX packets:123560 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000 
          RX bytes:481323587 (459.0 MiB)  TX bytes:43834715 (41.8 MiB)

# 根据关键字搜索行
[root@hadoop01 02]# ifconfig eth0 | grep 'inet addr:'
          inet addr:192.168.100.201  Bcast:192.168.100.255  Mask:255.255.255.0
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/' 
          inet addr:192.168.100.201  Bcast:192.168.100.255  Mask:255.255.255.0
[root@hadoop01 02]# 
          
# 去掉ip地址以前的部分
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^*addr://'
          inet addr:192.168.100.201  Bcast:192.168.100.255  Mask:255.255.255.0

# 去掉ip地址以后的部分
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^*addr://' |sed -e 's/Bcast*//' 
          inet addr:192.168.100.201  :192.168.100.255  Mask:255.255.255.0
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^*addr://' |sed -e 's/Bcast*$//'
          inet addr:192.168.100.201  Bcast:192.168.100.255  Mask:255.255.255.0
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^*addr://' |sed -e 's/Bcast:*$//'
          inet addr:192.168.100.201  Bcast:192.168.100.255  Mask:255.255.255.0
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^*addr://' |sed -e 's/Bcast:.*$//'
          inet addr:192.168.100.201  
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^*>addr://' |sed -e 's/Bcast:.*$//'
          inet addr:192.168.100.201  
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^.*>addr://' |sed -e 's/Bcast:.*$//'
          inet addr:192.168.100.201  
[root@hadoop01 02]# ifconfig eth0 | awk '/inet addr:/'|sed -e 's/^.*addr://' |sed -e 's/Bcast:.*$//' 
192.168.100.201

不善言谈者

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
linux简单命令4:管道命令下

split按字节将大文件切分成若干小文件命令英文含义split -b 10k 文件byte将大文件切分成若干10KB的小文件#准备文件,通过rz命令上传一份大文档5.1M的demo.txt[root@hadoop01 20200503]# ll总用量 0[root@hadoop01 20200503]# rzrz waiting to ...
复制链接

扫一扫