- 微信
- 微博
  
  分享文章到微博
- 复制链接
  
  复制链接到剪贴板

[华为云在线课程][Linux文本处理工具和正则表达式][第二章文本常见处理工具][学习笔记]

John2021 发表于 2022/04/12 08:14:35 2022/04/12

【摘要】文件内容查看命令；查看文本文件内容：cat。cat可以查看文本内容。格式：cat [OPTIONS]... [FILE]... 常见选项：-E 显示行结束符$；-A 显示所有控制符；-n 对显示出的每一行进行编号；-b 非空行编号；-s 压缩连续的空行成一行。例子：[root@localhost Code]# cat...

文件内容查看命令

查看文本文件内容

cat

cat可以查看文本内容
格式：

cat [OPTIONS]... [FILE]...

常见选项：

-E          显示行结束符$
-A          显示所有控制符
-n          对显示出的每一行进行编号
-b          非空行编号
-s          压缩连续的空行成一行

例子：

[root@localhost Code]# cat -E 99.sh
#!/bin/bash$
$
for i in {1..9};do$
    for j in `seq $i`;do$
        echo -ne "${j}x${i}=$((i*j))\t" $
    done$
    echo$
done$
[root@localhost Code]# cat -n 99.sh
     1  #!/bin/bash
     2
     3  for i in {1..9};do
     4      for j in `seq $i`;do
     5          echo -ne "${j}x${i}=$((i*j))\t"
     6      done
     7      echo
     8  done
[root@localhost Code]# cat -b 99.sh
     1  #!/bin/bash

     2  for i in {1..9};do
     3      for j in `seq $i`;do
     4          echo -ne "${j}x${i}=$((i*j))\t"
     5      done
     6      echo
     7  done

nl

显示行号，相当于cat -b

[root@localhost Code]# nl 99.sh
     1  #!/bin/bash

     2  for i in {1..9};do
     3      for j in `seq $i`;do
     4          echo -ne "${j}x${i}=$((i*j))\t"
     5      done
     6      echo
     7  done

tac

按行逆向显示文本内容（最后一行最先输出）

[root@localhost Code]# tac 99.sh
done
    echo
    done
        echo -ne "${j}x${i}=$((i*j))\t"
    for j in `seq $i`;do
for i in {1..9};do

#!/bin/bash
[root@localhost Code]# seq 3
1
2
3
[root@localhost Code]# seq 3|tac
3
2
1

rev

将同一行的内容逆向显示

[root@localhost Code]# cat hello.txt
hello
world
123
456
789
[root@localhost Code]# tac hello.txt
789
456
123
world
hello
[root@localhost Code]# rev hello.txt
olleh
dlrow
321
654
987
[root@localhost Code]# rev
abcdef
fedcba
[root@localhost Code]# echo {1..10}
1 2 3 4 5 6 7 8 9 10
[root@localhost Code]# echo {1..10}|rev
01 9 8 7 6 5 4 3 2 1

查看非文本文件的内容

hexdump

例子：

[root@localhost Code]# hexdump -C -n 512 hello.txt
00000000  68 65 6c 6c 6f 0a 77 6f  72 6c 64 0a 31 32 33 0a  |hello.world.123.|
00000010  34 35 36 0a 37 38 39 0a                           |456.789.|
00000018

od

od即dump files in octal and other formats
例子：

[root@localhost Code]# echo {a..z} | tr -d ' ' | od -t x
0000000 64636261 68676665 6c6b6a69 706f6e6d
0000020 74737271 78777675 000a7a79
0000033
[root@localhost Code]# echo {a..z} | tr -d ' ' | od -t x1
0000000 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70
0000020 71 72 73 74 75 76 77 78 79 7a 0a
0000033
[root@localhost Code]# echo {a..z} | tr -d ' ' | od -t x1z
0000000 61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70  >abcdefghijklmnop<
0000020 71 72 73 74 75 76 77 78 79 7a 0a                 >qrstuvwxyz.<
0000033

xxd

[root@localhost Code]# echo {a..z} | tr -d ' ' | xxd
0000000: 6162 6364 6566 6768 696a 6b6c 6d6e 6f70  abcdefghijklmnop
0000010: 7172 7374 7576 7778 797a 0a              qrstuvwxyz.

分页查看文件内容

more

more可以实现分页查看文件，可以配合管道实现输出信息的分页
格式

more [OPTIONS...] FILE...

选项：
-d：显示翻页及退出提示

less

less，也可以实现分页查看文件或STDIN输出，less命令是man命令使用的分页器
查看时有用的命令包括：

/文本 搜索 文本
n/N 跳到下一个 或 上一个匹配

例子：

[root@localhost Code]# cat /etc/init.d/functions |less

显示文本前或后行内容

head

可以显示文件或标准输入的前面行
格式：

head [OPTIONS]... [FILE]...

选项：

-c #        指定获取前#字节
-n #        指定获取前#行
-#          同上

例子：

[root@localhost Code]# head -n 2 /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
[root@localhost Code]# head -2 /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
[root@localhost Code]# echo helloworld | head -c5
hello[root@localhost Code]#

# 生成随机密码
[root@localhost ~]# cat /dev/urandom | tr -dc '[:alnum:]' | head -c5
rnNTa[root@localhost ~]#
# 生成随机密码并保存到pass.txt中
[root@localhost ~]# cat /dev/urandom | tr -dc '[:alnum:]' | head -c5 | tee pass.txt
GbfNT[root@localhost ~]#

[root@localhost Code]# head -n -3 hello.txt
hello
world
[root@localhost Code]# head -n -4 hello.txt
hello
[root@localhost Code]# cat hello.txt
hello
world
123
456
789

tail

tail和head相反，查看文件或标准输入的倒数行
格式：

tail [OPTION]... [FILE]...

常用选项：

-c #        指定获取后#字节
-n #        指定获取后#行
-#          同上
-f          跟踪显示文件fd新追加的内容，常用于日志监控，相当于--follow=descriptor，当文件删除再新建同名文件，将无法继续跟踪文件
-F          跟踪文件名，相当于--follow=name --retry，当文件删除再新建同名文件，将可以继续跟踪文件

tailf       类似tail -f，当文件不增长时并不访问文件

例子：

[root@localhost Code]# tail -n 3 hello.txt
123
456
789
[root@localhost Code]# tail -n +3 hello.txt
123
456
789
[root@localhost Code]# tail -n 5 hello.txt
hello
world
123
456
789

head和tail总结

按列抽取文本cut

cut命令可以提取文本文件或STDIN数据的指定列
格式

cut [OPTION]... [FILE]...

常用选项：

-d DELIMITER    指明分隔符，默认tab
-f FIELDS:
        #：第#个字段，例如：3
        #,#[,#]：离散的多个字段，例如：1,3,6
        #-#：连续的多个字段，例如：1-6
        混合使用：1-3,7
-c              按字符切割
--output-delimiter=STRING 指定输出分隔符

例子：截取文本

# 原始数据
[root@localhost ~]# cat /etc/passwd
root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
# 以冒号(:)作为分隔符，抽取文本的第1列、第3到4列和第7列
[root@localhost Code]# cut -d: -f1,3-4,7 /etc/passwd
root:0:0:/bin/bash
bin:1:1:/sbin/nologin

例子：截取ip

[root@localhost ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 10.0.0.110  netmask 255.255.255.0  broadcast 10.0.0.255
        inet6 fe80::5aa3:63ca:bbda:8721  prefixlen 64  scopeid 0x20<link>
        ether 00:0c:29:29:2c:9c  txqueuelen 1000  (Ethernet)
        RX packets 11595  bytes 2970318 (2.8 MiB)
        RX errors 0  dropped 0  overruns 0  frame 0
        TX packets 13212  bytes 16453877 (15.6 MiB)
        TX errors 0  dropped 0 overruns 0  carrier 0  collisions 0
[root@localhost Code]# ifconfig | head -n2
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 10.0.0.110  netmask 255.255.255.0  broadcast 10.0.0.255
[root@localhost Code]# ifconfig | head -n2 | tail -n1
        inet 10.0.0.110  netmask 255.255.255.0  broadcast 10.0.0.255
[root@localhost Code]# ifconfig | head -n2 | tail -n1 | tr -s " "
 inet 10.0.0.110 netmask 255.255.255.0 broadcast 10.0.0.255
[root@localhost Code]# ifconfig | head -n2 | tail -n1 | tr -s " "|cut -d " " -f3
10.0.0.110
[root@localhost Code]# ifconfig | head -n2 | tail -n1 | cut -d " " -f10
10.0.0.110

例子：截取磁盘利用率

[root@localhost ~]# df
Filesystem              1K-blocks    Used Available Use% Mounted on
devtmpfs                   914484       0    914484   0% /dev
tmpfs                      931516       0    931516   0% /dev/shm
tmpfs                      931516   10392    921124   2% /run
tmpfs                      931516       0    931516   0% /sys/fs/cgroup
/dev/mapper/centos-root  17811456 4673740  13137716  27% /
/dev/sda1                 1038336  189120    849216  19% /boot
tmpfs                      186304      12    186292   1% /run/user/42
tmpfs                      186304       0    186304   0% /run/user/0

[root@localhost Code]# df | tr -s " "
Filesystem 1K-blocks Used Available Use% Mounted on
devtmpfs 914484 0 914484 0% /dev
tmpfs 931516 0 931516 0% /dev/shm
tmpfs 931516 10392 921124 2% /run
tmpfs 931516 0 931516 0% /sys/fs/cgroup
/dev/mapper/centos-root 17811456 4673752 13137704 27% /
/dev/sda1 1038336 189120 849216 19% /boot
tmpfs 186304 12 186292 1% /run/user/42
tmpfs 186304 0 186304 0% /run/user/0
[root@localhost Code]# df | tr -s " "|cut -d" " -f5
Use%
0%
0%
2%
0%
27%
19%
1%
0%
[root@localhost Code]# df | tr -s " "|cut -d" " -f5|tr -dc "[0-9\n]"

0
0
2
0
27
19
1
0

[root@localhost Code]# df | tr -s " " %
Filesystem%1K-blocks%Used%Available%Use%Mounted%on
devtmpfs%914484%0%914484%0%/dev
tmpfs%931516%0%931516%0%/dev/shm
tmpfs%931516%10392%921124%2%/run
tmpfs%931516%0%931516%0%/sys/fs/cgroup
/dev/mapper/centos-root%17811456%4673984%13137472%27%/
/dev/sda1%1038336%189120%849216%19%/boot
tmpfs%186304%12%186292%1%/run/user/42
tmpfs%186304%0%186304%0%/run/user/0
[root@localhost Code]# df | tr -s " " % | cut -d% -f5
Use
0
0
2
0
27
19
1
0
[root@localhost Code]# df | tr -s " " % | cut -d% -f5|tr -d "[:alpha:]"

0
0
2
0
27
19
1
0

[root@localhost Code]# df | cut -c54-55
se
 0
 0
 2
 0
27
19
 1
 0
[root@localhost Code]# df | cut -c54-55 | tr -d "[:alpha:]"

 0
 0
 2
 0
27
19
 1
 0

[root@localhost Code]# cut -d: -f1,3,7 --output-delimiter="===" /etc/passwd | head -3
root===0===/bin/bash
bin===1===/sbin/nologin
daemon===2===/sbin/nologin

[root@localhost Code]# df | tr -s " "
Filesystem 1K-blocks Used Available Use% Mounted on
devtmpfs 914484 0 914484 0% /dev
tmpfs 931516 0 931516 0% /dev/shm
tmpfs 931516 10424 921092 2% /run
tmpfs 931516 0 931516 0% /sys/fs/cgroup
/dev/mapper/centos-root 17811456 4665720 13145736 27% /
/dev/sda1 1038336 189120 849216 19% /boot
tmpfs 186304 12 186292 1% /run/user/42
tmpfs 186304 0 186304 0% /run/user/0
[root@localhost Code]# df | tr -s " "|cut -d" " -f5
Use%
0%
0%
2%
0%
27%
19%
1%
0%
[root@localhost Code]# df | tr -s " "|cut -d" " -f5|tr -d %
Use
0
0
2
0
27
19
1
0

[root@localhost Code]# df | tr -s " " "%"
Filesystem%1K-blocks%Used%Available%Use%Mounted%on
devtmpfs%914484%0%914484%0%/dev
tmpfs%931516%0%931516%0%/dev/shm
tmpfs%931516%10424%921092%2%/run
tmpfs%931516%0%931516%0%/sys/fs/cgroup
/dev/mapper/centos-root%17811456%4665720%13145736%27%/
/dev/sda1%1038336%189120%849216%19%/boot
tmpfs%186304%12%186292%1%/run/user/42
tmpfs%186304%0%186304%0%/run/user/0
[root@localhost Code]# df | tr -s " " "%"|cut -d% -f5
Use
0
0
2
0
27
19
1
0

合并多个文件paste

paste合并多个文件同行号的列到一行
格式

paste [OPTION]... [FILE]...

常用选项：

-d      分隔符：指定分隔符，默认用TAB
-s      所有行合成一行显示

例子：

[root@localhost Code]# cat alpha.log
a
b
c
d
e
f
g
[root@localhost Code]# cat seq.log
1
2
3
4
5
6
7
[root@localhost Code]# cat alpha.log seq.log
a
b
c
d
e
f
g
1
2
3
4
5
6
7
[root@localhost Code]# paste alpha.log seq.log
a       1
b       2
c       3
d       4
e       5
f       6
g       7
[root@localhost Code]# paste -d":"  alpha.log seq.log
a:1
b:2
c:3
d:4
e:5
f:6
g:7
[root@localhost Code]# paste -s seq.log
1       2       3       4       5       6       7
[root@localhost Code]# paste -s alpha.log
a       b       c       d       e       f       g
[root@localhost Code]# paste -s alpha.log seq.log
a       b       c       d       e       f       g
1       2       3       4       5       6       7

例子：批量修改密码

[root@localhost Code]# paste -d: user.txt pass.txt
hello:1234
world:5678
[root@localhost Code]# paste -d: user.txt pass.txt |chpasswd

分析文本的工具

文本数据统计：wc
整理文本：sort
比较文件：diff和patch

收集文本统计数据wc

wc命令可用于统计文件的总行数、单词总数、字节总数和字符总数
可以对文件或STDIN中的数据统计
常用选项

-l      只计数行数
-w      只计数单词总数
-c      只计数字节总数
-m      只计数字符总数
-L      显示文件中最长行的长度

例子：

[root@localhost Code]# wc hello.txt
 5  5 24 hello.txt
行数  单词数  字节数
[root@localhost Code]# cat hello.txt
hello
world
123
456
789

例子：单词文件

[root@localhost Code]# wc -l /usr/share/dict/linux.words
479828 /usr/share/dict/linux.words

文本排序sort

把整理过的文本显示在STDOUT，不改变原始文件
格式：

sort [options] file(s)

常用选项

-r      执行反方向（逆序，即由大到小）整理
-R      随机排序
-n      执行按数字大小整理
-h      人类可读排序，如：2K 1G
-f      选项忽略（fold）字符串中的字符大小写
-u      选项（独特，unique），合并重复项，即去重
-t c    选项使用c作为字段界定符
-k #    选项按照使用c字符分隔的第#列来整理，能够使用多次

例子：

[root@localhost ~]# cut -d: -f1,3 /etc/passwd | head -n3
root:0
bin:1
daemon:2
[root@localhost ~]# cut -d: -f1,3 /etc/passwd | head -n3|sort -t: -k2 -nr
daemon:2
bin:1
root:0

例子：统计分区利用率

Filesystem              1K-blocks    Used Available Use% Mounted on
devtmpfs                   914484       0    914484   0% /dev
tmpfs                      931516       0    931516   0% /dev/shm
tmpfs                      931516   10788    920728   2% /run
tmpfs                      931516       0    931516   0% /sys/fs/cgroup
/dev/mapper/centos-root  17811456 4667636  13143820  27% /
/dev/sda1                 1038336  189120    849216  19% /boot
tmpfs                      186304       0    186304   0% /run/user/0
tmpfs                      186304      24    186280   1% /run/user/1000

# 查看分区利用率最高值
[root@localhost ~]# df | tr -s " " "%"|cut -d% -f5|sort -nr|head -1
27
[root@localhost ~]# df | tr -s " " "%"|cut -d% -f5|tr -d '[:alpha:]'|sort

0
0
0
0
1
19
2
27

[root@localhost ~]# df | tr -s " " "%"|cut -d% -f5|tr -d '[:alpha:]'|sort -n|tail -n1
27

面试题：有两个文件，a.txt和b.txt，合并两个文件，并输出时确保每个数字也唯一

# a.txt中的每一个数字在本文件唯一
[root@localhost Code]# cat a.txt
12
34
56
78
90
# b.txt中的每一个数字在本文件唯一
[root@localhost Code]# cat b.txt
56
78
90
11
22
33
# 将两个文件合并后去重，重复的行只保留一份
[root@localhost Code]# cat a.txt b.txt |sort -u
11
12
22
33
34
56
78
90

去重uniq

uniq命令从输入中删除前后相接的重复的行
格式：

uniq [OPTION]... [FILE]...

常见选项：

-c      显示每行重复出现的次数
-d      仅显示重复过的行
-u      仅显示不曾重复的行

uniq常和sort命令一起配合使用：

sort user.txt | uniq -c

例子：取两个文件的相同和不同的行

[root@localhost Code]# cat a.txt
12
34
56
78
90
[root@localhost Code]# cat b.txt
56
78
90
11
22
33
# 取文件的共同行
[root@localhost Code]# cat a.txt b.txt | sort | uniq -d
56
78
90
# 取文件的不同行
[root@localhost Code]# cat a.txt b.txt | sort | uniq -u
11
12
22
33
34

比较文件

diff

diff命令比较两个文件之间的区别

-u 选项来输出"统一的（unified）"diff格式文件，最适用于补丁文件

例子：

[root@localhost Code]# cat a.txt
12
34
56
78
90
[root@localhost Code]# cat b.txt
56
78
90
11
22
33
[root@localhost Code]# diff a.txt b.txt
1,2d0
< 12
< 34
5a4,6
> 11
> 22
> 33
[root@localhost Code]# diff a.txt b.txt -c
*** a.txt       2022-04-12 07:12:44.273846443 +0800 #表示第一个文件
--- b.txt       2022-04-12 07:13:32.557843941 +0800 #表示第二个文件
***************
*** 1,5 ****
- 12 #表示第一个文件独有
- 34
  56
  78
  90
--- 1,6 ----
  56
  78
  90
+ 11 #表示第二个文件独有
+ 22
+ 33
[root@localhost Code]# diff a.txt b.txt -u
--- a.txt       2022-04-12 07:12:44.273846443 +0800
+++ b.txt       2022-04-12 07:13:32.557843941 +0800
@@ -1,5 +1,6 @@
-12
-34
 56
 78
 90
+11
+22
+33

patch

patch 复制在其他文件中进行的改变（要谨慎使用）

-b  选项来自动备份改变了的文件

例子：

diff -u foo.conf foo2.conf > foo.patch
patch -b foo.conf foo.patch

vimdiff

相当于vim -d

[root@localhost Code]# cat a.txt
12
34
56
78
90
[root@localhost Code]# cat b.txt
56
78
90
11
22
33
[root@localhost Code]# which vimdiff
/usr/bin/vimdiff
[root@localhost Code]# ll /usr/bin/vimdiff
lrwxrwxrwx. 1 root root 3 Apr  4 13:40 /usr/bin/vimdiff -> vim
[root@localhost Code]# vimdiff a.txt b.txt
2 files to edit
[root@localhost Code]# cat a.txt
12
34
56
78
90
[root@localhost Code]# cat b.txt
56
78
90
11
22
33

cmp

例子：查看二进制文件的不同

[root@localhost Code]# ll /usr/bin/dir /usr/bin/ls
-rwxr-xr-x. 1 root root 117608 Aug 20  2019 /usr/bin/dir
-rwxr-xr-x. 1 root root 117608 Aug 20  2019 /usr/bin/ls
[root@localhost Code]# ll /usr/bin/dir /usr/bin/ls -i
50724194 -rwxr-xr-x. 1 root root 117608 Aug 20  2019 /usr/bin/dir
50724215 -rwxr-xr-x. 1 root root 117608 Aug 20  2019 /usr/bin/ls
[root@localhost Code]# diff /usr/bin/dir /usr/bin/ls -i
Binary files /usr/bin/dir and /usr/bin/ls differ
[root@localhost Code]# cmp /bin/dir /bin/ls
/bin/dir /bin/ls differ: byte 645, line 1
[root@localhost Code]# hexdump -s 730 -Cn 7 /bin/dir
000002da  00 00 00 00 00 00 00                              |.......|
000002e1
[root@localhost Code]# hexdump -s 730 -Cn 7 /bin/ls
000002da  00 00 00 00 00 00 00                              |.......|
000002e1

【声明】本内容来自华为云开发者社区博主，不代表华为云及华为云开发者社区的观点和立场。转载时必须标注文章的来源（华为云社区）、文章链接、文章作者等基本信息，否则作者和本社区有权追究责任。如果您发现本社区中有涉嫌抄袭的内容，欢迎发送邮件进行举报，并提供相关证据，一经查实，本社区将立刻删除涉嫌侵权内容，举报邮箱： cloudbbs@huaweicloud.com

点赞
收藏
关注作者

0/1000

抱歉，系统识别当前为高风险访问，暂不支持该操作

全部回复

上滑加载中

设置昵称

在此一键设置昵称，即可参与社区互动！

*长度不超过10个汉字或20个英文字符，设置后3个月内不可修改。

确认取消

加入云驻计划，成为创作者

华为云周边好礼
免费体验产品
特殊身份标识
线下官方门票
内部专家零距离
与10000+优质创作者共同成长

立即加入

[华为云在线课程][Linux文本处理工具和正则表达式][第二章文本常见处理工具][学习笔记]

文件内容查看命令

查看文本文件内容

cat

nl

tac

rev

查看非文本文件的内容

hexdump

od

xxd

分页查看文件内容

more

less

显示文本前或后行内容

head

tail

head和tail总结

按列抽取文本cut

合并多个文件paste

分析文本的工具

收集文本统计数据wc

文本排序sort

去重uniq

比较文件

diff

patch

vimdiff

cmp

全部回复

设置昵称

关于作者

目录

加入云驻计划，成为创作者

[华为云在线课程][Linux文本处理工具和正则表达式][第二章文本常见处理工具][学习笔记]

文件内容查看命令

查看文本文件内容

cat

nl

tac

rev

查看非文本文件的内容

hexdump

od

xxd

分页查看文件内容

more

less

显示文本前或后行内容

head

tail

head和tail总结

按列抽取文本cut

合并多个文件paste

分析文本的工具

收集文本统计数据wc

文本排序sort

去重uniq

比较文件

diff

patch

vimdiff

cmp

全部回复

设置昵称

关于作者

目录

热门推荐查看更多

相关文章

加入云驻计划，成为创作者

相关产品