1、什么是正则表达式
正则表达式是由普通字符和元字符组成的表达式,通过这种模式来匹配特定的内容。普通字符包括大小写字母和数字;元字符是一些特殊符号,它们不表示字面上的含义,而会被解释为一些特定的含义。正则表达式区分大小写。正则表达式主要用于减轻管理员的工作负担。
2、正则表达式分类
1、基本的正则表达式(Basic Regular Expression 又叫 Basic RegEx 简称 BREs)
2、扩展的正则表达式(Extended Regular Expression 又叫 Extended RegEx 简称 EREs)
3、Perl 的正则表达式(Perl Regular Expression 又叫 Perl RegEx 简称 PREs)
说明:只有掌握了正则表达式,才能全面地掌握 Linux 下的常用文本工具(例如:grep、egrep、GNU sed、 Awk 等) 的用法
i.匹配字符:
. :匹配任意单个字符
[ ]:匹配指定范围内的任意单个字符
[^ ]:匹配指定范围外的任意单个字符(出现在[ ]中的^代表排除)
例:^[0-9] 匹配行首字符为数字的行
[^ ] ^出现在[]内 代表排除“非” 举例:[^0-9] 匹配任意一个非数字字符 [^a-z] 匹配任意一个非小写字母的字符
[0-9]: 数字
[a-z]:小写字母
[A-Z]:大写字母
touch a{1..3}c
touch a{1..3}bc
[root@ localhost test]# ls | grep 3
a3bc
a3c
[root@ localhost test]# ls | grep 'a.c'
a1c
a2c
a3c
[root@ localhost test]# ls | grep 'a..c'
a1bc
a2bc
a3bc
[root@ localhost test]# ls | grep '.*'
a1bc
a1c
a2bc
a2c
a3bc
a3c
[root@ localhost test]# ls | grep 'a[1-2]c'
a1c
a2c
[root@ localhost test]# ls | grep 'a[1-2][a-b]c'
a1bc
a2bc
[root@ localhost test]#
[root@ localhost test]# touch a{1..3}{d..g}c
[root@ localhost test]# ls
a1bc a1c a1dc a1ec a1fc a1gc a2bc a2c a2dc a2ec a2fc a2gc a3bc a3c a3dc a3ec a3fc a3gc
[root@ localhost test]# ls | grep 'a.[d-f]c'
a1dc
a1ec
a1fc
a2dc
a2ec
a2fc
a3dc
a3ec
a3fc
[root@ localhost test]# ls | grep 'a.[^b-f]c'
a1gc
a2gc
a3gc
[root@ localhost test]#
[root@ localhost test]# touch {4..7}a{b..e}c
[root@ localhost test]# ls
4abc 4adc 5abc 5adc 6abc 6adc 7abc 7adc a1bc a1dc a1fc a2bc a2dc a2fc a3bc a3dc a3fc
4acc 4aec 5acc 5aec 6acc 6aec 7acc 7aec a1c a1ec a1gc a2c a2ec a2gc a3c a3ec a3gc
[root@ localhost test]# ls | grep '^[0-9]'
4abc
4acc
4adc
4aec
5abc
5acc
5adc
5aec
6abc
6acc
6adc
6aec
7abc
7acc
7adc
7aec
[root@ localhost test]# ls | grep '[^0-9]'
4abc
4acc
4adc
4aec
5abc
5acc
5adc
5aec
6abc
6acc
6adc
6aec
7abc
7acc
7adc
7aec
a1bc
a1c
a1dc
a1ec
a1fc
a1gc
a2bc
a2c
a2dc
a2ec
a2fc
a2gc
a3bc
a3c
a3dc
a3ec
a3fc
a3gc
[root@ localhost test]# ls | grep '^[^0-9]'
a1bc
a1c
a1dc
a1ec
a1fc
a1gc
a2bc
a2c
a2dc
a2ec
a2fc
a2gc
a3bc
a3c
a3dc
a3ec
a3fc
a3gc
[root@ localhost test]#
[root@ localhost test]# touch _123
[root@ localhost test]# ls | grep '^[^0-9]'
_123
a1bc
a1c
a1dc
a1ec
a1fc
a1gc
a2bc
a2c
a2dc
a2ec
a2fc
a2gc
a3bc
a3c
a3dc
a3ec
a3fc
a3gc
[root@ localhost test]#
rm -rf *
匹配次数:
*:匹配其前面的一个字符任意次,包括0次;因此 .* 匹配任意长度的任意字符
\?:需加\转义字符使用,匹配其前面的字符1次或0次 可有可无
\{m,n\} :注意在基本正则表达式中花括号前必须加转义字符 \,否则花括号只会被当作普通字符;匹配其前面字符至少m次,至多n次
[root@ localhost test]# touch a{1..2}b
[root@ localhost test]# ls
a1b a2b
[root@ localhost test]# touch ab
[root@ localhost test]# touch a{1..2}{c..f}b
[root@ localhost test]# ls
a1b a1cb a1db a1eb a1fb a2b a2cb a2db a2eb a2fb ab
[root@ localhost test]# ls | grep 'a.*b'
a1b
a1cb
a1db
a1eb
a1fb
a2b
a2cb
a2db
a2eb
a2fb
ab
[root@ localhost test]# ls | grep 'a.\?b'
a1b
a2b
ab
[root@ localhost test]# ls
a1b a1cb a1db a1eb a1fb a2b a2cb a2db a2eb a2fb ab
[root@ localhost test]# ls | grep 'a.\{1,2\}b'
a1b
a1cb
a1db
a1eb
a1fb
a2b
a2cb
a2db
a2eb
a2fb
[root@ localhost test]# ls
a1b a1cb a1db a1eb a1fb a2b a2cb a2db a2eb a2fb ab
[root@ localhost test]#
[root@ localhost test]# touch a{1..2}b{g..k}c
[root@ localhost test]# ls
a1b a1bhc a1bjc a1cb a1eb a2b a2bhc a2bjc a2cb a2eb ab
a1bgc a1bic a1bkc a1db a1fb a2bgc a2bic a2bkc a2db a2fb
[root@ localhost test]# ls | grep 'a.\{1,2\}b'
a1b
a1bgc
a1bhc
a1bic
a1bjc
a1bkc
a1cb
a1db
a1eb
a1fb
a2b
a2bgc
a2bhc
a2bic
a2bjc
a2bkc
a2cb
a2db
a2eb
a2fb
[root@ localhost test]# ls | grep 'a.\{1,2\}b$'
a1b
a1cb
a1db
a1eb
a1fb
a2b
a2cb
a2db
a2eb
a2fb
[root@ localhost test]# ls
rm -rf *
分组:
\(\):将内容分组,作为整体
实例:m\(ab\)* 将ab作为整体,m后ab可以出现0次或任意次,小括号目的是为了以后引用此小括号处匹配到的内容
\1: 引用第一个小括号所包括的所有内容
\2: 引用第二个小括号所包括的所有内容
\3: 引用第三个小括号所包括的所有内容
[root@ localhost test]# touch m
[root@ localhost test]# touch mab
[root@ localhost test]# touch mabab
[root@ localhost test]# touch mababab
[root@ localhost test]# ls
m mab mabab mababab
[root@ localhost test]# ls | grep 'm\(ab\)*'
m
mab
mabab
mababab
[root@ localhost test]# ls | grep 'm\(ab\)\{2,3\}'
mabab
mababab
[root@ localhost test]# ls | grep 'm\(ab\)*'
m
mab
mabab
mababab
[root@ localhost test]# ls | grep 'm\(ab\)\{2,3\}'
mabab
mababab
[root@ localhost test]# ls | grep 'm\(ab\)\1'
mabab
mababab
[root@ localhost test]# ls | grep 'm\(ab\)\1\1'
mababab
[root@ localhost test]#
扩展正则表达式:
i.字符匹配:
. [ ] [^ ]
ii.次数匹配:
*
?:不需要转义字符 \
+ :匹配其前面的字符至少一次
iii.位置锚定:
^、$、\<、\>
iv.分组:
():不需要转义字符 \ ,分组
\1 \2 \3
v.|:或者
a|b
例:C|cat 匹配C或者cat
(C|c)at 匹配Cat或者cat
[root@ localhost test]# ls
m mab mabab mababab
[root@ localhost test]# ls | egrep 'ma+'
mab
mabab
mababab
[root@ localhost test]# touch maa
[root@ localhost test]# ls | egrep 'ma+'
maa
mab
mabab
mababab
[root@ localhost test]#
[root@ localhost test]# touch cat
[root@ localhost test]# touch Cat
[root@ localhost test]# touch c
[root@ localhost test]# touch C
[root@ localhost test]# ls | egrep 'C|cat'
C
cat
Cat
[root@ localhost test]# ls | egrep '(C|c)at'
cat
Cat
[root@ localhost test]#
grep命令常用选项:
-i:忽略大小写
-v:反向查找,显示没有匹配模式的行
-c: 仅显示匹配行数,不显示匹配内容
-n: 显示行号
-o:只显示被模式匹配的字符串
-r:递归,读取目录下所有文件进行匹配
-l: 不显示输出的匹配结果,仅显示匹配的文件名
--color :加颜色
[root@ localhost test]# ls | grep -v c
C
Cat
m
maa
mab
mabab
mababab
[root@ localhost test]# ls | grep -vi c
m
maa
mab
mabab
mababab
[root@ localhost test]# ls | grep -vin c
5:m
6:maa
7:mab
8:mabab
9:mababab
[root@ localhost test]#
[root@ localhost test]# vim 1.txt
[root@ localhost test]# vim 2.txt
[root@ localhost test]# cd ..
[root@ localhost ~]# grep -rl 'a' test
test/1.txt
[root@ localhost ~]# grep -rl '1' test
test/2.txt
[root@ localhost ~]# grep -r 'a' test
test/1.txt:a
[root@ localhost ~]# cd test/
[root@ localhost test]# ls
1.txt 2.txt c C cat Cat m maa mab mabab mababab
[root@ localhost test]# cat 1.txt
a
b
c
d
[root@ localhost test]# cat 2.txt
1
2
3
4
[root@ localhost test]#
0-9 ↩︎
来源:CSDN
作者:aaronszm
链接:https://blog.csdn.net/aaronszm/article/details/103804473