shlex模块实现了一个类来解析简单的类shell语法,可以用来编写领域特定的语言,或者解析加引号的字符串。 处理输入文本时有一个常见的问题,往往要把一个加引号的单词序列标识为一个实体。根据引号划分文本可能与预想的并不一样,特别是嵌套有多层引号时。
示例文本:
This string has embedded "double quotes" and 'single quotes' in it,
and even "a 'nested example'".
一种简单的方法是构造一个正则表达式,查找引号之外的文本部分,将它们与引号内的文本分开。但这可能带来不必要的复杂性,而且很容易因为边界条件出错,如撇号或者拼写错误。更好的解决方案是使用一个真正的解析器,如shlex模块提供的解析器。以下是一个简单的例子,它使用shlex类打印输入文件中找到的token。
[root@ipython example]# python shlex_one.py text.str
ORIGINAL: 'This string has embedded "double quotes" and \'single quotes\' in it,\nand even "a \'nested example\'".\n'

TOKENS:
'This'
'string'
'has'
'embedded'
'"double quotes"'
'and'
"'single quotes'"
'in'
'it'
','
'and'
'even'
'"a \'nested example\'"'
'.'
下面看一下shlex_one.py的源码:
"""Print every token that shlex finds in the file named on the command line."""
import shlex
import sys

# Exactly one argument (the input filename) is required.
if len(sys.argv) != 2:
    print('Please specify one filename on the command line.')
    sys.exit(1)

filename = sys.argv[1]
# open() in a with-block replaces the Python 2-only file() builtin and
# guarantees the handle is closed once the contents have been read.
with open(filename, 'rt') as source:
    body = source.read()

# Single-argument print(...) calls produce the same output on Python 2 and 3.
print('ORIGINAL: ' + repr(body))
print('')
print('TOKENS:')
# Iterating a shlex instance yields one token at a time; a quoted phrase is
# returned as a single token that still carries its surrounding quotes.
lexer = shlex.shlex(body)
for token in lexer:
    print(repr(token))
|
下面在交互式解释器中分步执行一下(其中test是保存了上述示例文本的字符串变量):
>>> for i in shlex.shlex(test):
...     print i
...
This
string
has
embedded
"double quotes"
and
'single quotes'
in
it
,
and
even
"a 'nested example'"
.
>>> for i in shlex.shlex(test):
...     print repr(i)
...
'This'
'string'
'has'
'embedded'
'"double quotes"'
'and'
"'single quotes'"
'in'
'it'
','
'and'
'even'
'"a \'nested example\'"'
'.'
|
使用shlex模块自带的split()函数
>>> shlex.split(test)
['This', 'string', 'has', 'embedded', 'double quotes', 'and', 'single quotes', 'in', 'it,', 'and', 'even', "a 'nested example'."]
## 小技巧:可以用shlex.split()把命令字符串拆分成subprocess.Popen所需的参数列表 ##
>>> subprocess.Popen(shlex.split('ls -l'))
total 8
-rw-r--r--. 1 root root 324 Sep  8 07:15 shlex_one.py
-rw-r--r--. 1 root root  99 Sep  8 07:16 text.str