lpeg攻略

outmanwt

已于 2022-03-31 10:17:28 修改

阅读量2.7k

点赞数 29

分类专栏： lua 文章标签： lua 正则表达式

于 2021-08-01 14:11:52 首次发布

本文链接：https://blog.csdn.net/qq_25743645/article/details/118876357

版权

lua 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

文章目录

前言
一、lpeg是什么？
二、使用步骤
拓展阅读

前言

最近看到lua的一个库lpeg,记录一下学到的

提示：以下是本篇文章正文内容

一、lpeg是什么？

LPeg 是 Lua 的一个基于解析表达式文法（PEG）的文本匹配库，比 Lua 原生的字符串匹配和标准正则表达式功能更强。

二、使用步骤

1.引入库

代码如下（示例）：

local lpeg = require "lpeg"

2.起别名

代码如下（示例）：

-- Short global aliases for the most used LPeg entry points
match, P, S, R = lpeg.match, lpeg.P, lpeg.S, lpeg.R

目的: 少打点代码

3. 常用操作速览

Operator	Description
`lpeg.P(string)`	Matches `string` literally
`lpeg.P(n)`	Matches exactly `n` characters
`lpeg.S(string)`	Matches any character in `string` (Set)
`lpeg.R("xy")`	Matches any character between x and y (Range)
`patt^n`	Matches at least `n` repetitions of `patt`
`patt^-n`	Matches at most `n` repetitions of `patt`
`patt1 * patt2`	Matches `patt1` followed by `patt2`
`patt1 + patt2`	Matches `patt1` or `patt2` (ordered choice)
`patt1 - patt2`	Matches `patt1` if `patt2` does not match
`-patt`	Equivalent to `("" - patt)`
`#patt`	Matches `patt` but consumes no input
`lpeg.B(patt)`	Matches `patt` behind the current position, consuming no input

4.P、R、S

-- P matches a literal string. match is anchored at the start of the subject and
-- returns the index just after the matched text (nil if the subject does not start with it)
print(match(P'a',  'abc'))	--2
print(match(P'b',  'abc'))	--nil
print(match(P'ab', 'abc'))	--3
print(match(P'a',  'aaa'))	--2
print(match(P'ab', 'abab'))	--3

-- R matches one character in a range: '09' means 0..9, 'AZ' likewise; also anchored at the start
print(match(R'az',  'abc'))	--2
print(match(R'aZ',  'abc'))	--nil
print(match(R'AZ',  'abc'))	--nil
print(match(R'Az',  'abc'))	--2
print(match(R'az',  '0abc'))--nil

-- S matches one character from a set: succeeds only when the FIRST character of the
-- subject is in the set, so a successful match on its own always returns 2
print(match(S'abc', 'abc')) --2
print(match(S'bc',  'abc'))	--nil
print(match(S'1a',  'abc'))	--2
4.多次匹配+、*

-- ^n means at least n repetitions; ^-n means at most n repetitions
print(match(P'a'^1,   'aaaa'))	--5
print(match(P'a'^4,   'aaaa'))	--5
print(match(P'a'^5,   'aaaa'))	--nil
print(match(P'aa'^2,  'aaaa'))	--5
print(match(P'ab'^2,  'ababc'))	--5
print(match(P'ab'^-1, 'ababc'))--3

-- * means sequence, e.g. P'a'*P'b'^0 is an 'a' followed by zero or more 'b'
print(match(P'a'*P'b'^0, 'aab')) --2
print(match(P'a'*P'b'^1, 'aab')) --nil
print(match(P'a'*P'b'^1, 'abab'))--3
print(match(P'a'^0*P'b', 'aab')) --4
print(match(P'a'^1*P'b', 'aab')) --4

-- + means ordered choice, e.g. P'a' + P'b'^1 is an 'a', or otherwise one or more 'b'
local a_1b = P'a' + P'b'^1
print(match(a_1b, 'ab')) --2
print(match(a_1b, 'bb')) --3
print(a_1b:match  'bc')  --2 (call sugar without parentheses: f('x') can be written f 'x')

5.练习

匹配换行

-- Newline: try "\r\n" first, otherwise a single "\r" or "\n"
newline = P'\r\n' + S'\r\n'

数字

digit = R'09' -- a single decimal digit
digits = digit^1 -- one or more digits (an unsigned integer)

6.基本捕获

Operation	What it Produces
`lpeg.C(patt)`	the match for `patt` plus all captures made by `patt`
`lpeg.Carg(n)`	the value of the n^th extra argument to `lpeg.match` (matches the empty string)
`lpeg.Cb(name)`	the values produced by the previous group capture named `name` (matches the empty string)
`lpeg.Cc(values)`	the given values (matches the empty string)
`lpeg.Cf(patt, func)`	a folding of the captures from `patt`
`lpeg.Cg(patt [, name])`	the values produced by `patt`, optionally tagged with `name`
`lpeg.Cp()`	the current position (matches the empty string)
`lpeg.Cs(patt)`	the match for `patt` with the values from nested captures replacing their matches
`lpeg.Ct(patt)`	a table with all captures from `patt`
`patt / string`	`string`, with some marks replaced by captures of `patt`
`patt / number`	the n-th value captured by `patt`, or no value when `number` is zero.
`patt / table`	`table[c]`, where `c` is the (first) capture of `patt`
`patt / function`	the returns of `function` applied to the captures of `patt`
`lpeg.Cmt(patt, function)`	the returns of `function` applied to the captures of `patt`; the application is done at match time

-- Aliases
C = lpeg.C
Ct = lpeg.Ct
-- Build a capturing pattern
match_digits = C(digits)
print(match_digits:match '1233210')	--1233210
print(match_digits:match '+123')	--nil
-- Allow an optional leading sign
int = S'+-'^-1 * digits
match_int = C(int)	-- at most one '+' or '-', followed by digits
print(match_int:match '+123')		--+123
-- Post-process what was matched: the `/` operator hands the captures of a
-- pattern (or the whole match when it has none) to a function or a table
print(match(int/tonumber,'+123') + 1)		--124
-- Equivalent to matching '^(a+)(b+)': two captures are returned
print(match(C(P'a'^1) * C(P'b'^1), 'aabbbb'))	--aa	bbbb

7.复杂的例子:匹配浮点数

正则表达式版本

'[-+]?[0-9]+(\.[0-9]+)?([eE][+-]?[0-9]+)?'

lpeg版本

-- maybe(p): p is optional (matches p at most once)
function maybe(p) return p^-1 end

digits = R'09'^1
sign = maybe(S'+-')
dot = '.'
exp = S'eE'
-- float = optional sign * integer part * optional fraction * optional exponent
-- (the exponent may carry its own optional sign)
float = sign * digits * maybe(dot*digits) * maybe(exp*sign*digits)

print(match(C(float),'3.1415926'))	--3.1415926
print(match(C(float),'3.14.15'))	--3.14
print(match(C(float),'3.14e-3'))	--3.14e-3

匹配以逗号分隔的多个浮点数

-- One float, then zero or more ",float"; every float is captured as a string
listf = C(float) * (',' * C(float))^0
print(listf:match '2,3,4') -- 2 3 4

-- Collect the captures into a table
--~ 1	3.140
--~ 2	1.590
--~ 3	2.0e-10
local t_match = match(Ct(listf),'3.140,1.590,2.0e-10')
-- ipairs walks the captures in index order (pairs gives no ordering guarantee)
for i, v in ipairs(t_match) do
	print(i, v)
end


-- Number version (the captures above were strings)
-- The exact exponent formatting is platform dependent (e.g. 2e-10 or 2e-010)
--~ 1	3.14
--~ 2	1.59
--~ 3	2e-10
floatc = float/tonumber
listf = floatc * (',' * floatc)^0
t_match = match(Ct(listf),'3.14,1.59,2.0e-10')
-- ipairs walks the captures in index order (pairs gives no ordering guarantee)
for i, v in ipairs(t_match) do
	print(i, v)
end

-- Tolerate spaces around each number
sp = P' '^0 -- any number of spaces
function space(pat) return sp * pat * sp end -- pat with optional spaces before and after
floatc = space(float/tonumber)
listc = floatc * (',' * floatc)^0
-- Print the captured numbers themselves; print() on the table would only show its address
print(table.concat(match(Ct(listc),' 1,2, 3'), ' '))	--1 2 3

8.更为复杂的例子：计算文本某位置的行号和列号，并输出指定的一行

-- Sample text used below: four lines, the second one empty
x = [[a=123456

b=2
c=3456789]]
local m = require 'lpeglabel'

-- Scratch state shared by the pattern callback and rowcol():
--   row        : number of the line reached so far
--   line_start : index of the first character of that line
local row
local line_start

-- Called once per newline with the position just after it
local function on_newline(pos)
    row = row + 1
    line_start = pos
end

-- A newline ("\r\n", "\r" or "\n") that reports the position following it
local NL = (m.P'\r\n' + m.S'\r\n') * m.Cp() / on_newline
-- Walk the whole subject: each step is either a newline or any single character
local ROWCOL = (NL + m.P(1))^0

--- Row and column of the n-th character of str.
-- Only the first n characters are scanned; the column is counted from the
-- start of the last line reached.
-- @return row, col
local function rowcol(str, n)
    row, line_start = 1, 1
    ROWCOL:match(str:sub(1, n))
    return row, n - line_start + 1
end

-- Plain newline pattern (no capture): "\r\n", "\r" or "\n"
local NL = m.P'\r\n' + m.S'\r\n'

--- Return the text of line number `row` of `str` (counting from 1).
-- Returns nil when no line with that number is reached.
local function line(str, row)
    local count = 0
    local res
    -- A line is a run of non-newline characters, ended by a newline or by
    -- the end of the subject. The callback runs at match time and receives
    -- the subject, the position after the match and the matched text, e.g.
    -- for the first line of the sample text: (subject, 9, 'a=123456').
    local LINE = m.Cmt((1 - NL)^0, function (_subject, _pos, text)
        count = count + 1
        if count == row then
            res = text
            -- failing the match here ends the (LINE * NL)^0 repetition
            return false
        end
        return true
    end)
    -- every line followed by a newline, then a final line without one
    local MATCH = (LINE * NL)^0 * LINE
    MATCH:match(str)
    return res
end


print(rowcol(x,#x));   	 --4 9: four rows, and the last row has 9 characters
print(line(x,4))       		 --c=3456789: prints row 4