Skip to content

Commit bb70705

Browse files
OmikhleiaDidier Willis
authored and
Didier Willis
committed
fixup! feat: SIL TeX-like lexer (SILE)
1 parent 604b271 commit bb70705

File tree

1 file changed

+88
-17
lines changed

1 file changed

+88
-17
lines changed

lexers/sil.lua

Lines changed: 88 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
--
1313
-- SIL TeX-like is similar to LaTeX, but with a parity with SIL XML.
1414
-- I.e. \foo[attr=val]{...} is equivalent to <foo attr="val">...</foo>.
15-
-- Environments are syntactic sugar for commands
15+
-- Environments are syntactic sugar for commands.
1616
-- I.e. \begin[attr=val]{foo}...\end{foo} is the same as \foo[attr=val]{...}.
1717
-- Rules below try to respect this parity.
1818

1919
local lexer = lexer
20-
local P, S, R = lpeg.P, lpeg.S, lpeg.R
20+
local P, S, R, Cb, Cg, Ct, Cmt = lpeg.P, lpeg.S, lpeg.R, lpeg.Cb, lpeg.Cg, lpeg.Ct, lpeg.Cmt
2121

2222
local lex = lexer.new(...)
2323
local ws = lex:get_rule('whitespace')
@@ -30,17 +30,17 @@ local identifier = lexer.alnum^1 * (S(':-') * lexer.alnum^1)^0
3030

3131
-- Reserved hard-coded "pass-through" commands/environments.
3232
local reserved_specials = {
33-
ftl = 'text', -- Well Fluent has a syntax, but let's not care here
33+
ftl = 'text', -- Well Fluent has a syntax, but let's not care here.
3434
lua = 'lua',
35-
math = 'latex', -- We'd need a LaTeX math-only lexer to handle this properly.
35+
math = 'tex', -- We'd need a (La)TeX math-only lexer to handle this properly.
3636
raw = 'text',
3737
script = 'lua',
38-
-- sil = ... -- It's the default here, so no need to add a rule for it.
38+
-- sil = ... -- It's the default here, so no need to add a rule for it.
3939
xml = 'xml',
4040
use = 'lua',
4141
}
42-
-- Other reserved keywords are "comment" and "begin"/"end"
43-
-- But we'll handle them in the rules below.
42+
-- Other reserved keywords are "comment" and "begin"/"end",
43+
-- but we'll handle them in the rules below.
4444

4545
-- Parameters (key-value pairs).
4646
local eq = lex:tag(lexer.OPERATOR, '=')
@@ -57,22 +57,93 @@ local cmd_comment = P('\\comment') * optparams * lexer.range('{', '}', false, fa
5757
lex:add_rule('comment', lex:tag(lexer.COMMENT, line_comment + env_comment + cmd_comment))
5858

5959
-- 3. Special reserved pass-through commands/environments.
60+
61+
local function check_exit_brace_level(_, _, current_level)
62+
current_level = tonumber(current_level)
63+
return current_level == 0
64+
end
65+
66+
local function increment_brace_level(increment)
67+
local function update_brace_level(_, _, current_level)
68+
current_level = tonumber(current_level)
69+
local next_level = tostring(current_level + increment)
70+
return true, next_level
71+
end
72+
return Cg(Cmt(Cb('brace_level'), update_brace_level), 'brace_level')
73+
end
74+
75+
local is_exit_brace = Cmt(Cb('brace_level'), check_exit_brace_level)
76+
local init_brace_level = Cg(Ct('') / '0', 'brace_level')
77+
6078
for name, lang in pairs(reserved_specials) do
61-
-- Order matters: environments, commands with arguments, commands without arguments
62-
-- We need alt names for multiple embeddings and rules
63-
local base_rule_id = name .. lang
64-
local embedder = lexer.load(lang, base_rule_id .. '_cmd')
65-
lex:embed(
66-
embedder,
67-
lex:tag(lexer.FUNCTION_BUILTIN, '\\' .. name) * optparams * lex:tag(lexer.OPERATOR, '{'),
68-
lex:tag(lexer.OPERATOR, '}'))
69-
local env = lexer.load(lang, base_rule_id .. '_env')
79+
-- Order matters: environments, commands with arguments, commands without arguments.
80+
-- We need alt names for multiple embeddings and rules.
81+
local base_rule_id = name .. '_' .. lang
82+
83+
-- 3.1. Reserved environments.
84+
-- Ex. \begin{lua} ... Lua code ... \end{lua}
85+
local env_embedder = lexer.load(lang, base_rule_id .. '_env')
7086
lex:embed(
71-
env,
87+
env_embedder,
7288
lex:tag(lexer.FUNCTION_BUILTIN, '\\begin') * optparams
7389
* lex:tag(lexer.OPERATOR, '{') * lex:tag(lexer.FUNCTION_BUILTIN, name) * lex:tag(lexer.OPERATOR, '}'),
7490
lex:tag(lexer.FUNCTION_BUILTIN, '\\end')
7591
* lex:tag(lexer.OPERATOR, '{') * lex:tag(lexer.FUNCTION_BUILTIN, name) * lex:tag(lexer.OPERATOR, '}'))
92+
93+
-- 3.2. Reserved commands.
94+
-- Ex. \lua{... Lua code ...}
95+
-- The hard trick here is that we want to keep track of the paired braces,
96+
-- in order to exit the embedding on the right closing brace.
97+
local cmd_embedder = lang == 'text'
98+
and lexer.new(base_rule_id .. '_cmd') -- pseudo-lexer for text
99+
or lexer.load(lang, base_rule_id .. '_cmd') -- real lexer for Lua, TeX, XML
100+
if lang == 'lua' then
101+
-- We hack the Lua lexer to intercept and handle the pairs of braces,
102+
-- i.e. we remove them from the 'operator' rule and handle them separately.
103+
cmd_embedder:modify_rule('operator', cmd_embedder:tag(lexer.OPERATOR, '..' + S('+-*/%^#=<>&|~;:,.[]()')))
104+
cmd_embedder:add_rule(
105+
'sil_brace_open',
106+
cmd_embedder:tag(lexer.OPERATOR, '{') * increment_brace_level(1)
107+
)
108+
cmd_embedder:add_rule(
109+
'sil_brace_close',
110+
cmd_embedder:tag(lexer.OPERATOR, '}') * increment_brace_level(-1)
111+
)
112+
elseif lang == 'tex' then
113+
-- We hack the TeX math lexer to intercept and handle the pairs of braces,
114+
-- i.e. we remove them from the 'operator' rule and handle them separately.
115+
-- We also take the opportunity to remove some operators not expected in math mode,
116+
-- and add some extra operators for math mode.
117+
cmd_embedder:modify_rule('operator', cmd_embedder:tag(lexer.OPERATOR, S('&()[]')))
118+
cmd_embedder:add_rule('operator_math', cmd_embedder:tag(lexer.OPERATOR .. ".math", S('+-=^_')))
119+
cmd_embedder:add_rule(
120+
'sil_brace_open',
121+
cmd_embedder:tag(lexer.OPERATOR, '{') * increment_brace_level(1)
122+
)
123+
cmd_embedder:add_rule(
124+
'sil_brace_close',
125+
cmd_embedder:tag(lexer.OPERATOR, '}') * increment_brace_level(-1)
126+
)
127+
else
128+
-- We just need to keep track of the braces for the XML and text lexers,
129+
-- without any special marking.
130+
cmd_embedder:add_rule(
131+
'sil_brace_open',
132+
P'{' * increment_brace_level(1)
133+
)
134+
cmd_embedder:add_rule(
135+
'sil_brace_close',
136+
P'}' * increment_brace_level(-1)
137+
)
138+
end
139+
lex:embed(
140+
cmd_embedder,
141+
lex:tag(lexer.FUNCTION_BUILTIN, '\\' .. name) * optparams * init_brace_level * lex:tag(lexer.FUNCTION_BUILTIN, '{'),
142+
lex:tag(lexer.FUNCTION_BUILTIN, '}' * is_exit_brace)
143+
)
144+
145+
-- 3.3. Reserved commands without arguments (must come after the commands with arguments).
146+
-- Ex. \use[module=packages.highlighter]
76147
lex:add_rule(base_rule_id .. '_cmd_no_arg', lex:tag(lexer.FUNCTION_BUILTIN, P('\\' .. name)) * optparams)
77148
end
78149

0 commit comments

Comments
 (0)