1-- $Id: testes/pm.lua $
  2-- See Copyright Notice in file all.lua
  3
  4-- UTF-8 file
  5
  6
  7print('testing pattern matching')
  8
  9local function checkerror (msg, f, ...)
 10  local s, err = pcall(f, ...)
 11  assert(not s and string.find(err, msg))
 12end
 13
 14
 15local function f (s, p)
 16  local i,e = string.find(s, p)
 17  if i then return string.sub(s, i, e) end
 18end
 19
 20local a,b = string.find('', '')    -- empty patterns are tricky
 21assert(a == 1 and b == 0);
 22a,b = string.find('alo', '')
 23assert(a == 1 and b == 0)
 24a,b = string.find('a\0o a\0o a\0o', 'a', 1)   -- first position
 25assert(a == 1 and b == 1)
 26a,b = string.find('a\0o a\0o a\0o', 'a\0o', 2)   -- starts in the midle
 27assert(a == 5 and b == 7)
 28a,b = string.find('a\0o a\0o a\0o', 'a\0o', 9)   -- starts in the midle
 29assert(a == 9 and b == 11)
 30a,b = string.find('a\0a\0a\0a\0\0ab', '\0ab', 2);  -- finds at the end
 31assert(a == 9 and b == 11);
 32a,b = string.find('a\0a\0a\0a\0\0ab', 'b')    -- last position
 33assert(a == 11 and b == 11)
 34assert(not string.find('a\0a\0a\0a\0\0ab', 'b\0'))   -- check ending
 35assert(not string.find('', '\0'))
 36assert(string.find('alo123alo', '12') == 4)
 37assert(not string.find('alo123alo', '^12'))
 38
 39assert(string.match("aaab", ".*b") == "aaab")
 40assert(string.match("aaa", ".*a") == "aaa")
 41assert(string.match("b", ".*b") == "b")
 42
 43assert(string.match("aaab", ".+b") == "aaab")
 44assert(string.match("aaa", ".+a") == "aaa")
 45assert(not string.match("b", ".+b"))
 46
 47assert(string.match("aaab", ".?b") == "ab")
 48assert(string.match("aaa", ".?a") == "aa")
 49assert(string.match("b", ".?b") == "b")
 50
 51assert(f('aloALO', '%l*') == 'alo')
 52assert(f('aLo_ALO', '%a*') == 'aLo')
 53
 54assert(f("  \n\r*&\n\r   xuxu  \n\n", "%g%g%g+") == "xuxu")
 55
 56
 57-- Adapt a pattern to UTF-8
 58local function PU (p)
 59  -- reapply '?' into each individual byte of a character.
 60  -- (For instance, "á?" becomes "\195?\161?".)
 61  p = string.gsub(p, "(" .. utf8.charpattern .. ")%?", function (c)
 62    return string.gsub(c, ".", "%0?")
 63  end)
 64  -- change '.' to utf-8 character patterns
 65  p = string.gsub(p, "%.", utf8.charpattern)
 66  return p
 67end
 68
 69
 70assert(f('aaab', 'a*') == 'aaa');
 71assert(f('aaa', '^.*$') == 'aaa');
 72assert(f('aaa', 'b*') == '');
 73assert(f('aaa', 'ab*a') == 'aa')
 74assert(f('aba', 'ab*a') == 'aba')
 75assert(f('aaab', 'a+') == 'aaa')
 76assert(f('aaa', '^.+$') == 'aaa')
 77assert(not f('aaa', 'b+'))
 78assert(not f('aaa', 'ab+a'))
 79assert(f('aba', 'ab+a') == 'aba')
 80assert(f('a$a', '.$') == 'a')
 81assert(f('a$a', '.%$') == 'a$')
 82assert(f('a$a', '.$.') == 'a$a')
 83assert(not f('a$a', '$$'))
 84assert(not f('a$b', 'a$'))
 85assert(f('a$a', '$') == '')
 86assert(f('', 'b*') == '')
 87assert(not f('aaa', 'bb*'))
 88assert(f('aaab', 'a-') == '')
 89assert(f('aaa', '^.-$') == 'aaa')
 90assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
 91assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
 92assert(f('alo xo', '.o$') == 'xo')
 93assert(f(' \n isto é assim', '%S%S*') == 'isto')
 94assert(f(' \n isto é assim', '%S*$') == 'assim')
 95assert(f(' \n isto é assim', '[a-z]*$') == 'assim')
 96assert(f('um caracter ? extra', '[^%sa-z]') == '?')
 97assert(f('', 'a?') == '')
 98assert(f('á', PU'á?') == 'á')
 99assert(f('ábl', PU'á?b?l?') == 'ábl')
100assert(f('  ábl', PU'á?b?l?') == '')
101assert(f('aa', '^aa?a?a') == 'aa')
102assert(f(']]]áb', '[^]]+') == 'áb')
103assert(f("0alo alo", "%x*") == "0a")
104assert(f("alo alo", "%C+") == "alo alo")
105print('+')
106
107
-- Return the substring of 's' matched by pattern 'p', found through
-- position captures added at both ends of the pattern.
local function f1 (s, p)
  -- the extra capture added in front shifts every capture index by one,
  -- so back-references such as %1 must be renumbered
  local function shift_backref (digit)
    return "%" .. (tonumber(digit) + 1)
  end
  local wrapped = string.gsub(p, "%%([0-9])", shift_backref)
  -- position capture at the start (after a leading '^', if any)...
  wrapped = string.gsub(wrapped, "^(^?)", "%1()", 1)
  -- ...and at the end (before a trailing '$', if any)
  wrapped = string.gsub(wrapped, "($?)$", "()%1", 1)
  local captures = {string.match(s, wrapped)}
  -- first capture: where the match starts; last one: position just after it
  return string.sub(s, captures[1], captures[#captures] - 1)
end
117
-- back-references, checked through 'f1'
-- Each entry is {subject, pattern, expected match}.
for _, case in ipairs{
  {"alo alx 123 b\0o b\0o", "(..*) %1", "b\0o b\0o"},
  {"axz123= 4= 4 34", "(.+)=(.*)=%2 %1", "3= 4= 4 3"},
  {"=======", "^(=*)=%1$", "======="},
} do
  assert(f1(case[1], case[2]) == case[3])
end
-- an even number of '=' cannot be split as <run> '=' <same run>
assert(not string.match("==========", "^([=]*)=%1$"))
122
-- Return i, i+1, ..., up to j as multiple results
-- (no results when 'i <= j' does not hold).
local function range (i, j)
  if not (i <= j) then
    return
  end
  return i, range(i + 1, j)
end
128
-- every byte value from 0 to 255, in increasing order (built in two halves)
local abc = table.concat{
  string.char(range(0, 127)),
  string.char(range(128, 255)),
}

assert(#abc == 256)
132
-- Return the concatenation of everything in 'abc' (declared earlier in
-- this file) that pattern 'p' matches, in order of occurrence.
local function strset (p)
  local matched = {}
  -- gsub is used only to visit the matches; the callback gives no replacement
  string.gsub(abc, p, function (c)
    matched[#matched + 1] = c
  end)
  return table.concat(matched)
end
138
assert(#strset("[\200-\210]") == 11)

-- sets compared with their explicit contents: {set, expected bytes}
for _, case in ipairs{
  {"[a-z]", "abcdefghijklmnopqrstuvwxyz"},
  {"[a-]", "-a"},
  {"[]%%]", "%]"},
  {"[a%-z]", "-az"},
  {"[%^%[%-a%]%-b]", "-[]^ab"},
} do
  assert(strset(case[1]) == case[2])
end

-- pairs of patterns that must select exactly the same bytes
for _, pair in ipairs{
  {"[a-z%d]", "[%da-uu-z]"},
  {"[^%W]", "[%w]"},
  {"%Z", "[\1-\255]"},
  {".", "[\1-\255%z]"},
} do
  assert(strset(pair[1]) == strset(pair[2]))
end
print("+")
151
-- captures returned by string.match: {subject, pattern, expected capture}
for _, case in ipairs{
  {"alo xyzK", "(%w+)K", "xyz"},
  {"254 K", "(%d*)K", ""},
  {"alo ", "(%w*)$", ""},
  {"alo ", "(%w+)$", nil},
} do
  assert(string.match(case[1], case[2]) == case[3])
end
assert(string.find("(álo)", "%(á") == 1)

-- nested captures are numbered by their opening parentheses
local a, b, c, d, e = string.match("âlo alo", PU("^(((.).). (%w*))$"))
assert(a == "âlo alo")
assert(b == "âl")
assert(c == "â")
assert(d == "alo")
assert(e == nil)

-- empty capture and position capture at the end of the subject
a, b, c, d = string.match("0123456789", "(.+(.?)())")
assert(a == "0123456789")
assert(b == "")
assert(c == 11)
assert(d == nil)
print("+")
162
-- plain substitutions: {subject, pattern, replacement, expected result}
for _, case in ipairs{
  {"ülo ülo", "ü", "x", "xlo xlo"},
  {"alo úlo  ", " +$", "", "alo úlo"},                    -- trim
  {"  alo alo  ", "^%s*(.-)%s*$", "%1", "alo alo"},       -- double trim
  {"alo  alo  \n 123\n ", "%s+", " ", "alo alo 123 "},
} do
  local result = string.gsub(case[1], case[2], case[3])
  assert(result == case[4])
end

-- the second result of gsub is the number of substitutions made
local t = "abç d"
a, b = string.gsub(t, PU("(.)"), "%1@")
assert(a == "a@b@ç@ @d@")
assert(b == 5)
a, b = string.gsub("abçd", PU("(.)"), "%0@", 2)   -- at most 2 substitutions
assert(a == "a@b@çd")
assert(b == 2)

-- captures inside the replacement string, and empty matches
for _, case in ipairs{
  {"alo alo", "()[al]", "%1", "12o 56o"},
  {"abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0", "xyz=abc-abc=xyz"},
  {"abc", "%w", "%1%0", "aabbcc"},
  {"abc", "%w+", "%0%1", "abcabc"},
  {"áéí", "$", "\0óú", "áéí\0óú"},
  {"", "^", "r", "r"},
  {"", "$", "r", "r"},
} do
  local result = string.gsub(case[1], case[2], case[3])
  assert(result == case[4])
end
print("+")
181
182
do   -- semantics for empty matches introduced in 5.3.3
  local dashed = string.gsub("a b cd", " *", "-")
  assert(dashed == "-a-b-c-d-")

  -- build the same kind of result by hand from the positions that
  -- gmatch reports around each (possibly empty) run of spaces
  local subject = "a  \nbc\t\td"
  local pieces = {}
  local resume = 1
  for from, past in string.gmatch(subject, "()%s*()") do
    pieces[#pieces + 1] = string.sub(subject, resume, from - 1)
    pieces[#pieces + 1] = "-"
    resume = past
  end
  assert(table.concat(pieces) == "-a-b-c-d-")
end
195
196
-- a function used as replacement is called with the captures
do
  local shouted = string.gsub("um (dois) tres (quatro)", "(%(%w+%))",
                              string.upper)
  assert(shouted == "um (DOIS) tres (QUATRO)")
end

do
  -- each "name=value" pair is stored directly in the globals table
  string.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", function (name, value)
    rawset(_G, name, value)
  end)
  assert(_G.a == "roberto")
  assert(_G.roberto == "a")
  -- remove the globals just created
  _G.a = nil
  _G.roberto = nil
end
206
-- 'f' is rebound here: it now replaces every character of 'a' by 'b'
-- (and, like gsub, also returns the number of replacements).
function f (a, b)
  return string.gsub(a, ".", b)
end

do
  -- each |x|y| group becomes 'y' repeated once per character of 'x'
  local subject = "trocar tudo em |teste|b| é |beleza|al|"
  local replaced = string.gsub(subject, "|([^|]*)|([^|]*)|", f)
  assert(replaced == "trocar tudo em bbbbb é alalalalalal")
end
210
-- Compile and run the chunk in string 's'; return its first result, or
-- the empty string when that result is nil or false.
local function dostring (s)
  local value = load(s, "")()
  return value or ""
end

do
  -- code between '$' marks is run and replaced by what it returns
  local expanded = string.gsub("alo $a='x'$ novamente $return a$",
                               "$([^$]*)%$", dostring)
  assert(expanded == "alo  novamente x")
end

local x = string.gsub(
  "$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
  "$([^$]*)%$", dostring)
assert(x == " assim vai para ALO")
-- the loaded chunks assigned to globals 'a' and 'x'; erase them
_G.a = nil
_G.x = nil
220
-- position captures around each word give where it starts and its length
local t = {}
local s = "a alo jose  joao"
local r = string.gsub(s, "()(%w+)()", function (from, word, past)
  local length = past - from
  assert(#word == length)
  t[from] = length     -- nothing is returned, so the word is kept as it is
end)
assert(s == r)
assert(t[1] == 1)
assert(t[3] == 3)
assert(t[7] == 4)
assert(t[13] == 4)
228
229
-- Tell whether the parentheses in 's' are balanced: after all balanced
-- "(...)" groups are removed, no parenthesis may be left.
local function isbalanced (s)
  local leftover = string.gsub(s, "%b()", "")
  local stray = string.find(leftover, "[()]")
  return not stray
end

assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"))
assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"))

-- '%b' also works when both delimiters are the same character
do
  local quoted = string.gsub("alo 'oi' alo", "%b''", '"')
  assert(quoted == 'alo " alo')
end
237
238
-- replacement functions that keep a counter in field 'n' of table 't'
local t = {n = 0, "apple", "orange", "lime"}
do
  -- each call answers the next entry of 't'
  local function next_entry ()
    t.n = t.n + 1
    return t[t.n]
  end
  local result = string.gsub("x and x and x", "x", next_entry)
  assert(result == "apple and orange and lime")
end

do
  -- appends each match to the current 't'; gives no replacement
  local function collect (w)
    t.n = t.n + 1
    t[t.n] = w
  end

  t = {n = 0}
  string.gsub("first second word", "%w%w*", collect)
  assert(t[1] == "first")
  assert(t[2] == "second")
  assert(t[3] == "word")
  assert(t.n == 3)

  -- with a limit of 2, the third word is never visited
  t = {n = 0}
  local result = string.gsub("first second word", "%w+", collect, 2)
  assert(result == "first second word")
  assert(t[1] == "first")
  assert(t[2] == "second")
  assert(t[3] == undef)
end
251
-- invalid gsub calls on subject "alo":
-- {pattern for the error message, pattern, replacement}
for _, case in ipairs{
  {"invalid replacement value %(a table%)", ".", {a = {}}},
  {"invalid capture index %%2", ".", "%2"},
  {"invalid capture index %%0", "(%0)", "a"},
  {"invalid capture index %%1", "(%1)", "a"},
  {"invalid use of '%%'", ".", "%x"},
} do
  checkerror(case[1], string.gsub, "alo", case[2], case[3])
end
258
259
-- Tests on large subjects; skipped when global '_soft' is set.
if not _soft then
  print("big strings")
  local long_run = string.rep("a", 300000)
  -- {pattern, whether it must match the long run of 'a's}
  for _, case in ipairs{
    {"^a*.?$", true},
    {"^a*.?b$", false},
    {"^a-.?$", true},
  } do
    local found = string.find(long_run, case[1])
    assert((found ~= nil) == case[2])
  end

  -- bug in 5.1.2
  -- (gsub is called here without a replacement and must raise an error)
  local mixed = string.rep("a", 10000) .. string.rep("b", 10000)
  local ok = pcall(string.gsub, mixed, "b")
  assert(not ok)
end
271
-- Reverse 's' through recursively nested gsub calls: the first character
-- is moved behind the reversal of the rest. Returns whatever gsub returns
-- (the new string and the number of substitutions).
local function rev (s)
  local function rest_first (head, rest)
    return rev(rest) .. head
  end
  return string.gsub(s, "(.)(.+)", rest_first)
end

local x = "abcdef"
assert(rev(rev(x)) == x)
279
280
-- gsub with a table as replacement, always on subject "alo alo":
-- {pattern, replacement table, expected result}
-- (the table is indexed by the first capture, or by the whole match)
for _, case in ipairs{
  {".", {}, "alo alo"},
  {"(.)", {a = "AA", l = ""}, "AAo AAo"},
  {"(.).", {a = "AA", l = "K"}, "AAo AAo"},
  {"((.)(.?))", {al = "AA", o = false}, "AAo AAo"},
  {"().", {"x", "yy", "zzz"}, "xyyzzz alo"},
} do
  local result = string.gsub("alo alo", case[1], case[2])
  assert(result == case[3])
end

-- the lookup goes through '__index', so it may be computed
t = setmetatable({}, {
  __index = function (_, key) return string.upper(key) end,
})
assert(string.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI")
291
292
-- tests for gmatch

-- an empty pattern matches once at every position, the end included
local a = 0
for pos in string.gmatch("abcde", "()") do
  assert(pos == a + 1)
  a = pos
end
assert(a == 6)

-- one match for each word
t = {n = 0}
for word in string.gmatch("first second word", "%w+") do
  local count = t.n + 1
  t.n = count
  t[count] = word
end
assert(t[1] == "first")
assert(t[2] == "second")
assert(t[3] == "word")

-- positions where a character is followed by a copy of itself
t = {3, 6, 9}
for pos in string.gmatch("xuxx uu ppar r", "()(.)%2") do
  local expected = table.remove(t, 1)
  assert(pos == expected)
end
assert(#t == 0)

-- "key = value" pairs, with optional spaces around the '='
t = {}
for key, value in string.gmatch("13 14 10 = 11, 15= 16, 22=23",
                                "(%d+)%s*=%s*(%d+)") do
  t[tonumber(key)] = tonumber(value)
end
a = 0
for k, v in pairs(t) do
  assert(k + 1 == v + 0)
  a = a + 1
end
assert(a == 3)
317
318
do   -- init parameter in gmatch
  -- sum of the numbers matched by "%d+" in 'subject', starting at 'init'
  local function sum_from (subject, init)
    local total = 0
    for digits in string.gmatch(subject, "%d+", init) do
      total = total + tonumber(digits)
    end
    return total
  end

  -- number of matches of "%w*" in "11 21 31", starting at 'init'
  local function matches_from (init)
    local n = 0
    for _ in string.gmatch("11 21 31", "%w*", init) do
      n = n + 1
    end
    return n
  end

  assert(sum_from("10 20 30", 3) == 50)
  assert(sum_from("11 21 31", -4) == 32)   -- negative init counts from the end

  -- there is an empty string at the end of the subject
  assert(matches_from(9) == 1)

  -- there are no empty strings after the end of the subject
  assert(matches_from(10) == 0)
end
346
347
-- tests for '%f' (frontier patterns)

-- substitutions: {subject, pattern, replacement, expected result}
for _, case in ipairs{
  {"aaa aa a aaa a", "%f[%w]a", "x", "xaa xa x xaa x"},
  {"[[]] [][] [[[[", "%f[[].", "x", "x[]] x]x] x[[["},
  {"01abc45de3", "%f[%d]", ".", ".01abc.45de.3"},
  {"01abc45 de3x", "%f[%D]%w", ".", "01.bc45 de3."},
  {"function", "%f[\1-\255]%w", ".", ".unction"},
  {"function", "%f[^\1-\255]", ".", "function."},
} do
  local result = string.gsub(case[1], case[2], case[3])
  assert(result == case[4])
end

-- searches: {subject, pattern, expected start (nil = no match)}
for _, case in ipairs{
  {"a", "%f[a]", 1},
  {"a", "%f[^%z]", 1},
  {"a", "%f[^%l]", 2},
  {"aba", "%f[a%z]", 3},
  {"aba", "%f[%z]", 4},
  {"aba", "%f[%l%z]", nil},
  {"aba", "%f[^%l%z]", nil},
} do
  local start = string.find(case[1], case[2])
  assert(start == case[3])
end

-- from the start of a word up to the start of the next one
local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]")
assert(i == 2)
assert(e == 5)
local k = string.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])")
assert(k == "alo ")

-- positions where an alphanumeric run begins
local a = {1, 5, 9, 14, 17}
for pos in string.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do
  local expected = table.remove(a, 1)
  assert(expected == pos)
end
assert(#a == 0)
375
376
-- malformed patterns
-- Assert that string.find rejects pattern 'p' with an error message
-- that matches 'm' (default: "malformed").
local function malform (p, m)
  local expected = m or "malformed"
  local ok, message = pcall(string.find, "a", p)
  -- the message is inspected only when the call really failed
  local reported = not ok and string.find(message, expected)
  assert(reported)
end
383
-- {pattern, expected message}; without a message, 'malform' uses its default
for _, case in ipairs{
  {"(.", "unfinished capture"},
  {".)", "invalid pattern capture"},
  {"[a"},
  {"[]"},
  {"[^]"},
  {"[a%]"},
  {"[a%"},
  {"%b"},
  {"%ba"},
  {"%"},
  {"%f", "missing"},
} do
  malform(case[1], case[2])
end
395
-- \0 in patterns

-- matches: {subject, pattern, expected match}
for _, case in ipairs{
  {"ab\0\1\2c", "[\0-\2]+", "\0\1\2"},
  {"ab\0\1\2c", "[\0-\0]+", "\0"},
  {"abc\0efg\0\1e\1g", "%b\0\1", "\0efg\0\1e\1"},
  {"abc\0\0\0", "%\0+", "\0\0\0"},
  {"abc\0\0\0", "%\0%\0?", "\0\0"},
} do
  assert(string.match(case[1], case[2]) == case[3])
end

-- searches: {subject, pattern, expected start}
for _, case in ipairs{
  {"b$a", "$\0?", 2},
  {"abc\0efg", "%\0", 4},
  -- magic char after \0
  {"abc\0\0", "\0.", 4},
  {"abcx\0\0abc\0abc", "x\0\0abc\0a.", 4},
} do
  local start = string.find(case[1], case[2])
  assert(start == case[3])
end
408
409
do   -- test reuse of original string in gsub
  -- the two values are compared through their "%p" representations
  local function same_object (x, y)
    return string.format("%p", x) == string.format("%p", y)
  end

  local s = string.rep("a", 100)
  local count

  -- Build a replacement function that checks and counts each match.
  -- It answers nil (no substitution) unless 'echo' is true, when it
  -- answers the match itself.
  local function counter (echo)
    return function (x)
      assert(x == "a")
      count = count + 1
      if echo then
        return x
      end
      return nil
    end
  end

  -- no match
  local r = string.gsub(s, "b", "c")
  assert(same_object(s, r))

  -- matches, but no substitutions
  r = string.gsub(s, ".", {x = "y"})
  assert(same_object(s, r))

  -- the function declines every substitution
  count = 0
  r = string.gsub(s, ".", counter(false))
  r = string.gsub(r, ".", {b = "x"})   -- "a" is not a key; no subst.
  assert(count == 100)
  assert(same_object(s, r))

  -- substitutions by equal text: same contents, but no reuse in this case
  count = 0
  r = string.gsub(s, ".", counter(true))
  assert(count == 100)
  assert(r == s)
  assert(not same_object(s, r))
end
438
-- Reached only when every assertion above has passed.
print("OK")
440