1 /++
2  + Copyright: Copyright © 2015, Christian Köstlin
3  + License: MIT
4  + Authors: Christian Koestlin, Christian Köstlin
5  +/
6 
7 module pc4d.parsers;
8 
9 version (unittest) import unit_threaded;
10 
11 import pc4d.parser;
12 import std.array;
13 import std.conv;
14 import std.functional;
15 
16 /// convenient function to instantiate a AlphaNumericParser
17 auto alnum(T)(bool collect = true)
18 {
19     return new Regex(`-?\w[\w\d]*`, collect) ^^ (data) {
20         return variantArray(data[0]);
21     };
22 }
23 
24 /// the alnum parser
25 @("alnum parser") unittest
26 {
27     auto parser = alnum!(immutable(char))();
28     auto res = parser.parseAll("-Aa1234");
29     res.success.should == true;
30     res.results[0].should == "-Aa1234";
31     res = parser.parseAll("a1234");
32     res.success.should == true;
33 }
34 
35 /// class for matching alternatives
36 class Alternative(T) : Parser!(T)
37 {
38     Parser!(T)[] fParsers;
39 
40     this(Parser!(T)[] parsers...)
41     {
42         fParsers = parsers.dup;
43     }
44 
45     override ParseResult!(T) parse(T[] s)
46     {
47         foreach (parser; fParsers)
48         {
49             auto res = parser.parse(s);
50             if (res.success)
51             {
52                 return transform(res);
53             }
54         }
55         return ParseResult!(T).error("or did not match");
56     }
57 }
58 
59 /// convenient function
60 Parser!(T) or(T)(Parser!(T)[] parsers...)
61 {
62     return new Alternative!(T)(parsers);
63 }
64 
65 /// showing off the or dsl
66 @("or dsl") unittest
67 {
68     auto parser = or(match("a"), match("b"), match("c"));
69     parser.parse("a").success.should == true;
70     parser.parse("b").success.should == true;
71     parser.parse("c").success.should == true;
72 }
73 
74 /// parser for blockcomments
75 static class BlockComment(T) : Parser!(T)
76 {
77     T[] fStart;
78     T[] fEnd;
79     bool fCollect;
80     this(T[] startString, T[] endString, bool collect = true)
81     {
82         fStart = startString;
83         fEnd = endString;
84         fCollect = collect;
85     }
86 
87     bool startsWith(T[] aInput, ulong startIdx, T[] expected)
88     {
89         T[] slice = aInput[startIdx .. $];
90         if (slice.length < expected.length)
91         {
92             return false;
93         }
94         for (int i = 0; i < expected.length; i++)
95         {
96             if (slice[i] != expected[i])
97             {
98                 return false;
99             }
100         }
101         return true;
102     }
103 
104     override ParseResult!(T) parse(T[] s)
105     {
106         if (startsWith(s, 0, fStart))
107         {
108             auto l = fStart.length;
109             for (auto i = l; i < s.length; i++)
110             {
111                 if (startsWith(s, i, fEnd))
112                 {
113                     auto lastIdx = i + fEnd.length;
114                     auto rest = s[lastIdx .. $];
115                     if (fCollect)
116                     {
117                         auto matched = s[0 .. lastIdx];
118                         return transform(success(rest, matched));
119                     }
120                     else
121                     {
122                         return success(rest);
123                     }
124                 }
125             }
126             return ParseResult!(T).error("");
127         }
128         else
129         {
130             return ParseResult!(T).error("");
131         }
132     }
133 }
134 
135 /// convenient function
136 Parser!(T) blockComment(T)(T[] startString, T[] endString, bool collect = true)
137 {
138     return new BlockComment!(T)(startString, endString, collect);
139 }
140 
141 /// blockComment can collect the comment itself
142 @("block comment - catch comment") unittest
143 {
144     auto parser = blockComment("/*", "*/", true);
145     auto res = parser.parseAll("/*abc*/");
146     res.success.should == true;
147     res.fResults[0].should == "/*abc*/";
148 }
149 
150 /// blockComment can also throw the comment away
151 @("block comment - discard comment") unittest
152 {
153     auto parser = blockComment("/*", "*/", false);
154     auto res = parser.parseAll("/*abc*/");
155     res.success.should == true;
156     res.fResults.length.should == 0;
157 }
158 
159 /++ example of an expression parser.
160  + expr -> term { + term }
161  + term -> factor { * factor }
162  + factor -> number | ( expr )
163  +/
164 static class ExprParser
165 {
166     Parser!(immutable(char)) lazyExpr()
167     {
168         return lazyParser(&expr);
169     }
170 
171     Parser!(immutable(char)) expr()
172     {
173         auto add = (term() ~ match("+", false) ~ term()) ^^ (input) {
174             return variantArray(input[0] + input[1]);
175         };
176         return add | term();
177     }
178 
179     Parser!(immutable(char)) term()
180     {
181         auto mult = (factor() ~ match("*", false) ~ factor()) ^^ (input) {
182             return variantArray(input[0] * input[1]);
183         };
184         return mult | factor();
185     }
186 
187     Parser!(immutable(char)) factor()
188     {
189         auto exprWithParens = match("(", false) ~ lazyExpr() ~ match(")", false);
190         return number!(immutable(char))() | exprWithParens;
191     }
192 }
193 
194 /// a simple expression parser
195 @("expression example") unittest
196 {
197     auto parser = new ExprParser;
198     auto p = parser.expr();
199     auto res = p.parse("1+2*3");
200     res.success.should == true;
201     res.results[0].get!double.should == 7;
202 
203     res = p.parse("(1+2)*3");
204     res.success.should == true;
205     res.results[0].get!double.should == 9;
206 }
207 
208 /// parser for parsing ints
209 static class Integer : Regex
210 {
211     this()
212     {
213         import std.conv;
214 
215         super(r"\d+") ^^ (input) {
216             return variantArray(input[0].get!string
217                     .to!int);
218         };
219     }
220 }
221 
222 /// convenience function to create an integer parser
223 Parser!(T) integer(T)()
224 {
225     return new Integer;
226 }
227 
228 /// unittests for integer
229 @("integer") unittest
230 {
231     auto parser = integer!(immutable(char));
232     auto res = parser.parse("123");
233     res.success.should == true;
234     res.results[0].should == 123;
235 }
236 
237 /++
238  + convenient function to build a parser that kills the result of
239  + another parser e.g. killResults(match("a")) succeeds, but returns
240  + an empty result
241  +/
242 Parser!(T) killResults(T)(Parser!(T) parser)
243 {
244     return parser ^^ (Variant[] input) { Variant[] res; return res; };
245 }
246 
247 /// unittests for kill results
248 @("killResults") unittest
249 {
250     auto res = killResults(match("a")).parse("a");
251     res.success.should == true;
252     res.results.length.should == 0;
253 }
254 
255 /++
256  + this parser is needed to build recursive parser hierarchies.
257  + look for expression.d for a more realistic example than in the unittest
258  +/
259 class Lazy(T) : Parser!(T)
260 {
261     Parser!(T) delegate() fCallable;
262 
263     this(Parser!(T) delegate() parser)
264     {
265         assert(parser != null);
266         fCallable = parser;
267     }
268 
269     this(Parser!(T) function() parser)
270     {
271         assert(parser != null);
272         fCallable = toDelegate(parser);
273     }
274 
275     override ParseResult!(T) parse(T[] s)
276     {
277         auto parser = fCallable();
278         return transform(parser.parse(s));
279     }
280 
281 }
282 
283 /// convenient function to instantiate a lazy parser with a delegate
284 Parser!(T) lazyParser(T)(Parser!(T) delegate() parser)
285 {
286     return new Lazy!(T)(parser);
287 }
288 
289 /// convenient function to instantiate a lazy parser with a function
290 Parser!(T) lazyParser(T)(Parser!(T) function() parser)
291 {
292     return new Lazy!(T)(parser);
293 }
294 
295 /// unittest to show the simplest usage of lazy
296 @("lazy") unittest
297 {
298     // endless -> a | a opt(endless)
299     struct Endless
300     {
301         static Parser!(immutable(char)) lazyEndless()
302         {
303             return lazyParser!(immutable(char))(&parser);
304         }
305 
306         static Parser!(immutable(char)) parser()
307         {
308             return match("a") ~ (-(lazyEndless()));
309         }
310     }
311 
312     auto p = Endless.parser();
313     auto res = p.parse("aa");
314     res.success.should == true;
315     res.results.should == ["a", "a"];
316 
317     res = p.parse("aab");
318     res.success.should == true;
319     res.results.should == ["a", "a"];
320     res.rest.should == "b";
321 }
322 
323 /// class for matching an array exactly
324 class Match(T) : Parser!(T)
325 {
326     T[] fExpected;
327     bool fCollect;
328     this(T[] expected, bool collect = true)
329     {
330         fExpected = expected;
331         fCollect = collect;
332     }
333 
334     bool startsWith(T[] aInput, T[] expected)
335     {
336         if (aInput.length < expected.length)
337         {
338             return false;
339         }
340         for (int i = 0; i < expected.length; i++)
341         {
342             if (aInput[i] != expected[i])
343             {
344                 return false;
345             }
346         }
347         return true;
348     }
349 
350     override ParseResult!(T) parse(T[] s)
351     {
352         if (startsWith(s, fExpected))
353         {
354             auto rest = s[fExpected.length .. $];
355             if (fCollect)
356             {
357                 return transform(success(rest, fExpected));
358             }
359             else
360             {
361                 return success(rest);
362             }
363         }
364         else
365         {
366             return ParseResult!(T).error(""); //"Expected: '" ~ fExpected ~ "' but got '" ~ s ~ "'");
367         }
368     }
369 }
370 
371 /// convenient function to instantiate a matcher
372 Parser!(T) match(T)(T[] s, bool collect = true)
373 {
374     return new Match!(T)(s, collect);
375 }
376 
377 /// matching a string
378 @("match") unittest
379 {
380     auto parser = match("test");
381     auto res = parser.parseAll("test");
382 
383     res.success.should == true;
384     res.rest.length.should == 0;
385 }
386 
387 @("match 2") unittest
388 {
389     auto parser = match("test");
390     auto res = parser.parse("test2");
391     res.success.should == true;
392     res.rest.should == "2";
393 
394     res = parser.parseAll("test2");
395     res.success.should == false;
396 }
397 
398 /// transform match result
399 @("match + transform") unittest
400 {
401     auto parser = match("test") ^^ (objects) {
402         auto res = objects;
403         if (objects[0] == "test")
404         {
405             res[0] = "super";
406         }
407         return objects;
408     };
409     auto res = parser.parse("test");
410     res.success.should == true;
411     res.results[0].should == "super";
412 }
413 
414 @("match - discard") unittest
415 {
416     auto parser = match("test", false);
417     auto res = parser.parseAll("test");
418     res.success.should == true;
419     res.results.length.should == 0;
420 }
421 
422 @("match on arrays") unittest
423 {
424     auto parser = match([1, 2, 3]);
425     auto res = parser.parseAll([1, 2, 3]);
426     res.success.should == true;
427     res.results.length.should == 1;
428     res.results[0].should == [1, 2, 3];
429 }
430 
431 /// convenient function to instantiate a number parser
432 Parser!(T) number(T)()
433 {
434     return new Regex(r"[-+]?[0-9]*\.?[0-9]+") ^^ (Variant[] input) {
435         auto output = appender!(Variant[])();
436         foreach (Variant o; input)
437         {
438             string s = o.get!(string);
439             double h = std.conv.to!(double)(s);
440             Variant v = h;
441             output.put(v);
442         }
443         return output.data;
444     };
445 }
446 
447 /// unittests for number parser
448 @("number parser") unittest
449 {
450     auto parser = number!(immutable(char))();
451     auto res = parser.parse("123.123");
452     res.success.should == true;
453     res.results[0].should == 123.123;
454 }
455 
456 /// class for matching something optional
457 class Optional(T) : Parser!(T)
458 {
459     Parser!(T) fParser;
460 
461     this(Parser!(T) parser)
462     {
463         fParser = parser;
464     }
465 
466     override ParseResult!(T) parse(T[] s)
467     {
468         auto res = fParser.parse(s);
469         if (!res.success)
470         {
471             return success(s);
472         }
473         else
474         {
475             return res;
476         }
477     }
478 }
479 
480 /// unittests to show the usage of OptionalParser and its dsl '-'
481 @("optional and dsl") unittest
482 {
483     auto abc = match("abc");
484     auto opt = -abc;
485     auto res = opt.parse("abc");
486     res.success.should == true;
487     res.results.length.should == 1;
488     res.results[0].should == "abc";
489     res.rest.length.should == 0;
490 }
491 
492 /// unittest to show optional in action.
493 @("optional") unittest
494 {
495     auto abc = match("abc");
496     auto opt = -abc;
497     auto res = opt.parse("efg");
498     auto withoutOptional = abc.parse("efg");
499     withoutOptional.success.should == false;
500     res.success.should == true;
501     res.results.length.should == 0;
502     res.rest.should == "efg";
503 }
504 
505 /// parse a number with or without sign
506 @("parse number with or without +") unittest
507 {
508     auto sign = match("+");
509     auto value = match("1");
510     auto test = (-sign) ~ value;
511     auto resWithSign = test.parse("+1");
512     resWithSign.success.should == true;
513     resWithSign.results.length.should == 2;
514     auto resWithoutSign = test.parse("1");
515     resWithoutSign.success.should == true;
516 }
517 
518 /++
519  + parser for regular expressions
520  + a successful parse step returns all captures in an array
521  +/
522 class Regex : Parser!(immutable(char))
523 {
524     string fRegex;
525     bool fCollect;
526     this(string regex, bool collect = true)
527     {
528         fRegex = regex;
529         fCollect = collect;
530     }
531 
532     override ParseResult!(immutable(char)) parse(string s)
533     {
534         import std.regex;
535 
536         auto res = std.regex.match(s, std.regex.regex(fRegex));
537         if (res.empty())
538         {
539             return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex);
540         }
541         else if (res.pre.length > 0)
542         {
543             return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex);
544         }
545         else
546         {
547             if (fCollect)
548             {
549                 auto results = appender!(Variant[])();
550                 foreach (c; res.captures)
551                 {
552                     Variant v = c;
553                     results.put(v);
554                 }
555                 return transform(ParseResult!(immutable(char)).ok(res.post, results.data));
556             }
557             else
558             {
559                 return success(res.post);
560             }
561         }
562     }
563 }
564 
565 /// convenient function to instantiate a regexparser
566 Parser!(T) regex(T)(T[] s, bool collect = true)
567 {
568     return new Regex(s, collect);
569 }
570 
571 /// regexParser
572 @("regex parser") unittest
573 {
574     auto res = regex("(a)(.)(c)").parse("abcd");
575     res.success.should == true;
576     res.results.should == ["abc", "a", "b", "c"];
577     res.rest.shouldEqual("d");
578 }
579 
580 /// regexParser works from the start of the input
581 @("regex parser parses from start") unittest
582 {
583     auto res = regex("abc").parse("babc");
584     res.success.shouldBeFalse;
585 }
586 
587 /// class for matching repetitions
588 static class Repetition(T) : Parser!(T)
589 {
590     Parser!(T) fToRepeat;
591     this(Parser!(T) toRepeat)
592     {
593         fToRepeat = toRepeat;
594     }
595 
596     override ParseResult!(T) parse(T[] s)
597     {
598         Variant[] results;
599         auto rest = s;
600         while (true)
601         {
602             auto res = fToRepeat.parse(rest);
603             if (res.success)
604             {
605                 rest = res.rest;
606                 results = results ~ res.results;
607             }
608             else
609             {
610                 break;
611             }
612         }
613         return transform(ParseResult!(T).ok(rest, results));
614     }
615 }
616 
617 /// unittest for repetition
618 @("repetition more than one") unittest
619 {
620     auto parser = *match("a");
621     auto res = parser.parse("aa");
622     res.success.shouldBeTrue;
623     res.rest.shouldEqual("");
624     res.results.length.shouldEqual(2);
625 }
626 
627 @("repetition none") unittest
628 {
629     auto parser = *match("a");
630     auto res = parser.parse("b");
631     res.success.shouldBeTrue;
632     res.rest.shouldEqual("b");
633 }
634 
635 @("repetition with rest") unittest
636 {
637     auto parser = *match("a");
638     auto res = parser.parse("ab");
639     res.success.shouldBeTrue;
640     res.rest.shouldEqual("b");
641 }
642 
643 @("repetition with other parser") unittest
644 {
645     auto parser = *(match("+") ~ match("-"));
646     auto res = parser.parse("+-+-+");
647     res.success.shouldBeTrue;
648     res.rest.shouldEqual("+");
649 }
650 
651 @("repetition discarding result") unittest
652 {
653     auto parser = *match("a", false);
654     auto res = parser.parse("aaaa");
655     res.success.shouldBeTrue;
656     res.rest.length.shouldEqual(0);
657     res.results.length.shouldEqual(0);
658 }
659 
660 @("repetition transforming result") unittest
661 {
662     auto parser = (*match("a")) ^^ (input) { return variantArray(input.length); };
663     auto suc = parser.parseAll("aaaaa");
664     suc.success.shouldBeTrue;
665     suc.results.length.shouldEqual(1);
666     suc.results[0].get!(ulong).shouldEqual(5);
667 }
668 
669 /// class for matching sequences
670 class Sequence(T) : Parser!(T)
671 {
672     Parser!(T)[] fParsers;
673     this(Parser!(T)[] parsers...)
674     {
675         fParsers = parsers.dup;
676     }
677 
678     override ParseResult!(T) parse(T[] s)
679     {
680         auto resultObjects = appender!(Variant[])();
681         T[] h = s;
682         foreach (parser; fParsers)
683         {
684             auto res = parser.parse(h);
685             if (res.success)
686             {
687                 h = res.rest;
688                 resultObjects.put(res.results);
689             }
690             else
691             {
692                 return res;
693             }
694         }
695         return transform(ParseResult!(T).ok(h, resultObjects.data));
696     }
697 }
698 
699 /// convenient function
700 Parser!(T) sequence(T)(Parser!(T)[] parsers...)
701 {
702     return new Sequence!(T)(parsers);
703 }
704 
705 /// unittests showing usage of sequence parser and dsl '~'
706 @("sequence") unittest
707 {
708     auto parser = match("a") ~ match("b");
709     auto res = parser.parse("ab");
710     res.success.shouldBeTrue;
711     res.results.length.shouldEqual(2);
712 }
713 
714 @("sequence and rest") unittest
715 {
716     auto parser = match("a") ~ "b".match;
717     auto res = parser.parse("abc");
718     res.success.shouldBeTrue;
719     res.rest.shouldEqual("c");
720 }
721 
722 @("sequence fails") unittest
723 {
724     auto parser = match("a") ~ match("b");
725     auto res = parser.parse("ac");
726 
727     res.success.shouldBeFalse;
728 }
729 
730 @("sequence with discard result") unittest
731 {
732     auto parser = match("a", false) ~ match("b");
733     auto res = parser.parse("ab");
734     res.success.shouldBeTrue;
735     res.results.length.shouldEqual(1);
736 }
737 
738 @("sequence with transformation") unittest
739 {
740     auto parser = (match("a") ~ match("b")) ^^ (Variant[] result) {
741         string totalString;
742         foreach (Variant o; result)
743         {
744             if (o.type == typeid(string))
745             {
746                 totalString ~= o.get!(string);
747             }
748         }
749 
750         Variant v = totalString;
751         return [v];
752     };
753 
754     auto suc = parser.parse("ab");
755     assert(suc.success);
756     assert(suc.results.length == 1);
757     assert(suc.results[0] == "ab");
758 }
759 
760 @("sequence and optional") unittest
761 {
762     auto ab = -match("a") ~ match("b");
763     auto res = ab.parse("ab");
764     res.success.shouldBeTrue;
765     res = ab.parse("b");
766     res.success.shouldBeTrue;
767     res = ab.parse("c");
768     res.success.shouldBeFalse;
769     res = ab.parse("");
770     res.success.shouldBeFalse;
771 }
772 
773 @("sequence api not dsl") unittest
774 {
775     auto ab = sequence(match("a"), match("b"));
776     auto res = ab.parse("ab");
777     res.success.shouldBeTrue;
778 }
779 
780 @("sequence api") unittest
781 {
782     auto ab = sequence(match("a"), match("b"), match("c"));
783     auto res = ab.parse("abc");
784     res.success.shouldBeTrue;
785 }