1 /++
2  + Copyright: Copyright © 2015, Christian Köstlin
3  + License: MIT
4  + Authors: Christian Koestlin, Christian Köstlin
5  +/
6 
7 module pc4d.parsers;
8 
9 import pc4d.parser;
10 import std.array;
11 import std.conv;
12 import std.functional;
13 
14 /// convenient function to instantiate a AlphaNumericParser
15 auto alnum(T)(bool collect = true)
16 {
17     return new Regex(r"-?\w[\w\d]*", collect) ^^ (data) {
18         return variantArray(data[0]);
19     };
20 }
21 
22 /// the alnum parser
23 @("alnum parser") unittest
24 {
25     import unit_threaded;
26 
27     auto parser = alnum!(immutable(char))();
28     auto res = parser.parseAll("-Aa1234");
29     res.success.shouldBeTrue;
30     res.results[0].shouldEqual("-Aa1234");
31     res = parser.parseAll("a1234");
32     res.success.shouldBeTrue;
33 }
34 
35 /// class for matching alternatives
36 class Alternative(T) : Parser!(T)
37 {
38     Parser!(T)[] fParsers;
39 
40     this(Parser!(T)[] parsers...)
41     {
42         fParsers = parsers.dup;
43     }
44 
45     override ParseResult!(T) parse(T[] s)
46     {
47         foreach (parser; fParsers)
48         {
49             auto res = parser.parse(s);
50             if (res.success)
51             {
52                 return transform(res);
53             }
54         }
55         return ParseResult!(T).error("or did not match");
56     }
57 }
58 
59 /// convenient function
60 Parser!(T) or(T)(Parser!(T)[] parsers...)
61 {
62     return new Alternative!(T)(parsers);
63 }
64 
65 /// showing off the or dsl
66 @("or dsl") unittest
67 {
68     import unit_threaded;
69 
70     auto parser = or(match("a"), match("b"), match("c"));
71     parser.parse("a").success.shouldBeTrue;
72     parser.parse("b").success.shouldBeTrue;
73     parser.parse("c").success.shouldBeTrue;
74 }
75 
76 /// parser for blockcomments
77 static class BlockComment(T) : Parser!(T)
78 {
79     T[] fStart;
80     T[] fEnd;
81     bool fCollect;
82     this(T[] startString, T[] endString, bool collect = true)
83     {
84         fStart = startString;
85         fEnd = endString;
86         fCollect = collect;
87     }
88 
89     bool startsWith(T[] aInput, ulong startIdx, T[] expected)
90     {
91         T[] slice = aInput[startIdx .. $];
92         if (slice.length < expected.length)
93         {
94             return false;
95         }
96         for (int i = 0; i < expected.length; i++)
97         {
98             if (slice[i] != expected[i])
99             {
100                 return false;
101             }
102         }
103         return true;
104     }
105 
106     override ParseResult!(T) parse(T[] s)
107     {
108         if (startsWith(s, 0, fStart))
109         {
110             auto l = fStart.length;
111             for (auto i = l; i < s.length; i++)
112             {
113                 if (startsWith(s, i, fEnd))
114                 {
115                     auto lastIdx = i + fEnd.length;
116                     auto rest = s[lastIdx .. $];
117                     if (fCollect)
118                     {
119                         auto matched = s[0 .. lastIdx];
120                         return transform(success(rest, matched));
121                     }
122                     else
123                     {
124                         return success(rest);
125                     }
126                 }
127             }
128             return ParseResult!(T).error("");
129         }
130         else
131         {
132             return ParseResult!(T).error("");
133         }
134     }
135 }
136 
137 /// convenient function
138 Parser!(T) blockComment(T)(T[] startString, T[] endString, bool collect = true)
139 {
140     return new BlockComment!(T)(startString, endString, collect);
141 }
142 
143 /// blockComment can collect the comment itself
144 @("block comment - catch comment") unittest
145 {
146     import unit_threaded;
147 
148     auto parser = blockComment("/*", "*/", true);
149     auto res = parser.parseAll("/*abc*/");
150     res.success.shouldBeTrue;
151     res.fResults[0].shouldEqual("/*abc*/");
152 }
153 
154 /// blockComment can also throw the comment away
155 @("block comment - discard comment") unittest
156 {
157     import unit_threaded;
158 
159     auto parser = blockComment("/*", "*/", false);
160     auto res = parser.parseAll("/*abc*/");
161     res.success.shouldBeTrue;
162     res.fResults.length.shouldEqual(0);
163 }
164 
165 /++ example of an expression parser.
166  + expr -> term { + term }
167  + term -> factor { * factor }
168  + factor -> number | ( expr )
169  +/
170 static class ExprParser
171 {
172     Parser!(immutable(char)) lazyExpr()
173     {
174         return lazyParser(&expr);
175     }
176 
177     Parser!(immutable(char)) expr()
178     {
179         auto add = (term() ~ match("+", false) ~ term()) ^^ (input) {
180             return variantArray(input[0] + input[1]);
181         };
182         return add | term();
183     }
184 
185     Parser!(immutable(char)) term()
186     {
187         auto mult = (factor() ~ match("*", false) ~ factor()) ^^ (input) {
188             return variantArray(input[0] * input[1]);
189         };
190         return mult | factor();
191     }
192 
193     Parser!(immutable(char)) factor()
194     {
195         auto exprWithParens = match("(", false) ~ lazyExpr() ~ match(")", false);
196         return number!(immutable(char))() | exprWithParens;
197     }
198 }
199 
200 /// a simple expression parser
201 @("expression example") unittest
202 {
203     import unit_threaded;
204 
205     auto parser = new ExprParser;
206     auto p = parser.expr();
207     auto res = p.parse("1+2*3");
208     res.success.shouldBeTrue;
209     res.results[0].get!double.shouldEqual(7);
210 
211     res = p.parse("(1+2)*3");
212     res.success.shouldBeTrue;
213     res.results[0].get!double.shouldEqual(9);
214 }
215 
216 /// parser for parsing ints
217 static class Integer : Regex
218 {
219     this()
220     {
221         import std.conv;
222 
223         super(r"\d+") ^^ (input) {
224             return variantArray(input[0].get!string.to!int);
225         };
226     }
227 }
228 
229 /// convenience function to create an integer parser
230 Parser!(T) integer(T)()
231 {
232     return new Integer;
233 }
234 
235 /// unittests for integer
236 @("integer") unittest
237 {
238     import unit_threaded;
239 
240     auto parser = integer!(immutable(char));
241     auto res = parser.parse("123");
242     res.success.shouldBeTrue;
243     res.results[0].shouldEqual(123);
244 }
245 
246 /++
247  + convenient function to build a parser that kills the result of
248  + another parser e.g. killResults(match("a")) succeeds, but returns
249  + an empty result
250  +/
251 Parser!(T) killResults(T)(Parser!(T) parser)
252 {
253     return parser ^^ (Variant[] input) { Variant[] res; return res; };
254 }
255 
256 /// unittests for kill results
257 @("killResults") unittest
258 {
259     import unit_threaded;
260 
261     auto res = killResults(match("a")).parse("a");
262     res.success.shouldBeTrue;
263     res.results.length.shouldEqual(0);
264 }
265 
266 /++
267  + this parser is needed to build recursive parser hierarchies.
268  + look for expression.d for a more realistic example than in the unittest
269  +/
270 class Lazy(T) : Parser!(T)
271 {
272     Parser!(T) delegate() fCallable;
273 
274     this(Parser!(T) delegate() parser)
275     {
276         assert(parser != null);
277         fCallable = parser;
278     }
279 
280     this(Parser!(T) function() parser)
281     {
282         assert(parser != null);
283         fCallable = toDelegate(parser);
284     }
285 
286     override ParseResult!(T) parse(T[] s)
287     {
288         auto parser = fCallable();
289         return transform(parser.parse(s));
290     }
291 
292 }
293 
294 /// convenient function to instantiate a lazy parser with a delegate
295 Parser!(T) lazyParser(T)(Parser!(T) delegate() parser)
296 {
297     return new Lazy!(T)(parser);
298 }
299 
300 /// convenient function to instantiate a lazy parser with a function
301 Parser!(T) lazyParser(T)(Parser!(T) function() parser)
302 {
303     return new Lazy!(T)(parser);
304 }
305 
306 /// unittest to show the simplest usage of lazy
307 @("lazy") unittest
308 {
309     import unit_threaded;
310 
311     // endless -> a | a opt(endless)
312     struct Endless
313     {
314         static Parser!(immutable(char)) lazyEndless()
315         {
316             return lazyParser!(immutable(char))(&parser);
317         }
318 
319         static Parser!(immutable(char)) parser()
320         {
321             return match("a") ~ (-(lazyEndless()));
322         }
323     }
324 
325     auto p = Endless.parser();
326     auto res = p.parse("aa");
327     res.success.shouldBeTrue;
328     res.results.shouldEqual(["a", "a"]);
329 
330     res = p.parse("aab");
331     res.success.shouldBeTrue;
332     res.results.shouldEqual(["a", "a"]);
333     res.rest.shouldEqual("b");
334 }
335 
336 /// class for matching an array exactly
337 class Match(T) : Parser!(T)
338 {
339     T[] fExpected;
340     bool fCollect;
341     this(T[] expected, bool collect = true)
342     {
343         fExpected = expected;
344         fCollect = collect;
345     }
346 
347     bool startsWith(T[] aInput, T[] expected)
348     {
349         if (aInput.length < expected.length)
350         {
351             return false;
352         }
353         for (int i = 0; i < expected.length; i++)
354         {
355             if (aInput[i] != expected[i])
356             {
357                 return false;
358             }
359         }
360         return true;
361     }
362 
363     override ParseResult!(T) parse(T[] s)
364     {
365         if (startsWith(s, fExpected))
366         {
367             auto rest = s[fExpected.length .. $];
368             if (fCollect)
369             {
370                 return transform(success(rest, fExpected));
371             }
372             else
373             {
374                 return success(rest);
375             }
376         }
377         else
378         {
379             return ParseResult!(T).error(""); //"Expected: '" ~ fExpected ~ "' but got '" ~ s ~ "'");
380         }
381     }
382 }
383 
384 /// convenient function to instantiate a matcher
385 Parser!(T) match(T)(T[] s, bool collect = true)
386 {
387     return new Match!(T)(s, collect);
388 }
389 
390 /// matching a string
391 @("match") unittest
392 {
393     import unit_threaded;
394 
395     auto parser = match("test");
396     auto res = parser.parseAll("test");
397 
398     res.success.shouldBeTrue;
399     res.rest.length.shouldEqual(0);
400 }
401 
402 @("match 2") unittest
403 {
404     import unit_threaded;
405 
406     auto parser = match("test");
407     auto res = parser.parse("test2");
408     res.success.shouldBeTrue;
409     res.rest.shouldEqual("2");
410 
411     res = parser.parseAll("test2");
412     res.success.shouldBeFalse;
413 }
414 
415 /// transform match result
416 @("match + transform") unittest
417 {
418     import unit_threaded;
419 
420     auto parser = match("test") ^^ (objects) {
421         auto res = objects;
422         if (objects[0] == "test")
423         {
424             res[0] = "super";
425         }
426         return objects;
427     };
428     auto res = parser.parse("test");
429     res.success.shouldBeTrue;
430     res.results[0].shouldEqual("super");
431 }
432 
433 @("match - discard") unittest
434 {
435     import unit_threaded;
436 
437     auto parser = match("test", false);
438     auto res = parser.parseAll("test");
439     res.success.shouldBeTrue;
440     res.results.length.shouldEqual(0);
441 }
442 
443 @("match on arrays") unittest
444 {
445     import unit_threaded;
446 
447     auto parser = match([1, 2, 3]);
448     auto res = parser.parseAll([1, 2, 3]);
449     res.success.shouldBeTrue;
450     res.results.length.shouldEqual(1);
451     res.results[0].shouldEqual([1, 2, 3]);
452 }
453 
454 /// convenient function to instantiate a number parser
455 Parser!(T) number(T)()
456 {
457     return new Regex(r"[-+]?[0-9]*\.?[0-9]+") ^^ (Variant[] input) {
458         auto output = appender!(Variant[])();
459         foreach (Variant o; input)
460         {
461             string s = o.get!(string);
462             double h = std.conv.to!(double)(s);
463             Variant v = h;
464             output.put(v);
465         }
466         return output.data;
467     };
468 }
469 
470 /// unittests for number parser
471 @("number parser") unittest
472 {
473     import unit_threaded;
474 
475     auto parser = number!(immutable(char))();
476     auto res = parser.parse("123.123");
477     res.success.shouldBeTrue;
478     res.results[0].shouldEqual(123.123);
479 }
480 
481 /// class for matching something optional
482 class Optional(T) : Parser!(T)
483 {
484     Parser!(T) fParser;
485 
486     this(Parser!(T) parser)
487     {
488         fParser = parser;
489     }
490 
491     override ParseResult!(T) parse(T[] s)
492     {
493         auto res = fParser.parse(s);
494         if (!res.success)
495         {
496             return success(s);
497         }
498         else
499         {
500             return res;
501         }
502     }
503 }
504 
505 /// unittests to show the usage of OptionalParser and its dsl '-'
506 @("optional and dsl") unittest
507 {
508     import unit_threaded;
509 
510     auto abc = match("abc");
511     auto opt = -abc;
512     auto res = opt.parse("abc");
513     res.success.shouldBeTrue;
514     res.results.length.shouldEqual(1);
515     res.results[0].shouldEqual("abc");
516     res.rest.length.shouldEqual(0);
517 }
518 
519 /// unittest to show optional in action.
520 @("optional") unittest
521 {
522     import unit_threaded;
523 
524     auto abc = match("abc");
525     auto opt = -abc;
526     auto res = opt.parse("efg");
527     auto withoutOptional = abc.parse("efg");
528     withoutOptional.success.shouldBeFalse;
529     res.success.shouldBeTrue;
530     res.results.length.shouldEqual(0);
531     res.rest.shouldEqual("efg");
532 }
533 
534 /// parse a number with or without sign
535 @("parse number with or without +") unittest
536 {
537     import unit_threaded;
538 
539     auto sign = match("+");
540     auto value = match("1");
541     auto test = (-sign) ~ value;
542     auto resWithSign = test.parse("+1");
543     resWithSign.success.shouldBeTrue;
544     resWithSign.results.length.shouldEqual(2);
545     auto resWithoutSign = test.parse("1");
546     resWithoutSign.success.shouldBeTrue;
547 }
548 
549 /++
550  + parser for regular expressions
551  + a successful parse step returns all captures in an array
552  +/
553 class Regex : Parser!(immutable(char))
554 {
555     string fRegex;
556     bool fCollect;
557     this(string regex, bool collect = true)
558     {
559         fRegex = regex;
560         fCollect = collect;
561     }
562 
563     override ParseResult!(immutable(char)) parse(string s)
564     {
565         import std.regex;
566 
567         auto res = std.regex.match(s, std.regex.regex(fRegex));
568         if (res.empty())
569         {
570             return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex);
571         }
572         else if (res.pre.length > 0)
573         {
574             return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex);
575         }
576         else
577         {
578             if (fCollect)
579             {
580                 auto results = appender!(Variant[])();
581                 foreach (c; res.captures)
582                 {
583                     Variant v = c;
584                     results.put(v);
585                 }
586                 return transform(ParseResult!(immutable(char)).ok(res.post, results.data));
587             }
588             else
589             {
590                 return success(res.post);
591             }
592         }
593     }
594 }
595 
596 /// convenient function to instantiate a regexparser
597 Parser!(T) regex(T)(T[] s, bool collect = true)
598 {
599     return new Regex(s, collect);
600 }
601 
602 /// regexParser
603 @("regex parser") unittest
604 {
605     import unit_threaded;
606 
607     auto res = regex("(a)(.)(c)").parse("abcd");
608     res.success.shouldBeTrue;
609     res.results.shouldEqual(["abc", "a", "b", "c"]);
610     res.rest.shouldEqual("d");
611 }
612 
613 /// regexParser works from the start of the input
614 @("regex parser parses from start") unittest
615 {
616     import unit_threaded;
617 
618     auto res = regex("abc").parse("babc");
619     res.success.shouldBeFalse;
620 }
621 
622 /// class for matching repetitions
623 static class Repetition(T) : Parser!(T)
624 {
625     Parser!(T) fToRepeat;
626     this(Parser!(T) toRepeat)
627     {
628         fToRepeat = toRepeat;
629     }
630 
631     override ParseResult!(T) parse(T[] s)
632     {
633         Variant[] results;
634         auto rest = s;
635         while (true)
636         {
637             auto res = fToRepeat.parse(rest);
638             if (res.success)
639             {
640                 rest = res.rest;
641                 results = results ~ res.results;
642             }
643             else
644             {
645                 break;
646             }
647         }
648         return transform(ParseResult!(T).ok(rest, results));
649     }
650 }
651 
652 /// unittest for repetition
653 @("repetition more than one") unittest
654 {
655     import unit_threaded;
656 
657     auto parser = *match("a");
658     auto res = parser.parse("aa");
659     res.success.shouldBeTrue;
660     res.rest.shouldEqual("");
661     res.results.length.shouldEqual(2);
662 }
663 
664 @("repetition none") unittest
665 {
666     import unit_threaded;
667 
668     auto parser = *match("a");
669     auto res = parser.parse("b");
670     res.success.shouldBeTrue;
671     res.rest.shouldEqual("b");
672 }
673 
674 @("repetition with rest") unittest
675 {
676     import unit_threaded;
677 
678     auto parser = *match("a");
679     auto res = parser.parse("ab");
680     res.success.shouldBeTrue;
681     res.rest.shouldEqual("b");
682 }
683 
684 @("repetition with other parser") unittest
685 {
686     import unit_threaded;
687 
688     auto parser = *(match("+") ~ match("-"));
689     auto res = parser.parse("+-+-+");
690     res.success.shouldBeTrue;
691     res.rest.shouldEqual("+");
692 }
693 
694 @("repetition discarding result") unittest
695 {
696     import unit_threaded;
697 
698     auto parser = *match("a", false);
699     auto res = parser.parse("aaaa");
700     res.success.shouldBeTrue;
701     res.rest.length.shouldEqual(0);
702     res.results.length.shouldEqual(0);
703 }
704 
705 @("repetition transforming result") unittest
706 {
707     import unit_threaded;
708 
709     auto parser = (*match("a")) ^^ (input) { return variantArray(input.length); };
710     auto suc = parser.parseAll("aaaaa");
711     suc.success.shouldBeTrue;
712     suc.results.length.shouldEqual(1);
713     suc.results[0].get!(ulong).shouldEqual(5);
714 }
715 
716 /// class for matching sequences
717 class Sequence(T) : Parser!(T)
718 {
719     Parser!(T)[] fParsers;
720     this(Parser!(T)[] parsers...)
721     {
722         fParsers = parsers.dup;
723     }
724 
725     override ParseResult!(T) parse(T[] s)
726     {
727         auto resultObjects = appender!(Variant[])();
728         T[] h = s;
729         foreach (parser; fParsers)
730         {
731             auto res = parser.parse(h);
732             if (res.success)
733             {
734                 h = res.rest;
735                 resultObjects.put(res.results);
736             }
737             else
738             {
739                 return res;
740             }
741         }
742         return transform(ParseResult!(T).ok(h, resultObjects.data));
743     }
744 }
745 
746 /// convenient function
747 Parser!(T) sequence(T)(Parser!(T)[] parsers...)
748 {
749     return new Sequence!(T)(parsers);
750 }
751 
752 /// unittests showing usage of sequence parser and dsl '~'
753 @("sequence") unittest
754 {
755     import unit_threaded;
756 
757     auto parser = match("a") ~ match("b");
758     auto res = parser.parse("ab");
759     res.success.shouldBeTrue;
760     res.results.length.shouldEqual(2);
761 }
762 
763 @("sequence and rest") unittest
764 {
765     import unit_threaded;
766 
767     auto parser = match("a") ~ "b".match;
768     auto res = parser.parse("abc");
769     res.success.shouldBeTrue;
770     res.rest.shouldEqual("c");
771 }
772 
773 @("sequence fails") unittest
774 {
775     import unit_threaded;
776 
777     auto parser = match("a") ~ match("b");
778     auto res = parser.parse("ac");
779 
780     res.success.shouldBeFalse;
781 }
782 
783 @("sequence with discard result") unittest
784 {
785     import unit_threaded;
786 
787     auto parser = match("a", false) ~ match("b");
788     auto res = parser.parse("ab");
789     res.success.shouldBeTrue;
790     res.results.length.shouldEqual(1);
791 }
792 
793 @("sequence with transformation") unittest
794 {
795     auto parser = (match("a") ~ match("b")) ^^ (Variant[] result) {
796         string totalString;
797         foreach (Variant o; result)
798         {
799             if (o.type == typeid(string))
800             {
801                 totalString ~= o.get!(string);
802             }
803         }
804 
805         Variant v = totalString;
806         return [v];
807     };
808 
809     auto suc = parser.parse("ab");
810     assert(suc.success);
811     assert(suc.results.length == 1);
812     assert(suc.results[0] == "ab");
813 }
814 
815 @("sequence and optional") unittest
816 {
817     import unit_threaded;
818 
819     auto ab = -match("a") ~ match("b");
820     auto res = ab.parse("ab");
821     res.success.shouldBeTrue;
822     res = ab.parse("b");
823     res.success.shouldBeTrue;
824     res = ab.parse("c");
825     res.success.shouldBeFalse;
826     res = ab.parse("");
827     res.success.shouldBeFalse;
828 }
829 
830 @("sequence api not dsl") unittest
831 {
832     import unit_threaded;
833 
834     auto ab = sequence(match("a"), match("b"));
835     auto res = ab.parse("ab");
836     res.success.shouldBeTrue;
837 }
838 
839 @("sequence api") unittest
840 {
841     import unit_threaded;
842 
843     auto ab = sequence(match("a"), match("b"), match("c"));
844     auto res = ab.parse("abc");
845     res.success.shouldBeTrue;
846 }