1 /++ 2 + Copyright: Copyright © 2015, Christian Köstlin 3 + License: MIT 4 + Authors: Christian Koestlin, Christian Köstlin 5 +/ 6 7 module pc4d.parsers; 8 9 import pc4d.parser; 10 import std.array; 11 import std.conv; 12 import std.functional; 13 14 /// convenient function to instantiate a AlphaNumericParser 15 auto alnum(T)(bool collect = true) 16 { 17 return new Regex(r"-?\w[\w\d]*", collect) ^^ (data) { 18 return variantArray(data[0]); 19 }; 20 } 21 22 /// the alnum parser 23 @("alnum parser") unittest 24 { 25 import unit_threaded; 26 27 auto parser = alnum!(immutable(char))(); 28 auto res = parser.parseAll("-Aa1234"); 29 res.success.shouldBeTrue; 30 res.results[0].shouldEqual("-Aa1234"); 31 res = parser.parseAll("a1234"); 32 res.success.shouldBeTrue; 33 } 34 35 /// class for matching alternatives 36 class Alternative(T) : Parser!(T) 37 { 38 Parser!(T)[] fParsers; 39 40 this(Parser!(T)[] parsers...) 41 { 42 fParsers = parsers.dup; 43 } 44 45 override ParseResult!(T) parse(T[] s) 46 { 47 foreach (parser; fParsers) 48 { 49 auto res = parser.parse(s); 50 if (res.success) 51 { 52 return transform(res); 53 } 54 } 55 return ParseResult!(T).error("or did not match"); 56 } 57 } 58 59 /// convenient function 60 Parser!(T) or(T)(Parser!(T)[] parsers...) 61 { 62 return new Alternative!(T)(parsers); 63 } 64 65 /// showing off the or dsl 66 @("or dsl") unittest 67 { 68 import unit_threaded; 69 70 auto parser = or(match("a"), match("b"), match("c")); 71 parser.parse("a").success.shouldBeTrue; 72 parser.parse("b").success.shouldBeTrue; 73 parser.parse("c").success.shouldBeTrue; 74 } 75 76 /// parser for blockcomments 77 static class BlockComment(T) : Parser!(T) 78 { 79 T[] fStart; 80 T[] fEnd; 81 bool fCollect; 82 this(T[] startString, T[] endString, bool collect = true) 83 { 84 fStart = startString; 85 fEnd = endString; 86 fCollect = collect; 87 } 88 89 bool startsWith(T[] aInput, ulong startIdx, T[] expected) 90 { 91 T[] slice = aInput[startIdx .. $]; 92 if (slice.length < expected.length) 93 { 94 return false; 95 } 96 for (int i = 0; i < expected.length; i++) 97 { 98 if (slice[i] != expected[i]) 99 { 100 return false; 101 } 102 } 103 return true; 104 } 105 106 override ParseResult!(T) parse(T[] s) 107 { 108 if (startsWith(s, 0, fStart)) 109 { 110 auto l = fStart.length; 111 for (auto i = l; i < s.length; i++) 112 { 113 if (startsWith(s, i, fEnd)) 114 { 115 auto lastIdx = i + fEnd.length; 116 auto rest = s[lastIdx .. $]; 117 if (fCollect) 118 { 119 auto matched = s[0 .. lastIdx]; 120 return transform(success(rest, matched)); 121 } 122 else 123 { 124 return success(rest); 125 } 126 } 127 } 128 return ParseResult!(T).error(""); 129 } 130 else 131 { 132 return ParseResult!(T).error(""); 133 } 134 } 135 } 136 137 /// convenient function 138 Parser!(T) blockComment(T)(T[] startString, T[] endString, bool collect = true) 139 { 140 return new BlockComment!(T)(startString, endString, collect); 141 } 142 143 /// blockComment can collect the comment itself 144 @("block comment - catch comment") unittest 145 { 146 import unit_threaded; 147 148 auto parser = blockComment("/*", "*/", true); 149 auto res = parser.parseAll("/*abc*/"); 150 res.success.shouldBeTrue; 151 res.fResults[0].shouldEqual("/*abc*/"); 152 } 153 154 /// blockComment can also throw the comment away 155 @("block comment - discard comment") unittest 156 { 157 import unit_threaded; 158 159 auto parser = blockComment("/*", "*/", false); 160 auto res = parser.parseAll("/*abc*/"); 161 res.success.shouldBeTrue; 162 res.fResults.length.shouldEqual(0); 163 } 164 165 /++ example of an expression parser. 166 + expr -> term { + term } 167 + term -> factor { * factor } 168 + factor -> number | ( expr ) 169 +/ 170 static class ExprParser 171 { 172 Parser!(immutable(char)) lazyExpr() 173 { 174 return lazyParser(&expr); 175 } 176 177 Parser!(immutable(char)) expr() 178 { 179 auto add = (term() ~ match("+", false) ~ term()) ^^ (input) { 180 return variantArray(input[0] + input[1]); 181 }; 182 return add | term(); 183 } 184 185 Parser!(immutable(char)) term() 186 { 187 auto mult = (factor() ~ match("*", false) ~ factor()) ^^ (input) { 188 return variantArray(input[0] * input[1]); 189 }; 190 return mult | factor(); 191 } 192 193 Parser!(immutable(char)) factor() 194 { 195 auto exprWithParens = match("(", false) ~ lazyExpr() ~ match(")", false); 196 return number!(immutable(char))() | exprWithParens; 197 } 198 } 199 200 /// a simple expression parser 201 @("expression example") unittest 202 { 203 import unit_threaded; 204 205 auto parser = new ExprParser; 206 auto p = parser.expr(); 207 auto res = p.parse("1+2*3"); 208 res.success.shouldBeTrue; 209 res.results[0].get!double.shouldEqual(7); 210 211 res = p.parse("(1+2)*3"); 212 res.success.shouldBeTrue; 213 res.results[0].get!double.shouldEqual(9); 214 } 215 216 /// parser for parsing ints 217 static class Integer : Regex 218 { 219 this() 220 { 221 import std.conv; 222 223 super(r"\d+") ^^ (input) { 224 return variantArray(input[0].get!string.to!int); 225 }; 226 } 227 } 228 229 /// convenience function to create an integer parser 230 Parser!(T) integer(T)() 231 { 232 return new Integer; 233 } 234 235 /// unittests for integer 236 @("integer") unittest 237 { 238 import unit_threaded; 239 240 auto parser = integer!(immutable(char)); 241 auto res = parser.parse("123"); 242 res.success.shouldBeTrue; 243 res.results[0].shouldEqual(123); 244 } 245 246 /++ 247 + convenient function to build a parser that kills the result of 248 + another parser e.g. killResults(match("a")) succeeds, but returns 249 + an empty result 250 +/ 251 Parser!(T) killResults(T)(Parser!(T) parser) 252 { 253 return parser ^^ (Variant[] input) { Variant[] res; return res; }; 254 } 255 256 /// unittests for kill results 257 @("killResults") unittest 258 { 259 import unit_threaded; 260 261 auto res = killResults(match("a")).parse("a"); 262 res.success.shouldBeTrue; 263 res.results.length.shouldEqual(0); 264 } 265 266 /++ 267 + this parser is needed to build recursive parser hierarchies. 268 + look for expression.d for a more realistic example than in the unittest 269 +/ 270 class Lazy(T) : Parser!(T) 271 { 272 Parser!(T) delegate() fCallable; 273 274 this(Parser!(T) delegate() parser) 275 { 276 assert(parser != null); 277 fCallable = parser; 278 } 279 280 this(Parser!(T) function() parser) 281 { 282 assert(parser != null); 283 fCallable = toDelegate(parser); 284 } 285 286 override ParseResult!(T) parse(T[] s) 287 { 288 auto parser = fCallable(); 289 return transform(parser.parse(s)); 290 } 291 292 } 293 294 /// convenient function to instantiate a lazy parser with a delegate 295 Parser!(T) lazyParser(T)(Parser!(T) delegate() parser) 296 { 297 return new Lazy!(T)(parser); 298 } 299 300 /// convenient function to instantiate a lazy parser with a function 301 Parser!(T) lazyParser(T)(Parser!(T) function() parser) 302 { 303 return new Lazy!(T)(parser); 304 } 305 306 /// unittest to show the simplest usage of lazy 307 @("lazy") unittest 308 { 309 import unit_threaded; 310 311 // endless -> a | a opt(endless) 312 struct Endless 313 { 314 static Parser!(immutable(char)) lazyEndless() 315 { 316 return lazyParser!(immutable(char))(&parser); 317 } 318 319 static Parser!(immutable(char)) parser() 320 { 321 return match("a") ~ (-(lazyEndless())); 322 } 323 } 324 325 auto p = Endless.parser(); 326 auto res = p.parse("aa"); 327 res.success.shouldBeTrue; 328 res.results.shouldEqual(["a", "a"]); 329 330 res = p.parse("aab"); 331 res.success.shouldBeTrue; 332 res.results.shouldEqual(["a", "a"]); 333 res.rest.shouldEqual("b"); 334 } 335 336 /// class for matching an array exactly 337 class Match(T) : Parser!(T) 338 { 339 T[] fExpected; 340 bool fCollect; 341 this(T[] expected, bool collect = true) 342 { 343 fExpected = expected; 344 fCollect = collect; 345 } 346 347 bool startsWith(T[] aInput, T[] expected) 348 { 349 if (aInput.length < expected.length) 350 { 351 return false; 352 } 353 for (int i = 0; i < expected.length; i++) 354 { 355 if (aInput[i] != expected[i]) 356 { 357 return false; 358 } 359 } 360 return true; 361 } 362 363 override ParseResult!(T) parse(T[] s) 364 { 365 if (startsWith(s, fExpected)) 366 { 367 auto rest = s[fExpected.length .. $]; 368 if (fCollect) 369 { 370 return transform(success(rest, fExpected)); 371 } 372 else 373 { 374 return success(rest); 375 } 376 } 377 else 378 { 379 return ParseResult!(T).error(""); //"Expected: '" ~ fExpected ~ "' but got '" ~ s ~ "'"); 380 } 381 } 382 } 383 384 /// convenient function to instantiate a matcher 385 Parser!(T) match(T)(T[] s, bool collect = true) 386 { 387 return new Match!(T)(s, collect); 388 } 389 390 /// matching a string 391 @("match") unittest 392 { 393 import unit_threaded; 394 395 auto parser = match("test"); 396 auto res = parser.parseAll("test"); 397 398 res.success.shouldBeTrue; 399 res.rest.length.shouldEqual(0); 400 } 401 402 @("match 2") unittest 403 { 404 import unit_threaded; 405 406 auto parser = match("test"); 407 auto res = parser.parse("test2"); 408 res.success.shouldBeTrue; 409 res.rest.shouldEqual("2"); 410 411 res = parser.parseAll("test2"); 412 res.success.shouldBeFalse; 413 } 414 415 /// transform match result 416 @("match + transform") unittest 417 { 418 import unit_threaded; 419 420 auto parser = match("test") ^^ (objects) { 421 auto res = objects; 422 if (objects[0] == "test") 423 { 424 res[0] = "super"; 425 } 426 return objects; 427 }; 428 auto res = parser.parse("test"); 429 res.success.shouldBeTrue; 430 res.results[0].shouldEqual("super"); 431 } 432 433 @("match - discard") unittest 434 { 435 import unit_threaded; 436 437 auto parser = match("test", false); 438 auto res = parser.parseAll("test"); 439 res.success.shouldBeTrue; 440 res.results.length.shouldEqual(0); 441 } 442 443 @("match on arrays") unittest 444 { 445 import unit_threaded; 446 447 auto parser = match([1, 2, 3]); 448 auto res = parser.parseAll([1, 2, 3]); 449 res.success.shouldBeTrue; 450 res.results.length.shouldEqual(1); 451 res.results[0].shouldEqual([1, 2, 3]); 452 } 453 454 /// convenient function to instantiate a number parser 455 Parser!(T) number(T)() 456 { 457 return new Regex(r"[-+]?[0-9]*\.?[0-9]+") ^^ (Variant[] input) { 458 auto output = appender!(Variant[])(); 459 foreach (Variant o; input) 460 { 461 string s = o.get!(string); 462 double h = std.conv.to!(double)(s); 463 Variant v = h; 464 output.put(v); 465 } 466 return output.data; 467 }; 468 } 469 470 /// unittests for number parser 471 @("number parser") unittest 472 { 473 import unit_threaded; 474 475 auto parser = number!(immutable(char))(); 476 auto res = parser.parse("123.123"); 477 res.success.shouldBeTrue; 478 res.results[0].shouldEqual(123.123); 479 } 480 481 /// class for matching something optional 482 class Optional(T) : Parser!(T) 483 { 484 Parser!(T) fParser; 485 486 this(Parser!(T) parser) 487 { 488 fParser = parser; 489 } 490 491 override ParseResult!(T) parse(T[] s) 492 { 493 auto res = fParser.parse(s); 494 if (!res.success) 495 { 496 return success(s); 497 } 498 else 499 { 500 return res; 501 } 502 } 503 } 504 505 /// unittests to show the usage of OptionalParser and its dsl '-' 506 @("optional and dsl") unittest 507 { 508 import unit_threaded; 509 510 auto abc = match("abc"); 511 auto opt = -abc; 512 auto res = opt.parse("abc"); 513 res.success.shouldBeTrue; 514 res.results.length.shouldEqual(1); 515 res.results[0].shouldEqual("abc"); 516 res.rest.length.shouldEqual(0); 517 } 518 519 /// unittest to show optional in action. 520 @("optional") unittest 521 { 522 import unit_threaded; 523 524 auto abc = match("abc"); 525 auto opt = -abc; 526 auto res = opt.parse("efg"); 527 auto withoutOptional = abc.parse("efg"); 528 withoutOptional.success.shouldBeFalse; 529 res.success.shouldBeTrue; 530 res.results.length.shouldEqual(0); 531 res.rest.shouldEqual("efg"); 532 } 533 534 /// parse a number with or without sign 535 @("parse number with or without +") unittest 536 { 537 import unit_threaded; 538 539 auto sign = match("+"); 540 auto value = match("1"); 541 auto test = (-sign) ~ value; 542 auto resWithSign = test.parse("+1"); 543 resWithSign.success.shouldBeTrue; 544 resWithSign.results.length.shouldEqual(2); 545 auto resWithoutSign = test.parse("1"); 546 resWithoutSign.success.shouldBeTrue; 547 } 548 549 /++ 550 + parser for regular expressions 551 + a successful parse step returns all captures in an array 552 +/ 553 class Regex : Parser!(immutable(char)) 554 { 555 string fRegex; 556 bool fCollect; 557 this(string regex, bool collect = true) 558 { 559 fRegex = regex; 560 fCollect = collect; 561 } 562 563 override ParseResult!(immutable(char)) parse(string s) 564 { 565 import std.regex; 566 567 auto res = std.regex.match(s, std.regex.regex(fRegex)); 568 if (res.empty()) 569 { 570 return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex); 571 } 572 else if (res.pre.length > 0) 573 { 574 return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex); 575 } 576 else 577 { 578 if (fCollect) 579 { 580 auto results = appender!(Variant[])(); 581 foreach (c; res.captures) 582 { 583 Variant v = c; 584 results.put(v); 585 } 586 return transform(ParseResult!(immutable(char)).ok(res.post, results.data)); 587 } 588 else 589 { 590 return success(res.post); 591 } 592 } 593 } 594 } 595 596 /// convenient function to instantiate a regexparser 597 Parser!(T) regex(T)(T[] s, bool collect = true) 598 { 599 return new Regex(s, collect); 600 } 601 602 /// regexParser 603 @("regex parser") unittest 604 { 605 import unit_threaded; 606 607 auto res = regex("(a)(.)(c)").parse("abcd"); 608 res.success.shouldBeTrue; 609 res.results.shouldEqual(["abc", "a", "b", "c"]); 610 res.rest.shouldEqual("d"); 611 } 612 613 /// regexParser works from the start of the input 614 @("regex parser parses from start") unittest 615 { 616 import unit_threaded; 617 618 auto res = regex("abc").parse("babc"); 619 res.success.shouldBeFalse; 620 } 621 622 /// class for matching repetitions 623 static class Repetition(T) : Parser!(T) 624 { 625 Parser!(T) fToRepeat; 626 this(Parser!(T) toRepeat) 627 { 628 fToRepeat = toRepeat; 629 } 630 631 override ParseResult!(T) parse(T[] s) 632 { 633 Variant[] results; 634 auto rest = s; 635 while (true) 636 { 637 auto res = fToRepeat.parse(rest); 638 if (res.success) 639 { 640 rest = res.rest; 641 results = results ~ res.results; 642 } 643 else 644 { 645 break; 646 } 647 } 648 return transform(ParseResult!(T).ok(rest, results)); 649 } 650 } 651 652 /// unittest for repetition 653 @("repetition more than one") unittest 654 { 655 import unit_threaded; 656 657 auto parser = *match("a"); 658 auto res = parser.parse("aa"); 659 res.success.shouldBeTrue; 660 res.rest.shouldEqual(""); 661 res.results.length.shouldEqual(2); 662 } 663 664 @("repetition none") unittest 665 { 666 import unit_threaded; 667 668 auto parser = *match("a"); 669 auto res = parser.parse("b"); 670 res.success.shouldBeTrue; 671 res.rest.shouldEqual("b"); 672 } 673 674 @("repetition with rest") unittest 675 { 676 import unit_threaded; 677 678 auto parser = *match("a"); 679 auto res = parser.parse("ab"); 680 res.success.shouldBeTrue; 681 res.rest.shouldEqual("b"); 682 } 683 684 @("repetition with other parser") unittest 685 { 686 import unit_threaded; 687 688 auto parser = *(match("+") ~ match("-")); 689 auto res = parser.parse("+-+-+"); 690 res.success.shouldBeTrue; 691 res.rest.shouldEqual("+"); 692 } 693 694 @("repetition discarding result") unittest 695 { 696 import unit_threaded; 697 698 auto parser = *match("a", false); 699 auto res = parser.parse("aaaa"); 700 res.success.shouldBeTrue; 701 res.rest.length.shouldEqual(0); 702 res.results.length.shouldEqual(0); 703 } 704 705 @("repetition transforming result") unittest 706 { 707 import unit_threaded; 708 709 auto parser = (*match("a")) ^^ (input) { return variantArray(input.length); }; 710 auto suc = parser.parseAll("aaaaa"); 711 suc.success.shouldBeTrue; 712 suc.results.length.shouldEqual(1); 713 suc.results[0].get!(ulong).shouldEqual(5); 714 } 715 716 /// class for matching sequences 717 class Sequence(T) : Parser!(T) 718 { 719 Parser!(T)[] fParsers; 720 this(Parser!(T)[] parsers...) 721 { 722 fParsers = parsers.dup; 723 } 724 725 override ParseResult!(T) parse(T[] s) 726 { 727 auto resultObjects = appender!(Variant[])(); 728 T[] h = s; 729 foreach (parser; fParsers) 730 { 731 auto res = parser.parse(h); 732 if (res.success) 733 { 734 h = res.rest; 735 resultObjects.put(res.results); 736 } 737 else 738 { 739 return res; 740 } 741 } 742 return transform(ParseResult!(T).ok(h, resultObjects.data)); 743 } 744 } 745 746 /// convenient function 747 Parser!(T) sequence(T)(Parser!(T)[] parsers...) 748 { 749 return new Sequence!(T)(parsers); 750 } 751 752 /// unittests showing usage of sequence parser and dsl '~' 753 @("sequence") unittest 754 { 755 import unit_threaded; 756 757 auto parser = match("a") ~ match("b"); 758 auto res = parser.parse("ab"); 759 res.success.shouldBeTrue; 760 res.results.length.shouldEqual(2); 761 } 762 763 @("sequence and rest") unittest 764 { 765 import unit_threaded; 766 767 auto parser = match("a") ~ "b".match; 768 auto res = parser.parse("abc"); 769 res.success.shouldBeTrue; 770 res.rest.shouldEqual("c"); 771 } 772 773 @("sequence fails") unittest 774 { 775 import unit_threaded; 776 777 auto parser = match("a") ~ match("b"); 778 auto res = parser.parse("ac"); 779 780 res.success.shouldBeFalse; 781 } 782 783 @("sequence with discard result") unittest 784 { 785 import unit_threaded; 786 787 auto parser = match("a", false) ~ match("b"); 788 auto res = parser.parse("ab"); 789 res.success.shouldBeTrue; 790 res.results.length.shouldEqual(1); 791 } 792 793 @("sequence with transformation") unittest 794 { 795 auto parser = (match("a") ~ match("b")) ^^ (Variant[] result) { 796 string totalString; 797 foreach (Variant o; result) 798 { 799 if (o.type == typeid(string)) 800 { 801 totalString ~= o.get!(string); 802 } 803 } 804 805 Variant v = totalString; 806 return [v]; 807 }; 808 809 auto suc = parser.parse("ab"); 810 assert(suc.success); 811 assert(suc.results.length == 1); 812 assert(suc.results[0] == "ab"); 813 } 814 815 @("sequence and optional") unittest 816 { 817 import unit_threaded; 818 819 auto ab = -match("a") ~ match("b"); 820 auto res = ab.parse("ab"); 821 res.success.shouldBeTrue; 822 res = ab.parse("b"); 823 res.success.shouldBeTrue; 824 res = ab.parse("c"); 825 res.success.shouldBeFalse; 826 res = ab.parse(""); 827 res.success.shouldBeFalse; 828 } 829 830 @("sequence api not dsl") unittest 831 { 832 import unit_threaded; 833 834 auto ab = sequence(match("a"), match("b")); 835 auto res = ab.parse("ab"); 836 res.success.shouldBeTrue; 837 } 838 839 @("sequence api") unittest 840 { 841 import unit_threaded; 842 843 auto ab = sequence(match("a"), match("b"), match("c")); 844 auto res = ab.parse("abc"); 845 res.success.shouldBeTrue; 846 }