1 /++ 2 + Copyright: Copyright © 2015, Christian Köstlin 3 + License: MIT 4 + Authors: Christian Koestlin, Christian Köstlin 5 +/ 6 7 module pc4d.parsers; 8 9 version (unittest) import unit_threaded; 10 11 import pc4d.parser; 12 import std.array; 13 import std.conv; 14 import std.functional; 15 16 /// convenient function to instantiate a AlphaNumericParser 17 auto alnum(T)(bool collect = true) 18 { 19 return new Regex(`-?\w[\w\d]*`, collect) ^^ (data) { 20 return variantArray(data[0]); 21 }; 22 } 23 24 /// the alnum parser 25 @("alnum parser") unittest 26 { 27 auto parser = alnum!(immutable(char))(); 28 auto res = parser.parseAll("-Aa1234"); 29 res.success.should == true; 30 res.results[0].should == "-Aa1234"; 31 res = parser.parseAll("a1234"); 32 res.success.should == true; 33 } 34 35 /// class for matching alternatives 36 class Alternative(T) : Parser!(T) 37 { 38 Parser!(T)[] fParsers; 39 40 this(Parser!(T)[] parsers...) 41 { 42 fParsers = parsers.dup; 43 } 44 45 override ParseResult!(T) parse(T[] s) 46 { 47 foreach (parser; fParsers) 48 { 49 auto res = parser.parse(s); 50 if (res.success) 51 { 52 return transform(res); 53 } 54 } 55 return ParseResult!(T).error("or did not match"); 56 } 57 } 58 59 /// convenient function 60 Parser!(T) or(T)(Parser!(T)[] parsers...) 61 { 62 return new Alternative!(T)(parsers); 63 } 64 65 /// showing off the or dsl 66 @("or dsl") unittest 67 { 68 auto parser = or(match("a"), match("b"), match("c")); 69 parser.parse("a").success.should == true; 70 parser.parse("b").success.should == true; 71 parser.parse("c").success.should == true; 72 } 73 74 /// parser for blockcomments 75 static class BlockComment(T) : Parser!(T) 76 { 77 T[] fStart; 78 T[] fEnd; 79 bool fCollect; 80 this(T[] startString, T[] endString, bool collect = true) 81 { 82 fStart = startString; 83 fEnd = endString; 84 fCollect = collect; 85 } 86 87 bool startsWith(T[] aInput, ulong startIdx, T[] expected) 88 { 89 T[] slice = aInput[startIdx .. $]; 90 if (slice.length < expected.length) 91 { 92 return false; 93 } 94 for (int i = 0; i < expected.length; i++) 95 { 96 if (slice[i] != expected[i]) 97 { 98 return false; 99 } 100 } 101 return true; 102 } 103 104 override ParseResult!(T) parse(T[] s) 105 { 106 if (startsWith(s, 0, fStart)) 107 { 108 auto l = fStart.length; 109 for (auto i = l; i < s.length; i++) 110 { 111 if (startsWith(s, i, fEnd)) 112 { 113 auto lastIdx = i + fEnd.length; 114 auto rest = s[lastIdx .. $]; 115 if (fCollect) 116 { 117 auto matched = s[0 .. lastIdx]; 118 return transform(success(rest, matched)); 119 } 120 else 121 { 122 return success(rest); 123 } 124 } 125 } 126 return ParseResult!(T).error(""); 127 } 128 else 129 { 130 return ParseResult!(T).error(""); 131 } 132 } 133 } 134 135 /// convenient function 136 Parser!(T) blockComment(T)(T[] startString, T[] endString, bool collect = true) 137 { 138 return new BlockComment!(T)(startString, endString, collect); 139 } 140 141 /// blockComment can collect the comment itself 142 @("block comment - catch comment") unittest 143 { 144 auto parser = blockComment("/*", "*/", true); 145 auto res = parser.parseAll("/*abc*/"); 146 res.success.should == true; 147 res.fResults[0].should == "/*abc*/"; 148 } 149 150 /// blockComment can also throw the comment away 151 @("block comment - discard comment") unittest 152 { 153 auto parser = blockComment("/*", "*/", false); 154 auto res = parser.parseAll("/*abc*/"); 155 res.success.should == true; 156 res.fResults.length.should == 0; 157 } 158 159 /++ example of an expression parser. 160 + expr -> term { + term } 161 + term -> factor { * factor } 162 + factor -> number | ( expr ) 163 +/ 164 static class ExprParser 165 { 166 Parser!(immutable(char)) lazyExpr() 167 { 168 return lazyParser(&expr); 169 } 170 171 Parser!(immutable(char)) expr() 172 { 173 auto add = (term() ~ match("+", false) ~ term()) ^^ (input) { 174 return variantArray(input[0] + input[1]); 175 }; 176 return add | term(); 177 } 178 179 Parser!(immutable(char)) term() 180 { 181 auto mult = (factor() ~ match("*", false) ~ factor()) ^^ (input) { 182 return variantArray(input[0] * input[1]); 183 }; 184 return mult | factor(); 185 } 186 187 Parser!(immutable(char)) factor() 188 { 189 auto exprWithParens = match("(", false) ~ lazyExpr() ~ match(")", false); 190 return number!(immutable(char))() | exprWithParens; 191 } 192 } 193 194 /// a simple expression parser 195 @("expression example") unittest 196 { 197 auto parser = new ExprParser; 198 auto p = parser.expr(); 199 auto res = p.parse("1+2*3"); 200 res.success.should == true; 201 res.results[0].get!double.should == 7; 202 203 res = p.parse("(1+2)*3"); 204 res.success.should == true; 205 res.results[0].get!double.should == 9; 206 } 207 208 /// parser for parsing ints 209 static class Integer : Regex 210 { 211 this() 212 { 213 import std.conv; 214 215 super(r"\d+") ^^ (input) { 216 return variantArray(input[0].get!string 217 .to!int); 218 }; 219 } 220 } 221 222 /// convenience function to create an integer parser 223 Parser!(T) integer(T)() 224 { 225 return new Integer; 226 } 227 228 /// unittests for integer 229 @("integer") unittest 230 { 231 auto parser = integer!(immutable(char)); 232 auto res = parser.parse("123"); 233 res.success.should == true; 234 res.results[0].should == 123; 235 } 236 237 /++ 238 + convenient function to build a parser that kills the result of 239 + another parser e.g. killResults(match("a")) succeeds, but returns 240 + an empty result 241 +/ 242 Parser!(T) killResults(T)(Parser!(T) parser) 243 { 244 return parser ^^ (Variant[] input) { Variant[] res; return res; }; 245 } 246 247 /// unittests for kill results 248 @("killResults") unittest 249 { 250 auto res = killResults(match("a")).parse("a"); 251 res.success.should == true; 252 res.results.length.should == 0; 253 } 254 255 /++ 256 + this parser is needed to build recursive parser hierarchies. 257 + look for expression.d for a more realistic example than in the unittest 258 +/ 259 class Lazy(T) : Parser!(T) 260 { 261 Parser!(T) delegate() fCallable; 262 263 this(Parser!(T) delegate() parser) 264 { 265 assert(parser != null); 266 fCallable = parser; 267 } 268 269 this(Parser!(T) function() parser) 270 { 271 assert(parser != null); 272 fCallable = toDelegate(parser); 273 } 274 275 override ParseResult!(T) parse(T[] s) 276 { 277 auto parser = fCallable(); 278 return transform(parser.parse(s)); 279 } 280 281 } 282 283 /// convenient function to instantiate a lazy parser with a delegate 284 Parser!(T) lazyParser(T)(Parser!(T) delegate() parser) 285 { 286 return new Lazy!(T)(parser); 287 } 288 289 /// convenient function to instantiate a lazy parser with a function 290 Parser!(T) lazyParser(T)(Parser!(T) function() parser) 291 { 292 return new Lazy!(T)(parser); 293 } 294 295 /// unittest to show the simplest usage of lazy 296 @("lazy") unittest 297 { 298 // endless -> a | a opt(endless) 299 struct Endless 300 { 301 static Parser!(immutable(char)) lazyEndless() 302 { 303 return lazyParser!(immutable(char))(&parser); 304 } 305 306 static Parser!(immutable(char)) parser() 307 { 308 return match("a") ~ (-(lazyEndless())); 309 } 310 } 311 312 auto p = Endless.parser(); 313 auto res = p.parse("aa"); 314 res.success.should == true; 315 res.results.should == ["a", "a"]; 316 317 res = p.parse("aab"); 318 res.success.should == true; 319 res.results.should == ["a", "a"]; 320 res.rest.should == "b"; 321 } 322 323 /// class for matching an array exactly 324 class Match(T) : Parser!(T) 325 { 326 T[] fExpected; 327 bool fCollect; 328 this(T[] expected, bool collect = true) 329 { 330 fExpected = expected; 331 fCollect = collect; 332 } 333 334 bool startsWith(T[] aInput, T[] expected) 335 { 336 if (aInput.length < expected.length) 337 { 338 return false; 339 } 340 for (int i = 0; i < expected.length; i++) 341 { 342 if (aInput[i] != expected[i]) 343 { 344 return false; 345 } 346 } 347 return true; 348 } 349 350 override ParseResult!(T) parse(T[] s) 351 { 352 if (startsWith(s, fExpected)) 353 { 354 auto rest = s[fExpected.length .. $]; 355 if (fCollect) 356 { 357 return transform(success(rest, fExpected)); 358 } 359 else 360 { 361 return success(rest); 362 } 363 } 364 else 365 { 366 return ParseResult!(T).error(""); //"Expected: '" ~ fExpected ~ "' but got '" ~ s ~ "'"); 367 } 368 } 369 } 370 371 /// convenient function to instantiate a matcher 372 Parser!(T) match(T)(T[] s, bool collect = true) 373 { 374 return new Match!(T)(s, collect); 375 } 376 377 /// matching a string 378 @("match") unittest 379 { 380 auto parser = match("test"); 381 auto res = parser.parseAll("test"); 382 383 res.success.should == true; 384 res.rest.length.should == 0; 385 } 386 387 @("match 2") unittest 388 { 389 auto parser = match("test"); 390 auto res = parser.parse("test2"); 391 res.success.should == true; 392 res.rest.should == "2"; 393 394 res = parser.parseAll("test2"); 395 res.success.should == false; 396 } 397 398 /// transform match result 399 @("match + transform") unittest 400 { 401 auto parser = match("test") ^^ (objects) { 402 auto res = objects; 403 if (objects[0] == "test") 404 { 405 res[0] = "super"; 406 } 407 return objects; 408 }; 409 auto res = parser.parse("test"); 410 res.success.should == true; 411 res.results[0].should == "super"; 412 } 413 414 @("match - discard") unittest 415 { 416 auto parser = match("test", false); 417 auto res = parser.parseAll("test"); 418 res.success.should == true; 419 res.results.length.should == 0; 420 } 421 422 @("match on arrays") unittest 423 { 424 auto parser = match([1, 2, 3]); 425 auto res = parser.parseAll([1, 2, 3]); 426 res.success.should == true; 427 res.results.length.should == 1; 428 res.results[0].should == [1, 2, 3]; 429 } 430 431 /// convenient function to instantiate a number parser 432 Parser!(T) number(T)() 433 { 434 return new Regex(r"[-+]?[0-9]*\.?[0-9]+") ^^ (Variant[] input) { 435 auto output = appender!(Variant[])(); 436 foreach (Variant o; input) 437 { 438 string s = o.get!(string); 439 double h = std.conv.to!(double)(s); 440 Variant v = h; 441 output.put(v); 442 } 443 return output.data; 444 }; 445 } 446 447 /// unittests for number parser 448 @("number parser") unittest 449 { 450 auto parser = number!(immutable(char))(); 451 auto res = parser.parse("123.123"); 452 res.success.should == true; 453 res.results[0].should == 123.123; 454 } 455 456 /// class for matching something optional 457 class Optional(T) : Parser!(T) 458 { 459 Parser!(T) fParser; 460 461 this(Parser!(T) parser) 462 { 463 fParser = parser; 464 } 465 466 override ParseResult!(T) parse(T[] s) 467 { 468 auto res = fParser.parse(s); 469 if (!res.success) 470 { 471 return success(s); 472 } 473 else 474 { 475 return res; 476 } 477 } 478 } 479 480 /// unittests to show the usage of OptionalParser and its dsl '-' 481 @("optional and dsl") unittest 482 { 483 auto abc = match("abc"); 484 auto opt = -abc; 485 auto res = opt.parse("abc"); 486 res.success.should == true; 487 res.results.length.should == 1; 488 res.results[0].should == "abc"; 489 res.rest.length.should == 0; 490 } 491 492 /// unittest to show optional in action. 493 @("optional") unittest 494 { 495 auto abc = match("abc"); 496 auto opt = -abc; 497 auto res = opt.parse("efg"); 498 auto withoutOptional = abc.parse("efg"); 499 withoutOptional.success.should == false; 500 res.success.should == true; 501 res.results.length.should == 0; 502 res.rest.should == "efg"; 503 } 504 505 /// parse a number with or without sign 506 @("parse number with or without +") unittest 507 { 508 auto sign = match("+"); 509 auto value = match("1"); 510 auto test = (-sign) ~ value; 511 auto resWithSign = test.parse("+1"); 512 resWithSign.success.should == true; 513 resWithSign.results.length.should == 2; 514 auto resWithoutSign = test.parse("1"); 515 resWithoutSign.success.should == true; 516 } 517 518 /++ 519 + parser for regular expressions 520 + a successful parse step returns all captures in an array 521 +/ 522 class Regex : Parser!(immutable(char)) 523 { 524 string fRegex; 525 bool fCollect; 526 this(string regex, bool collect = true) 527 { 528 fRegex = regex; 529 fCollect = collect; 530 } 531 532 override ParseResult!(immutable(char)) parse(string s) 533 { 534 import std.regex; 535 536 auto res = std.regex.match(s, std.regex.regex(fRegex)); 537 if (res.empty()) 538 { 539 return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex); 540 } 541 else if (res.pre.length > 0) 542 { 543 return ParseResult!(immutable(char)).error(s ~ "did not match " ~ fRegex); 544 } 545 else 546 { 547 if (fCollect) 548 { 549 auto results = appender!(Variant[])(); 550 foreach (c; res.captures) 551 { 552 Variant v = c; 553 results.put(v); 554 } 555 return transform(ParseResult!(immutable(char)).ok(res.post, results.data)); 556 } 557 else 558 { 559 return success(res.post); 560 } 561 } 562 } 563 } 564 565 /// convenient function to instantiate a regexparser 566 Parser!(T) regex(T)(T[] s, bool collect = true) 567 { 568 return new Regex(s, collect); 569 } 570 571 /// regexParser 572 @("regex parser") unittest 573 { 574 auto res = regex("(a)(.)(c)").parse("abcd"); 575 res.success.should == true; 576 res.results.should == ["abc", "a", "b", "c"]; 577 res.rest.shouldEqual("d"); 578 } 579 580 /// regexParser works from the start of the input 581 @("regex parser parses from start") unittest 582 { 583 auto res = regex("abc").parse("babc"); 584 res.success.shouldBeFalse; 585 } 586 587 /// class for matching repetitions 588 static class Repetition(T) : Parser!(T) 589 { 590 Parser!(T) fToRepeat; 591 this(Parser!(T) toRepeat) 592 { 593 fToRepeat = toRepeat; 594 } 595 596 override ParseResult!(T) parse(T[] s) 597 { 598 Variant[] results; 599 auto rest = s; 600 while (true) 601 { 602 auto res = fToRepeat.parse(rest); 603 if (res.success) 604 { 605 rest = res.rest; 606 results = results ~ res.results; 607 } 608 else 609 { 610 break; 611 } 612 } 613 return transform(ParseResult!(T).ok(rest, results)); 614 } 615 } 616 617 /// unittest for repetition 618 @("repetition more than one") unittest 619 { 620 auto parser = *match("a"); 621 auto res = parser.parse("aa"); 622 res.success.shouldBeTrue; 623 res.rest.shouldEqual(""); 624 res.results.length.shouldEqual(2); 625 } 626 627 @("repetition none") unittest 628 { 629 auto parser = *match("a"); 630 auto res = parser.parse("b"); 631 res.success.shouldBeTrue; 632 res.rest.shouldEqual("b"); 633 } 634 635 @("repetition with rest") unittest 636 { 637 auto parser = *match("a"); 638 auto res = parser.parse("ab"); 639 res.success.shouldBeTrue; 640 res.rest.shouldEqual("b"); 641 } 642 643 @("repetition with other parser") unittest 644 { 645 auto parser = *(match("+") ~ match("-")); 646 auto res = parser.parse("+-+-+"); 647 res.success.shouldBeTrue; 648 res.rest.shouldEqual("+"); 649 } 650 651 @("repetition discarding result") unittest 652 { 653 auto parser = *match("a", false); 654 auto res = parser.parse("aaaa"); 655 res.success.shouldBeTrue; 656 res.rest.length.shouldEqual(0); 657 res.results.length.shouldEqual(0); 658 } 659 660 @("repetition transforming result") unittest 661 { 662 auto parser = (*match("a")) ^^ (input) { return variantArray(input.length); }; 663 auto suc = parser.parseAll("aaaaa"); 664 suc.success.shouldBeTrue; 665 suc.results.length.shouldEqual(1); 666 suc.results[0].get!(ulong).shouldEqual(5); 667 } 668 669 /// class for matching sequences 670 class Sequence(T) : Parser!(T) 671 { 672 Parser!(T)[] fParsers; 673 this(Parser!(T)[] parsers...) 674 { 675 fParsers = parsers.dup; 676 } 677 678 override ParseResult!(T) parse(T[] s) 679 { 680 auto resultObjects = appender!(Variant[])(); 681 T[] h = s; 682 foreach (parser; fParsers) 683 { 684 auto res = parser.parse(h); 685 if (res.success) 686 { 687 h = res.rest; 688 resultObjects.put(res.results); 689 } 690 else 691 { 692 return res; 693 } 694 } 695 return transform(ParseResult!(T).ok(h, resultObjects.data)); 696 } 697 } 698 699 /// convenient function 700 Parser!(T) sequence(T)(Parser!(T)[] parsers...) 701 { 702 return new Sequence!(T)(parsers); 703 } 704 705 /// unittests showing usage of sequence parser and dsl '~' 706 @("sequence") unittest 707 { 708 auto parser = match("a") ~ match("b"); 709 auto res = parser.parse("ab"); 710 res.success.shouldBeTrue; 711 res.results.length.shouldEqual(2); 712 } 713 714 @("sequence and rest") unittest 715 { 716 auto parser = match("a") ~ "b".match; 717 auto res = parser.parse("abc"); 718 res.success.shouldBeTrue; 719 res.rest.shouldEqual("c"); 720 } 721 722 @("sequence fails") unittest 723 { 724 auto parser = match("a") ~ match("b"); 725 auto res = parser.parse("ac"); 726 727 res.success.shouldBeFalse; 728 } 729 730 @("sequence with discard result") unittest 731 { 732 auto parser = match("a", false) ~ match("b"); 733 auto res = parser.parse("ab"); 734 res.success.shouldBeTrue; 735 res.results.length.shouldEqual(1); 736 } 737 738 @("sequence with transformation") unittest 739 { 740 auto parser = (match("a") ~ match("b")) ^^ (Variant[] result) { 741 string totalString; 742 foreach (Variant o; result) 743 { 744 if (o.type == typeid(string)) 745 { 746 totalString ~= o.get!(string); 747 } 748 } 749 750 Variant v = totalString; 751 return [v]; 752 }; 753 754 auto suc = parser.parse("ab"); 755 assert(suc.success); 756 assert(suc.results.length == 1); 757 assert(suc.results[0] == "ab"); 758 } 759 760 @("sequence and optional") unittest 761 { 762 auto ab = -match("a") ~ match("b"); 763 auto res = ab.parse("ab"); 764 res.success.shouldBeTrue; 765 res = ab.parse("b"); 766 res.success.shouldBeTrue; 767 res = ab.parse("c"); 768 res.success.shouldBeFalse; 769 res = ab.parse(""); 770 res.success.shouldBeFalse; 771 } 772 773 @("sequence api not dsl") unittest 774 { 775 auto ab = sequence(match("a"), match("b")); 776 auto res = ab.parse("ab"); 777 res.success.shouldBeTrue; 778 } 779 780 @("sequence api") unittest 781 { 782 auto ab = sequence(match("a"), match("b"), match("c")); 783 auto res = ab.parse("abc"); 784 res.success.shouldBeTrue; 785 }