module graphql.lexer;

version(LDC) {
	import std.experimental.logger;
} else {
	import std.logger;
}

import std.format : format;
import std.typecons : Flag;
import std.stdio;

import graphql.tokenmodule;

/// Selects the lexing mode. With `QueryParser.no` the lexer additionally
/// recognises the keywords `subscription`, `scalar`, `schema`, `input` and
/// `type` as well as triple-quoted block strings.
alias QueryParser = Flag!"QueryParser";

/** Hand-written lexer exposing `empty`, `front` and `popFront`.

The current token is built eagerly: the constructor lexes the first token and
every `popFront` lexes the next one. Lexing failures are reported by throwing
`Exception`.
*/
struct Lexer {
	const QueryParser qp;
	string input;
	size_t stringPos;

	size_t line;
	size_t column;

	Token cur;

	/** Creates a lexer over `input` and immediately lexes the first token.

	Params:
		input = the text to tokenize
		qp = the lexing mode, see `QueryParser`

	Throws: `Exception` if the first token is malformed.
	*/
	this(string input, QueryParser qp = QueryParser.yes) @safe {
		this.input = input;
		this.stringPos = 0;
		this.line = 1;
		this.column = 1;
		this.qp = qp;
		this.buildToken();
	}

	/// Returns: `true` if this lexer was created with `QueryParser.no`.
	bool isNotQueryParser() @safe const {
		return this.qp == QueryParser.no;
	}

	/// Returns: `true` if the input is exhausted or the current character
	/// terminates a name.
	private bool isTokenStop() const @safe {
		return this.stringPos >= this.input.length
			|| this.isTokenStop(this.input[this.stringPos]);
	}

	/// Returns: `true` if `c` is whitespace or one of the punctuation
	/// characters that terminate a name.
	private bool isTokenStop(const(char) c) const @safe {
		import std.ascii : isWhite;
		import std.algorithm.searching : canFind;
		return isWhite(c) || "(){}!=|[:],@$".canFind(c);
	}

	/// Non-consuming look-ahead.
	/// Returns: `true` if the unread input starts with `s`.
	private bool restStartsWith(string s) const @safe {
		return this.stringPos + s.length <= this.input.length
			&& this.input[this.stringPos .. this.stringPos + s.length] == s;
	}

	/// Maps a single punctuation character to its token type.
	/// Returns: `TokenType.undefined` if `c` is not a punctuation token.
	private static TokenType punctuationType(const(char) c) @safe {
		switch(c) {
			case ')': return TokenType.rparen;
			case '(': return TokenType.lparen;
			case ']': return TokenType.rbrack;
			case '[': return TokenType.lbrack;
			case '}': return TokenType.rcurly;
			case '{': return TokenType.lcurly;
			case '$': return TokenType.dollar;
			case '!': return TokenType.exclamation;
			case '|': return TokenType.pipe;
			case '@': return TokenType.at;
			case ',': return TokenType.comma;
			case '=': return TokenType.equal;
			case ':': return TokenType.colon;
			default: return TokenType.undefined;
		}
	}

	/// Maps a complete name to its keyword token type, honouring the mode:
	/// `subscription`, `scalar`, `schema`, `input` and `type` are keywords
	/// only with `QueryParser.no`.
	/// Returns: `TokenType.undefined` if `word` is an ordinary name.
	private TokenType keywordType(string word) const @safe {
		immutable bool schemaMode = this.isNotQueryParser();
		switch(word) {
			case "mutation": return TokenType.mutation;
			case "on": return TokenType.on_;
			case "directive": return TokenType.directive;
			case "enum": return TokenType.enum_;
			case "extend": return TokenType.extend;
			case "interface": return TokenType.interface_;
			case "implements": return TokenType.implements;
			case "false": return TokenType.false_;
			case "fragment": return TokenType.fragment;
			case "query": return TokenType.query;
			case "true": return TokenType.true_;
			case "null": return TokenType.null_;
			case "union": return TokenType.union_;
			case "subscription":
				return schemaMode ? TokenType.subscription
					: TokenType.undefined;
			case "scalar":
				return schemaMode ? TokenType.scalar : TokenType.undefined;
			case "schema":
				return schemaMode ? TokenType.schema : TokenType.undefined;
			case "input":
				return schemaMode ? TokenType.input : TokenType.undefined;
			case "type":
				return schemaMode ? TokenType.type : TokenType.undefined;
			default:
				return TokenType.undefined;
		}
	}

	/// Skips a `#` comment up to and including the terminating newline.
	/// A comment that runs to the end of the input leaves `stringPos` equal
	/// to `input.length` and does not count an extra line.
	/// Returns: `true` if a comment was consumed.
	private bool eatComment() @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '#')
		{
			return false;
		}

		++this.stringPos; // the '#'
		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '\n')
		{
			++this.stringPos;
		}

		// only step over the newline if there actually is one
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.line;
			this.column = 1;
		}
		return true;
	}

	/// Skips whitespace and comments while keeping `line` and `column`
	/// up to date.
	private void eatWhitespace() @safe {
		import std.ascii : isWhite;
		while(this.stringPos < this.input.length) {
			if(this.eatComment()) {
				continue;
			} else if(this.input[this.stringPos] == '\n') {
				this.column = 1;
				++this.line;
			} else if(this.input[this.stringPos].isWhite) {
				++this.column;
			} else {
				break;
			}
			++this.stringPos;
		}
	}

	/** Lexes the next token into `cur`.

	At the end of the input `cur` becomes `Token(TokenType.undefined)`.
	Punctuation tokens carry the column they start at; all other tokens carry
	the column reached after the token was consumed.

	Throws: `Exception` on a malformed `...`, on a sign that is not followed
	by a digit, and on an unterminated block string.
	*/
	private void buildToken() @safe {
		import std.ascii : isAlphaNum, isDigit;

		this.eatWhitespace();

		if(this.stringPos >= this.input.length) {
			this.cur = Token(TokenType.undefined);
			return;
		}

		immutable TokenType punct =
			punctuationType(this.input[this.stringPos]);
		if(punct != TokenType.undefined) {
			this.cur = Token(punct, this.line, this.column);
			++this.column;
			++this.stringPos;
			return;
		}

		// b is the index of the first character of the token, e is kept in
		// step with stringPos and marks the end of the token text
		immutable size_t b = this.stringPos;
		size_t e = this.stringPos;
		switch(this.input[this.stringPos]) {
			case '.':
				++this.stringPos;
				++this.column;
				++e;
				if(this.testStrAndInc!".."(e)) {
					if(this.isTokenStop()
							|| (this.stringPos < this.input.length
								&& isAlphaNum(this.input[this.stringPos])
							)
						)
					{
						this.cur = Token(TokenType.dots, this.line,
								this.column);
						return;
					}
				}
				throw new Exception(format(
						"failed to parse \"...\" at line %s column %s",
						this.line, this.column
					));
			case '-', '+':
				++this.stringPos;
				++this.column;
				++e;
				// the digit loop below consumes its first character
				// unconditionally, so make sure it really is a digit
				if(this.stringPos >= this.input.length
						|| !isDigit(this.input[this.stringPos]))
				{
					throw new Exception(format(
							"failed to parse number at line %s column %s",
							this.line, this.column
						));
				}
				goto case '0';
			case '0': .. case '9':
				do {
					++this.stringPos;
					++this.column;
					++e;
				} while(this.stringPos < this.input.length
						&& this.input[this.stringPos] >= '0'
						&& this.input[this.stringPos] <= '9');

				if(this.stringPos >= this.input.length
						|| this.input[this.stringPos] != '.')
				{
					this.cur = Token(TokenType.intValue, this.input[b ..
							e], this.line, this.column);
				} else {
					// the first iteration consumes the '.', the following
					// ones the fractional digits
					do {
						++this.stringPos;
						++this.column;
						++e;
					} while(this.stringPos < this.input.length
							&& this.input[this.stringPos] >= '0'
							&& this.input[this.stringPos] <= '9');

					this.cur = Token(TokenType.floatValue, this.input[b ..
							e], this.line, this.column);
				}
				return;
			case '"':
				++this.stringPos;
				++this.column;
				++e;
				// look ahead without consuming, otherwise the closing quote
				// of an empty string "" would be eaten by the probe
				if(this.isNotQueryParser() && this.restStartsWith("\"\"")) {
					this.stringPos += 2;
					this.column += 2;
					e += 2;
					while(!this.restStartsWith("\"\"\"")) {
						if(this.stringPos >= this.input.length) {
							throw new Exception(format(
									"unterminated block string at line %s"
									~ " column %s",
									this.line, this.column
								));
						}
						if(this.input[this.stringPos] == '\n') {
							this.column = 1;
							++this.line;
						} else {
							++this.column;
						}
						++this.stringPos;
						++e;
					}
					this.stringPos += 3;
					this.column += 3;
					e += 3;
					this.cur = Token(TokenType.stringValue, this.input[b + 3
							.. e - 3], this.line, this.column);
				} else {
					while(this.stringPos < this.input.length
							&& this.input[this.stringPos] != '"')
					{
						// a backslash escapes the character after it, so
						// both are skipped together; this keeps \\" from
						// being mistaken for an escaped quote
						immutable size_t step =
							(this.input[this.stringPos] == '\\'
								&& this.stringPos + 1 < this.input.length)
							? 2 : 1;
						this.stringPos += step;
						this.column += step;
						e += step;
					}
					// consume the closing quote if there is one; an
					// unterminated string still yields a token but must not
					// move stringPos past the end of the input
					if(this.stringPos < this.input.length) {
						++this.stringPos;
						++this.column;
					}
					this.cur = Token(TokenType.stringValue, this.input[b + 1
							.. e], this.line, this.column);
				}
				break;
			default:
				while(!this.isTokenStop()) {
					//writefln("455 '%s'", this.input[this.stringPos]);
					++this.stringPos;
					++this.column;
					++e;
				}
				// decide between keyword and name only after the whole word
				// was read, so a partial keyword match can never leak into
				// another keyword
				string word = this.input[b .. e];
				immutable TokenType keyword = this.keywordType(word);
				if(keyword != TokenType.undefined) {
					this.cur = Token(keyword, this.line, this.column);
				} else {
					this.cur = Token(TokenType.name, word,
							this.line, this.column
						);
				}
				break;
		}
	}

	/** Consumes the current character if it equals `c`.

	Params:
		c = the expected character
		e = incremented together with `stringPos` on a match

	Returns: `true` if the character matched and was consumed.
	*/
	bool testCharAndInc(const(char) c, ref size_t e) @safe {
		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == c)
		{
			++this.column;
			++this.stringPos;
			++e;
			return true;
		} else {
			return false;
		}
	}

	/** Consumes the characters of `s` one by one for as long as they match.

	On a mismatch the characters matched so far stay consumed.

	Params:
		e = incremented together with `stringPos` for every matched character

	Returns: `true` only if all of `s` matched.
	*/
	bool testStrAndInc(string s)(ref size_t e) @safe {
		for(size_t i = 0; i < s.length; ++i) {
			if(this.stringPos < this.input.length
					&& this.input[this.stringPos] == s[i])
			{
				++this.column;
				++this.stringPos;
				++e;
			} else {
				return false;
			}
		}

		return true;
	}

	/// Returns: `true` once the input is exhausted and the current token is
	/// `TokenType.undefined`.
	@property bool empty() const @safe {
		return this.stringPos >= this.input.length
			&& this.cur.type == TokenType.undefined;
	}

	/// Returns: the current token.
	Token front() @property @safe {
		return this.cur;
	}

	/// ditto
	@property Token front() const @safe @nogc pure {
		return this.cur;
	}

	/// Lexes the next token.
	void popFront() @safe {
		this.buildToken();
	}

	/// Returns: the part of the input that was not consumed yet.
	string getRestOfInput() const @safe {
		return this.input[this.stringPos .. $];
	}
}

unittest {
	string f = "f ";
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "f", format("'%s'", l.front.value));
}

unittest {
	string f = "... ";

	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.dots);
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = "name! ";
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "name", format("'%s'", l.front.value));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.exclamation);
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = "fragment";
	const l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.fragment);
}

unittest {
	string f = `
		mutation {
			likeStory(storyID: 12345) {
				story {
					likeCount
				}
			}
		}`;
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.mutation);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lparen);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.colon, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.intValue);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rparen);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly);
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = `
		query withFragments {
			user(id: +4) {
				# super cool comment
				friends(first: -10.3) {
					...friendFields
					null false true
				}
				mutualFriends(first: 10) {
					...friendFields
				}
			}
		}

		fragment friendFields on User {
			id
			name
			profilePic(size: 50)
		}`;
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.query);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "withFragments");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "user");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "id", l.front.value);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.colon);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.intValue);
	assert(l.front.value == "+4");
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rparen);
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "friends");
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lparen);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "first");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.floatValue, format("%s", l.front.type));
	assert(l.front.value == "-10.3", l.front.value);
	l.popFront();
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.dots, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name, format("%s", l.front.type));
	assert(l.front.value == "friendFields");
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.null_, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.false_, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.true_, format("%s", l.front.type));
	while(!l.empty) {
		l.popFront();
	}
}

unittest {
	string f = `
		query withFragments {
			user(id: "hello") {
			}
		}`;

	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.query);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "withFragments");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "user");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "id", l.front.value);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.colon);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.stringValue);
	assert(l.front.value == "hello", format("'%s' '%s'", l.front.value, "hello"));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rparen);
}

// Issue #20
unittest {
	string f = `# asldf
	#
	{ foo }
	`;

	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly, l.front.toString());
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name, l.front.toString());
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly, l.front.toString());
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = `""" a long comment """ `;

	auto l = Lexer(f, QueryParser.no);
	assert(!l.empty);
	assert(l.front.type == TokenType.stringValue, l.front.toString());
	assert(l.front.value == " a long comment ", l.front.value);
	l.popFront();
	assert(l.empty);
}

unittest {
	import std.string : indexOf;

	string f = `""" a

	long

	comment """ `;

	auto l = Lexer(f, QueryParser.no);
	assert(!l.empty);
	assert(l.front.type == TokenType.stringValue, l.front.toString());
	assert(l.front.value.indexOf("a") != -1);
	assert(l.front.value.indexOf("long") != -1);
	assert(l.front.value.indexOf("comment") != -1);
	assert(l.front.value.indexOf("\n") != -1);
	l.popFront();
	assert(l.empty);
}

// a comment that ends the input without a newline must not move the
// position past the end of the input
unittest {
	auto l = Lexer("foo # trailing comment");
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "foo", l.front.value);
	l.popFront();
	assert(l.empty);
	assert(l.getRestOfInput() == "");
}

// a name that merely overlaps two keywords is still a name
unittest {
	auto l = Lexer("faragment");
	assert(l.front.type == TokenType.name, format("%s", l.front.type));
	assert(l.front.value == "faragment", l.front.value);
}

// an escaped backslash directly before the closing quote
unittest {
	auto l = Lexer(`"a\\" b`);
	assert(l.front.type == TokenType.stringValue);
	assert(l.front.value == `a\\`, l.front.value);
	l.popFront();
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "b", l.front.value);
}

// the empty string when block strings are enabled
unittest {
	auto l = Lexer(`"" foo`, QueryParser.no);
	assert(l.front.type == TokenType.stringValue);
	assert(l.front.value == "", l.front.value);
	l.popFront();
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "foo", l.front.value);
}

// a sign must be followed by a digit
unittest {
	import std.exception : assertThrown;

	assertThrown!Exception(Lexer("-"));
	assertThrown!Exception(Lexer("+a"));
}

// an unterminated block string is reported as an Exception
unittest {
	import std.exception : assertThrown;

	assertThrown!Exception(Lexer(`""" never closed`, QueryParser.no));
}