module graphql.lexer;

import std.experimental.logger;
import std.format : format;
import std.typecons : Flag;
import std.stdio;

import graphql.tokenmodule;

/** Selects the keyword set recognized by the Lexer.

With QueryParser.yes (the default) the words `subscription`, `scalar`,
`schema`, `input` and `type` are lexed as plain names; with QueryParser.no
they are lexed as keywords.
*/
alias QueryParser = Flag!"QueryParser";

/** Hand written lexer that turns a GraphQL document into a stream of Tokens.

The struct exposes `empty`, `front` and `popFront`, so it can be consumed like
an input range. The first token is built by the constructor.

Position bookkeeping: punctuation tokens are created with the column of the
punctuation character itself, every other token is created with the column
just behind its last character.
*/
struct Lexer {
	/// Keyword mode, fixed at construction time.
	const QueryParser qp;
	/// The complete text being lexed.
	string input;
	/// Index into `input` of the next character that has not been consumed.
	size_t stringPos;

	/// Current line, starts at 1.
	size_t line;
	/// Current column, starts at 1.
	size_t column;

	/// The most recently built token, returned by `front`.
	Token cur;

	/** Creates a lexer over `input` and immediately builds the first token.

	Params:
		input = the text to tokenize
		qp = keyword mode, see QueryParser

	Throws: Exception if the first token is malformed, see `popFront`.
	*/
	this(string input, QueryParser qp = QueryParser.yes) @safe {
		this.input = input;
		this.stringPos = 0;
		this.line = 1;
		this.column = 1;
		this.qp = qp;
		this.buildToken();
	}

	/// Returns: true if the lexer was constructed with QueryParser.no.
	bool isNotQueryParser() @safe const {
		return this.qp == QueryParser.no;
	}

	/// Returns: true at the end of the input or if the next character ends a
	/// name or keyword.
	private bool isTokenStop() const @safe {
		return this.stringPos >= this.input.length
			|| this.isTokenStop(this.input[this.stringPos]);
	}

	/// Returns: true if `c` is whitespace or punctuation that ends a name or
	/// keyword.
	private bool isTokenStop(const(char) c) const @safe {
		switch(c) {
			// '\r' is included so that CRLF terminated lines do not leak a
			// carriage return into the preceding name
			case ' ': case '\t': case '\n': case '\r':
			case '(': case ')': case '{': case '}':
			case '[': case ']': case '!': case '=':
			case '|': case ':': case ',': case '@':
			case '$':
				return true;
			default:
				return false;
		}
	}

	/// Consumes one character that is known not to be a line break.
	private void advance() @safe {
		++this.stringPos;
		++this.column;
	}

	/// Consumes a possibly empty run of ASCII digits.
	private void eatDigits() @safe {
		import std.ascii : isDigit;
		while(this.stringPos < this.input.length
				&& isDigit(this.input[this.stringPos]))
		{
			this.advance();
		}
	}

	/** Skips a `#` comment if one starts at the current position.

	The comment runs up to and including the next '\n'. A comment that is
	terminated by the end of the input leaves `stringPos` exactly at
	`input.length` and does not count an additional line.

	Returns: true if a comment was consumed.
	*/
	private bool eatComment() @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '#')
		{
			return false;
		}

		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '\n')
		{
			++this.stringPos;
		}

		// only step over the line break if there actually is one
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.line;
			this.column = 1;
		}
		return true;
	}

	/** Skips blanks, tabs, carriage returns, line feeds and comments.

	Only '\n' advances the line counter, so a "\r\n" pair counts as one line.
	*/
	private void eatWhitespace() @safe {
		while(this.stringPos < this.input.length) {
			if(this.eatComment()) {
				continue;
			}

			switch(this.input[this.stringPos]) {
				case ' ': case '\t': case '\r':
					++this.column;
					break;
				case '\n':
					this.column = 1;
					++this.line;
					break;
				default:
					return;
			}
			++this.stringPos;
		}
	}

	/** Maps a single punctuation character to its token type.

	Returns: true and sets `type` if `c` is a punctuation token.
	*/
	private static bool punctuationType(const(char) c, out TokenType type)
			@safe
	{
		switch(c) {
			case ')': type = TokenType.rparen; break;
			case '(': type = TokenType.lparen; break;
			case ']': type = TokenType.rbrack; break;
			case '[': type = TokenType.lbrack; break;
			case '}': type = TokenType.rcurly; break;
			case '{': type = TokenType.lcurly; break;
			case '$': type = TokenType.dollar; break;
			case '!': type = TokenType.exclamation; break;
			case '|': type = TokenType.pipe; break;
			case '@': type = TokenType.at; break;
			case ',': type = TokenType.comma; break;
			case '=': type = TokenType.equal; break;
			case ':': type = TokenType.colon; break;
			default: return false;
		}
		return true;
	}

	/** Maps a complete word to its keyword token type.

	The schema-only keywords are recognized only if `isNotQueryParser`
	returns true.

	Returns: true and sets `type` if `word` is a keyword in the current mode.
	*/
	private bool keywordType(string word, out TokenType type) const @safe {
		const bool schemaMode = this.isNotQueryParser();

		switch(word) {
			case "mutation": type = TokenType.mutation; break;
			case "on": type = TokenType.on_; break;
			case "directive": type = TokenType.directive; break;
			case "enum": type = TokenType.enum_; break;
			case "extend": type = TokenType.extend; break;
			case "interface": type = TokenType.interface_; break;
			case "implements": type = TokenType.implements; break;
			case "false": type = TokenType.false_; break;
			case "fragment": type = TokenType.fragment; break;
			case "query": type = TokenType.query; break;
			case "true": type = TokenType.true_; break;
			case "null": type = TokenType.null_; break;
			case "union": type = TokenType.union_; break;
			case "subscription":
				if(!schemaMode) {
					return false;
				}
				type = TokenType.subscription;
				break;
			case "scalar":
				if(!schemaMode) {
					return false;
				}
				type = TokenType.scalar;
				break;
			case "schema":
				if(!schemaMode) {
					return false;
				}
				type = TokenType.schema;
				break;
			case "input":
				if(!schemaMode) {
					return false;
				}
				type = TokenType.input;
				break;
			case "type":
				if(!schemaMode) {
					return false;
				}
				type = TokenType.type;
				break;
			default:
				return false;
		}
		return true;
	}

	/** Builds the next token into `cur`.

	At the end of the input `cur` becomes a token of type
	TokenType.undefined.

	Throws: Exception on a malformed "..." or on a sign that is not followed
	by a digit.
	*/
	private void buildToken() @safe {
		this.eatWhitespace();

		if(this.stringPos >= this.input.length) {
			this.cur = Token(TokenType.undefined);
			return;
		}

		const char first = this.input[this.stringPos];

		TokenType punct;
		if(punctuationType(first, punct)) {
			// punctuation carries the column of the character itself
			this.cur = Token(punct, this.line, this.column);
			this.advance();
			return;
		}

		const size_t begin = this.stringPos;
		switch(first) {
			case '.':
				this.lexDots();
				break;
			case '-':
			case '+':
			case '0': .. case '9':
				this.lexNumber(begin);
				break;
			case '"':
				this.lexString(begin);
				break;
			default:
				this.lexWord(begin);
				break;
		}
	}

	/** Lexes the spread operator "...".

	The three dots must be followed by a token stop or an alphanumeric
	character.

	Throws: Exception otherwise.
	*/
	private void lexDots() @safe {
		import std.ascii : isAlphaNum;

		size_t consumed = this.stringPos;
		this.advance();
		++consumed;

		if(this.testStrAndInc!".."(consumed)
				&& (this.isTokenStop()
					|| isAlphaNum(this.input[this.stringPos])))
		{
			this.cur = Token(TokenType.dots, this.line, this.column);
			return;
		}

		throw new Exception(format(
				"failed to parse \"...\" at line %s column %s",
				this.line, this.column
			));
	}

	/** Lexes an integer or float literal starting at `begin`.

	An optional leading '+' or '-' must be followed by at least one digit.
	Digits followed by a '.' and a possibly empty run of digits form a
	floatValue, everything else an intValue.

	Throws: Exception if a sign is not followed by a digit.
	*/
	private void lexNumber(const size_t begin) @safe {
		import std.ascii : isDigit;

		const char lead = this.input[this.stringPos];
		if(lead == '-' || lead == '+') {
			this.advance();
			// without this check a lone sign would swallow the following
			// character or run past the end of the input
			if(this.stringPos >= this.input.length
					|| !isDigit(this.input[this.stringPos]))
			{
				throw new Exception(format(
						"expected a digit after '%s' at line %s column %s",
						lead, this.line, this.column
					));
			}
		}

		this.eatDigits();

		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == '.')
		{
			this.advance();
			this.eatDigits();
			this.cur = Token(TokenType.floatValue,
					this.input[begin .. this.stringPos],
					this.line, this.column);
		} else {
			this.cur = Token(TokenType.intValue,
					this.input[begin .. this.stringPos],
					this.line, this.column);
		}
	}

	/** Lexes a double quoted string starting at `begin`.

	The token value is the raw text between the quotes, escape sequences are
	kept as written. A backslash together with the character following it is
	skipped as a unit, so `\"` does not end the string while `\\"` does. If
	the closing quote is missing the value extends to the end of the input.
	*/
	private void lexString(const size_t begin) @safe {
		this.advance(); // opening quote

		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '"')
		{
			if(this.input[this.stringPos] == '\\'
					&& this.stringPos + 1 < this.input.length)
			{
				this.advance();
			}
			this.advance();
		}

		const size_t end = this.stringPos;
		// only step over the closing quote if there actually is one
		if(this.stringPos < this.input.length) {
			this.advance();
		}

		this.cur = Token(TokenType.stringValue,
				this.input[begin + 1 .. end], this.line, this.column);
	}

	/** Lexes a name or keyword starting at `begin`.

	The complete word up to the next token stop is consumed first and only
	then compared against the keywords, so a word is a keyword only if it
	matches one exactly.
	*/
	private void lexWord(const size_t begin) @safe {
		while(!this.isTokenStop()) {
			this.advance();
		}

		string word = this.input[begin .. this.stringPos];

		TokenType keyword;
		if(this.keywordType(word, keyword)) {
			this.cur = Token(keyword, this.line, this.column);
		} else {
			this.cur = Token(TokenType.name, word, this.line, this.column);
		}
	}

	/** Consumes the next character if it equals `c`.

	Params:
		c = the expected character
		e = incremented by one if the character was consumed

	Returns: true if the character was consumed.
	*/
	bool testCharAndInc(const(char) c, ref size_t e) @safe {
		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == c)
		{
			++this.column;
			++this.stringPos;
			++e;
			return true;
		} else {
			return false;
		}
	}

	/** Consumes the characters of `s` as long as they match the input.

	Characters matched before a mismatch stay consumed.

	Params:
		e = incremented by one for every consumed character

	Returns: true if all of `s` was consumed.
	*/
	bool testStrAndInc(string s)(ref size_t e) @safe {
		for(size_t i = 0; i < s.length; ++i) {
			if(this.stringPos < this.input.length
					&& this.input[this.stringPos] == s[i])
			{
				++this.column;
				++this.stringPos;
				++e;
			} else {
				return false;
			}
		}
		return true;
	}

	/// Returns: true if the input is exhausted and the current token is of
	/// type TokenType.undefined.
	@property bool empty() const @safe {
		return this.stringPos >= this.input.length
			&& this.cur.type == TokenType.undefined;
	}

	/// Returns: the current token.
	Token front() @property @safe {
		return this.cur;
	}

	/// ditto
	@property Token front() const @safe @nogc pure {
		return this.cur;
	}

	/** Advances to the next token.

	Throws: Exception on a malformed "..." or on a sign that is not followed
	by a digit.
	*/
	void popFront() @safe {
		this.buildToken();
	}

	/// Returns: the part of the input that has not been consumed yet.
	string getRestOfInput() const @safe {
		return this.input[this.stringPos .. $];
	}
}

unittest {
	string f = "f ";
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "f", format("'%s'", l.front.value));
}

unittest {
	string f = "... ";

	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.dots);
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = "name! ";
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "name", format("'%s'", l.front.value));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.exclamation);
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = "fragment";
	const l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.fragment);
}

unittest {
	string f = `
mutation {
  likeStory(storyID: 12345) {
    story {
      likeCount
    }
  }
}`;
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.mutation);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lparen);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.colon, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.intValue);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rparen);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly);
	l.popFront();
	assert(l.empty);
}

unittest {
	string f = `
query withFragments {
  user(id: +4) {
    # super cool comment
    friends(first: -10.3) {
      ...friendFields
      null false true
    }
    mutualFriends(first: 10) {
      ...friendFields
    }
  }
}

fragment friendFields on User {
  id
  name
  profilePic(size: 50)
}`;
	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.query);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "withFragments");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "user");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "id", l.front.value);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.colon);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.intValue);
	assert(l.front.value == "+4");
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rparen);
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "friends");
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.lparen);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "first");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.floatValue, format("%s", l.front.type));
	assert(l.front.value == "-10.3", l.front.value);
	l.popFront();
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.dots, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name, format("%s", l.front.type));
	assert(l.front.value == "friendFields");
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.null_, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.false_, format("%s", l.front.type));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.true_, format("%s", l.front.type));
	while(!l.empty) {
		l.popFront();
	}
}

unittest {
	string f = `
query withFragments {
  user(id: "hello") {
  }
}`;

	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.query);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "withFragments");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "user");
	l.popFront();
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name);
	assert(l.front.value == "id", l.front.value);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.colon);
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.stringValue);
	assert(l.front.value == "hello", format("'%s' '%s'", l.front.value, "hello"));
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rparen);
}

// Issue #20
unittest {
	string f = `# asldf
#
{ foo }
`;

	auto l = Lexer(f);
	assert(!l.empty);
	assert(l.front.type == TokenType.lcurly, l.front.toString());
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.name, l.front.toString());
	l.popFront();
	assert(!l.empty);
	assert(l.front.type == TokenType.rcurly, l.front.toString());
	l.popFront();
	assert(l.empty);
}

// a word is a keyword only if it matches the keyword exactly
unittest {
	foreach(word; ["enxtend", "faragment", "mutations", "onn"]) {
		auto l = Lexer(word ~ " ");
		assert(l.front.type == TokenType.name, l.front.toString());
		assert(l.front.value == word, l.front.value);
	}

	foreach(word; ["subcalar", "trype"]) {
		auto l = Lexer(word, QueryParser.no);
		assert(l.front.type == TokenType.name, l.front.toString());
		assert(l.front.value == word, l.front.value);
	}
}

// schema keywords depend on the QueryParser flag
unittest {
	auto q = Lexer("type");
	assert(q.front.type == TokenType.name, q.front.toString());
	assert(q.front.value == "type", q.front.value);

	auto s = Lexer("type", QueryParser.no);
	assert(s.front.type == TokenType.type, s.front.toString());

	auto i = Lexer("interface", QueryParser.no);
	assert(i.front.type == TokenType.interface_, i.front.toString());
}

// a comment terminated by the end of the input
unittest {
	auto l = Lexer("{ } # trailing");
	assert(l.front.type == TokenType.lcurly, l.front.toString());
	l.popFront();
	assert(l.front.type == TokenType.rcurly, l.front.toString());
	l.popFront();
	assert(l.empty);
	assert(l.getRestOfInput() == "");
}

// carriage returns are whitespace
unittest {
	auto l = Lexer("{ foo\r\n}\r\n");
	assert(l.front.type == TokenType.lcurly, l.front.toString());
	l.popFront();
	assert(l.front.type == TokenType.name, l.front.toString());
	assert(l.front.value == "foo", format("'%s'", l.front.value));
	l.popFront();
	assert(l.front.type == TokenType.rcurly, l.front.toString());
	l.popFront();
	assert(l.empty);
}

// a sign must be followed by a digit
unittest {
	import std.exception : assertThrown;

	assertThrown!Exception(Lexer("-"));
	assertThrown!Exception(Lexer("+)"));

	auto l = Lexer("-7");
	assert(l.front.type == TokenType.intValue, l.front.toString());
	assert(l.front.value == "-7", l.front.value);
}

// escaped backslash in front of the closing quote, unterminated string
unittest {
	auto l = Lexer(`"a\\" x`);
	assert(l.front.type == TokenType.stringValue, l.front.toString());
	assert(l.front.value == `a\\`, l.front.value);
	l.popFront();
	assert(l.front.type == TokenType.name, l.front.toString());
	assert(l.front.value == "x", l.front.value);

	auto e = Lexer(`"a\"b"`);
	assert(e.front.type == TokenType.stringValue, e.front.toString());
	assert(e.front.value == `a\"b`, e.front.value);

	auto u = Lexer(`"open`);
	assert(u.front.type == TokenType.stringValue, u.front.toString());
	assert(u.front.value == "open", u.front.value);
	assert(u.getRestOfInput() == "");
}