1 module graphql.lexer;
2 
3 import std.experimental.logger;
4 import std.format : format;
5 import std.typecons : Flag;
6 import std.stdio;
7 
8 import graphql.tokenmodule;
9 
10 alias QueryParser = Flag!"QueryParser";
11 
/**
 * Pull-style lexer for GraphQL text.
 *
 * The constructor lexes the first token. `front` returns the current token,
 * `popFront` lexes the next one, and `empty` becomes true once the input is
 * exhausted and the current token is `TokenType.undefined`.
 *
 * When constructed with `QueryParser.no` the keywords `subscription`,
 * `scalar`, `schema`, `input` and `type` as well as `"""` block strings are
 * recognised in addition to everything recognised in query mode.
 *
 * Position convention: single-character punctuation tokens carry the column
 * of the character itself; every other token carries the line/column reached
 * right after its last character.
 */
struct Lexer {
	/// Selects query mode (`yes`) or schema mode (`no`).
	const QueryParser qp;
	/// The complete text being lexed.
	string input;
	/// Index into `input` of the next character that has not been consumed.
	size_t stringPos;

	/// 1-based line of `stringPos`.
	size_t line;
	/// 1-based column of `stringPos`.
	size_t column;

	/// The token most recently produced by `buildToken`.
	Token cur;

	/**
	 * Creates a lexer over `input` and immediately lexes the first token.
	 *
	 * Params:
	 *   input = the text to tokenize
	 *   qp = `QueryParser.yes` for query documents, `QueryParser.no` to also
	 *        accept the schema-only keywords and block strings
	 *
	 * Throws: `Exception` if the first token is malformed (see `buildToken`).
	 */
	this(string input, QueryParser qp = QueryParser.yes) @safe {
		this.input = input;
		this.stringPos = 0;
		this.line = 1;
		this.column = 1;
		this.qp = qp;
		this.buildToken();
	}

	/// True when the lexer was constructed with `QueryParser.no`.
	bool isNotQueryParser() @safe const {
		return this.qp == QueryParser.no;
	}

	/// True at end of input or when the next character terminates a name.
	private bool isTokenStop() const @safe {
		return this.stringPos >= this.input.length
			|| this.isTokenStop(this.input[this.stringPos]);
	}

	/// True when `c` is whitespace or one of the punctuation characters.
	private bool isTokenStop(const(char) c) const @safe {
		import std.ascii : isWhite;
		import std.algorithm.searching : canFind;
		return isWhite(c) || "(){}!=|[:],@$".canFind(c);
	}

	/// True when the unconsumed input starts with `s`; consumes nothing.
	private bool lookingAt(string s) const @safe {
		return this.stringPos + s.length <= this.input.length
			&& this.input[this.stringPos .. this.stringPos + s.length] == s;
	}

	/**
	 * Skips a `#` comment up to and including its terminating newline.
	 *
	 * Returns: true if a comment started at the current position.
	 */
	private bool eatComment() @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '#')
		{
			return false;
		}

		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '\n')
		{
			++this.stringPos;
		}

		// Only step over (and count) the newline if there actually is one;
		// a comment may be the last thing in the input.
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.line;
			this.column = 1;
		}
		return true;
	}

	/// Skips whitespace and comments while keeping line/column up to date.
	private void eatWhitespace() @safe {
		import std.ascii : isWhite;
		while(this.stringPos < this.input.length) {
			if(this.eatComment()) {
				// eatComment already advanced past the comment
				continue;
			} else if(this.input[this.stringPos] == '\n') {
				this.column = 1;
				++this.line;
			} else if(this.input[this.stringPos].isWhite) {
				++this.column;
			} else {
				break;
			}
			++this.stringPos;
		}
	}

	/// Maps a single punctuation character to its token type.
	private static bool punctuationType(const(char) c, out TokenType tt)
			@safe
	{
		switch(c) {
			case ')': tt = TokenType.rparen; return true;
			case '(': tt = TokenType.lparen; return true;
			case ']': tt = TokenType.rbrack; return true;
			case '[': tt = TokenType.lbrack; return true;
			case '}': tt = TokenType.rcurly; return true;
			case '{': tt = TokenType.lcurly; return true;
			case '$': tt = TokenType.dollar; return true;
			case '!': tt = TokenType.exclamation; return true;
			case '|': tt = TokenType.pipe; return true;
			case '@': tt = TokenType.at; return true;
			case ',': tt = TokenType.comma; return true;
			case '=': tt = TokenType.equal; return true;
			case ':': tt = TokenType.colon; return true;
			default: return false;
		}
	}

	/**
	 * Maps a complete name to its keyword token type.
	 *
	 * Returns: true if `text` is a keyword in the current mode; the
	 * schema-only keywords match only when `isNotQueryParser()`.
	 */
	private bool keywordType(string text, out TokenType tt) const @safe {
		switch(text) {
			case "mutation": tt = TokenType.mutation; return true;
			case "on": tt = TokenType.on_; return true;
			case "directive": tt = TokenType.directive; return true;
			case "enum": tt = TokenType.enum_; return true;
			case "extend": tt = TokenType.extend; return true;
			case "interface": tt = TokenType.interface_; return true;
			case "implements": tt = TokenType.implements; return true;
			case "false": tt = TokenType.false_; return true;
			case "fragment": tt = TokenType.fragment; return true;
			case "query": tt = TokenType.query; return true;
			case "true": tt = TokenType.true_; return true;
			case "null": tt = TokenType.null_; return true;
			case "union": tt = TokenType.union_; return true;
			case "subscription":
				tt = TokenType.subscription;
				return this.isNotQueryParser();
			case "scalar":
				tt = TokenType.scalar;
				return this.isNotQueryParser();
			case "schema":
				tt = TokenType.schema;
				return this.isNotQueryParser();
			case "input":
				tt = TokenType.input;
				return this.isNotQueryParser();
			case "type":
				tt = TokenType.type;
				return this.isNotQueryParser();
			default:
				return false;
		}
	}

	/**
	 * Lexes the next token into `cur`.
	 *
	 * At end of input `cur` becomes `Token(TokenType.undefined)`.
	 *
	 * Throws: `Exception` on a malformed `...`, on a sign that is not
	 * followed by a digit, and on an unterminated block string.
	 */
	private void buildToken() @safe {
		this.eatWhitespace();

		if(this.stringPos >= this.input.length) {
			this.cur = Token(TokenType.undefined);
			return;
		}

		const char c = this.input[this.stringPos];

		TokenType punct;
		if(punctuationType(c, punct)) {
			// punctuation records the column of the character itself
			this.cur = Token(punct, this.line, this.column);
			++this.column;
			++this.stringPos;
			return;
		}

		switch(c) {
			case '.':
				this.lexDots();
				break;
			case '-', '+':
				this.lexNumber();
				break;
			case '0': .. case '9':
				this.lexNumber();
				break;
			case '"':
				this.lexString();
				break;
			default:
				this.lexName();
				break;
		}
	}

	/// Lexes `...`; it must be followed by a token stop or the start of a name.
	private void lexDots() @safe {
		import std.ascii : isAlphaNum;

		size_t consumed;
		if(this.testStrAndInc!"..."(consumed)) {
			// GraphQL names may start with '_' as well as a letter
			const bool nameFollows = this.stringPos < this.input.length
				&& (isAlphaNum(this.input[this.stringPos])
					|| this.input[this.stringPos] == '_');
			if(this.isTokenStop() || nameFollows) {
				this.cur = Token(TokenType.dots, this.line, this.column);
				return;
			}
		}
		throw new Exception(format(
				"failed to parse \"...\" at line %s column %s",
				this.line, this.column
			));
	}

	/// Consumes a possibly empty run of ASCII digits.
	private void eatDigits() @safe {
		import std.ascii : isDigit;
		while(this.stringPos < this.input.length
				&& isDigit(this.input[this.stringPos]))
		{
			++this.stringPos;
			++this.column;
		}
	}

	/**
	 * Lexes an optionally signed integer or a float of the form
	 * `digits '.' digits*`. The token value is the raw text including the
	 * sign.
	 */
	private void lexNumber() @safe {
		import std.ascii : isDigit;

		const size_t b = this.stringPos;
		const char first = this.input[b];

		if(first == '-' || first == '+') {
			++this.stringPos;
			++this.column;
			// A bare sign is not a number; refuse it instead of swallowing
			// whatever character happens to follow.
			if(this.stringPos >= this.input.length
					|| !isDigit(this.input[this.stringPos]))
			{
				throw new Exception(format(
						"expected a digit after '%s' at line %s column %s",
						first, this.line, this.column
					));
			}
		}

		this.eatDigits();

		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == '.')
		{
			++this.stringPos;
			++this.column;
			this.eatDigits();
			this.cur = Token(TokenType.floatValue,
					this.input[b .. this.stringPos], this.line, this.column);
		} else {
			this.cur = Token(TokenType.intValue,
					this.input[b .. this.stringPos], this.line, this.column);
		}
	}

	/**
	 * Lexes a string starting at the current `"`.
	 *
	 * In schema mode a `"""` opener starts a block string that runs to the
	 * next `"""`. Otherwise the string runs to the next quote that is not
	 * escaped by a backslash. The token value is the raw text between the
	 * delimiters; escape sequences are not decoded. A regular string that is
	 * not terminated before end of input yields the remaining text.
	 */
	private void lexString() @safe {
		const size_t b = this.stringPos;

		// Probe for the block-string opener without consuming anything so
		// that an empty string `""` still takes the regular path below.
		if(this.isNotQueryParser() && this.lookingAt(`"""`)) {
			this.stringPos += 3;
			this.column += 3;
			while(!this.lookingAt(`"""`)) {
				if(this.stringPos >= this.input.length) {
					throw new Exception(format(
							"unterminated block string at line %s column %s",
							this.line, this.column
						));
				}
				if(this.input[this.stringPos] == '\n') {
					this.column = 1;
					++this.line;
				} else {
					++this.column;
				}
				++this.stringPos;
			}
			const size_t contentEnd = this.stringPos;
			this.stringPos += 3;
			this.column += 3;
			this.cur = Token(TokenType.stringValue,
					this.input[b + 3 .. contentEnd], this.line, this.column);
			return;
		}

		// step over the opening quote
		++this.stringPos;
		++this.column;

		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '"')
		{
			// A backslash escapes the following character, so `\"` stays
			// inside the string while `\\` followed by `"` ends it.
			const size_t step = (this.input[this.stringPos] == '\\'
					&& this.stringPos + 1 < this.input.length) ? 2 : 1;
			this.stringPos += step;
			this.column += step;
		}

		const size_t e = this.stringPos;
		// step over the closing quote if there is one
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.column;
		}
		this.cur = Token(TokenType.stringValue, this.input[b + 1 .. e],
				this.line, this.column);
	}

	/**
	 * Lexes everything up to the next token stop and classifies it as a
	 * keyword or a name. Comparing the complete text avoids treating names
	 * that merely share pieces with a keyword as that keyword.
	 */
	private void lexName() @safe {
		const size_t b = this.stringPos;
		while(!this.isTokenStop()) {
			++this.stringPos;
			++this.column;
		}

		string text = this.input[b .. this.stringPos];
		TokenType keyword;
		if(this.keywordType(text, keyword)) {
			this.cur = Token(keyword, this.line, this.column);
		} else {
			this.cur = Token(TokenType.name, text, this.line, this.column);
		}
	}

	/**
	 * Consumes the next character if it equals `c`.
	 *
	 * Params:
	 *   c = the character to match
	 *   e = incremented by one when the character is consumed
	 *
	 * Returns: true if the character matched and was consumed.
	 */
	bool testCharAndInc(const(char) c, ref size_t e) @safe {
		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == c)
		{
			++this.column;
			++this.stringPos;
			++e;
			return true;
		} else {
			return false;
		}
	}

	/**
	 * Consumes the characters of `s` one by one for as long as they match.
	 *
	 * Note that characters matched before the first mismatch stay consumed
	 * even when the function returns false.
	 *
	 * Params:
	 *   e = incremented once per consumed character
	 *
	 * Returns: true if all of `s` matched.
	 */
	bool testStrAndInc(string s)(ref size_t e) @safe {
		for(size_t i = 0; i < s.length; ++i) {
			if(this.stringPos < this.input.length
					&& this.input[this.stringPos] == s[i])
			{
				++this.column;
				++this.stringPos;
				++e;
			} else {
				return false;
			}
		}

		return true;
	}

	/// True once all input is consumed and the current token is `undefined`.
	@property bool empty() const @safe {
		return this.stringPos >= this.input.length
			&& this.cur.type == TokenType.undefined;
	}

	/// The current token.
	Token front() @property @safe {
		return this.cur;
	}

	/// ditto
	@property Token front() const @safe @nogc pure {
		return this.cur;
	}

	/// Advances to the next token; may throw like `buildToken`.
	void popFront() @safe {
		this.buildToken();
	}

	/// The part of the input that has not been consumed yet.
	string getRestOfInput() const @safe {
		return this.input[this.stringPos .. $];
	}
}
505 
unittest {
	// A lone identifier followed by whitespace lexes as one name token.
	auto lexer = Lexer("f ");
	assert(!lexer.empty);

	auto first = lexer.front;
	assert(first.type == TokenType.name);
	assert(first.value == "f", format("'%s'", first.value));
}
513 
unittest {
	// "..." followed by whitespace is a single dots token and nothing else.
	auto lexer = Lexer("... ");

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.dots);

	lexer.popFront();
	assert(lexer.empty);
}
523 
unittest {
	// '!' terminates the preceding name and is a token of its own.
	auto lexer = Lexer("name! ");

	assert(!lexer.empty);
	auto ident = lexer.front;
	assert(ident.type == TokenType.name);
	assert(ident.value == "name", format("'%s'", ident.value));

	lexer.popFront();
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.exclamation);

	lexer.popFront();
	assert(lexer.empty);
}
536 
unittest {
	// A keyword at the very end of the input is recognised, and `empty` /
	// `front` are usable on a const Lexer.
	const lexer = Lexer("fragment");
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.fragment);
}
543 
unittest {
	// Walk a small mutation and compare the token types against a table.
	auto lexer = Lexer(`
		mutation {
		  likeStory(storyID: 12345) {
		    story {
		      likeCount
		    }
		  }
		}`);

	TokenType[] expected = [
		TokenType.mutation,
		TokenType.lcurly,
		TokenType.name,
		TokenType.lparen,
		TokenType.name,
		TokenType.colon,
		TokenType.intValue,
		TokenType.rparen,
		TokenType.lcurly,
		TokenType.name,
		TokenType.lcurly,
		TokenType.name,
		TokenType.rcurly,
		TokenType.rcurly,
		TokenType.rcurly,
	];

	foreach(want; expected) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, format("%s", lexer.front.type));
		lexer.popFront();
	}
	assert(lexer.empty);
}
601 
unittest {
	// Query with fragments, signed numbers, a comment and literal keywords.
	auto lexer = Lexer(`
		query withFragments {
		  user(id: +4) {
			# super cool comment
friends(first: -10.3) {
		      ...friendFields
			  null false true
		    }
		    mutualFriends(first: 10) {
		      ...friendFields
		    }
		  }
		}

		fragment friendFields on User {
		  id
		  name
		  profilePic(size: 50)
		}`);

	// The current token exists and has the wanted type.
	void expectType(TokenType want) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, format("%s", lexer.front.type));
	}

	// The current token exists and has the wanted type and value.
	void expectValue(TokenType want, string text) {
		expectType(want);
		assert(lexer.front.value == text, lexer.front.value);
	}

	// Drop `count` tokens without inspecting them.
	void advance(size_t count = 1) {
		foreach(_; 0 .. count) {
			lexer.popFront();
		}
	}

	expectType(TokenType.query);
	advance();
	expectValue(TokenType.name, "withFragments");
	advance(2);
	expectValue(TokenType.name, "user");
	advance(2);
	expectValue(TokenType.name, "id");
	advance();
	expectType(TokenType.colon);
	advance();
	expectValue(TokenType.intValue, "+4");
	advance();
	expectType(TokenType.rparen);
	advance(2);
	expectValue(TokenType.name, "friends");
	advance();
	expectType(TokenType.lparen);
	advance();
	expectValue(TokenType.name, "first");
	advance(2);
	expectValue(TokenType.floatValue, "-10.3");
	advance(3);
	expectType(TokenType.dots);
	advance();
	expectValue(TokenType.name, "friendFields");
	advance();
	expectType(TokenType.null_);
	advance();
	expectType(TokenType.false_);
	advance();
	expectType(TokenType.true_);

	// The remainder must lex without throwing.
	for(; !lexer.empty; lexer.popFront()) {
	}
}
688 
unittest {
	// A quoted argument lexes as a stringValue without its quotes.
	auto lexer = Lexer(`
		query withFragments {
		  user(id: "hello") {
		  }
		}`);

	// The current token is a name with the given text.
	void expectName(string text) {
		assert(!lexer.empty);
		assert(lexer.front.type == TokenType.name);
		assert(lexer.front.value == text, lexer.front.value);
	}

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.query);
	lexer.popFront();

	expectName("withFragments");
	lexer.popFront();
	lexer.popFront();

	expectName("user");
	lexer.popFront();
	lexer.popFront();

	expectName("id");
	lexer.popFront();

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.colon);
	lexer.popFront();

	assert(!lexer.empty);
	auto str = lexer.front;
	assert(str.type == TokenType.stringValue);
	assert(str.value == "hello", format("'%s' '%s'", str.value, "hello"));
	lexer.popFront();

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.rparen);
}
724 
725 // Issue #20
unittest {
	// Leading comment lines, including an empty one, are skipped entirely.
	auto lexer = Lexer(`# asldf
#
{ foo }
`);

	foreach(want; [TokenType.lcurly, TokenType.name, TokenType.rcurly]) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, lexer.front.toString());
		lexer.popFront();
	}
	assert(lexer.empty);
}
744 
unittest {
	// In schema mode a """ ... """ block is one stringValue token whose
	// value excludes the delimiters.
	auto lexer = Lexer(`""" a long comment """ `, QueryParser.no);

	assert(!lexer.empty);
	auto block = lexer.front;
	assert(block.type == TokenType.stringValue, block.toString());
	assert(block.value == " a long comment ", block.value);

	lexer.popFront();
	assert(lexer.empty);
}
755 
unittest {
	// A block string may span several lines; the newlines stay in the value.
	import std.string : indexOf;

	string f = `""" a

		long

		comment """ `;

	auto l = Lexer(f, QueryParser.no);
	assert(!l.empty);
	assert(l.front.type == TokenType.stringValue, l.front.toString());
	assert(l.front.value.indexOf("a") != -1);
	assert(l.front.value.indexOf("long") != -1);
	assert(l.front.value.indexOf("comment") != -1);
	assert(l.front.value.indexOf("\n") != -1);
	l.popFront();
	assert(l.empty);
}