1 module graphql.lexer;
2 
3 import std.experimental.logger;
4 import std.format : format;
5 import std.typecons : Flag;
6 import std.stdio;
7 
8 import graphql.tokenmodule;
9 
/// Typed yes/no flag handed to `Lexer` to choose which keyword set it
/// recognizes (see `Lexer.isNotQueryParser`).
alias QueryParser = Flag!("QueryParser");
11 
/**
 * Hand-written lexer that exposes a GraphQL document as an input range of
 * `Token`s (`empty` / `front` / `popFront`).
 *
 * The first token is built by the constructor; every `popFront` builds the
 * next one. Once the input is exhausted `front` is a token of type
 * `TokenType.undefined` and `empty` becomes `true`.
 *
 * Position reporting: single-character punctuators carry the line/column of
 * the character itself, all other tokens carry the column just past their
 * last character.
 *
 * Malformed input (an incomplete `...`, a sign without a following digit, an
 * unterminated string) is reported by throwing an `Exception` whose message
 * contains the current line and column.
 */
struct Lexer {
	/// Keyword-set selector. With `QueryParser.yes` the words `subscription`,
	/// `scalar`, `schema`, `input` and `type` are lexed as plain names.
	const QueryParser qp;
	/// The complete text being tokenized.
	string input;
	/// Index into `input` of the next unread character.
	size_t stringPos;

	/// Line of the next unread character, starting at 1.
	size_t line;
	/// Column of the next unread character, starting at 1.
	size_t column;

	/// The token currently exposed through `front`.
	Token cur;

	/**
	 * Creates a lexer over `input` and immediately lexes the first token.
	 *
	 * Params:
	 *   input = the document text
	 *   qp = which keyword set to recognize, see `qp`
	 *
	 * Throws: `Exception` if the first token is malformed.
	 */
	this(string input, QueryParser qp = QueryParser.yes) @safe {
		this.input = input;
		this.stringPos = 0;
		this.line = 1;
		this.column = 1;
		this.qp = qp;
		this.buildToken();
	}

	/// Returns: `true` if this lexer was created with `QueryParser.no`.
	bool isNotQueryParser() @safe const {
		return this.qp == QueryParser.no;
	}

	/// Returns: `true` at end of input or when the next unread character
	/// terminates a name/keyword.
	private bool isTokenStop() const @safe {
		return this.stringPos >= this.input.length
			|| this.isTokenStop(this.input[this.stringPos]);
	}

	/// Returns: `true` if `c` is whitespace, a line terminator or one of the
	/// punctuator characters, i.e. a character that ends a name/keyword.
	private bool isTokenStop(const(char) c) const @safe {
		switch(c) {
			case ' ', '\t', '\n', '\r',
				 '(', ')', '{', '}', '[', ']',
				 '!', '=', '|', ':', ',', '@', '$':
				return true;
			default:
				return false;
		}
	}

	/// Consumes exactly one character that is known to exist.
	private void advance() @safe {
		++this.stringPos;
		++this.column;
	}

	/// Returns: `true` if the next unread character is an ASCII digit.
	private bool atDigit() const @safe {
		return this.stringPos < this.input.length
			&& this.input[this.stringPos] >= '0'
			&& this.input[this.stringPos] <= '9';
	}

	/**
	 * Skips a `#` comment if one starts at the current position.
	 *
	 * The terminating newline is consumed as well when present; a comment
	 * that runs to the end of the input leaves `stringPos` exactly at
	 * `input.length` and does not count an extra line.
	 *
	 * Returns: `true` if a comment was skipped.
	 */
	private bool eatComment() @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '#')
		{
			return false;
		}

		// the '#' itself is not a newline, so this loop consumes it too
		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '\n')
		{
			++this.stringPos;
		}

		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.line;
			this.column = 1;
		}
		return true;
	}

	/// Skips blanks, tabs, carriage returns, newlines and comments while
	/// keeping `line` and `column` up to date.
	private void eatWhitespace() @safe {
		while(this.stringPos < this.input.length) {
			if(this.eatComment()) {
				continue;
			}

			const c = this.input[this.stringPos];
			if(c == ' ' || c == '\t' || c == '\r') {
				++this.column;
			} else if(c == '\n') {
				this.column = 1;
				++this.line;
			} else {
				break;
			}
			++this.stringPos;
		}
	}

	/**
	 * Lexes the next token into `cur`.
	 *
	 * Dispatches on the first significant character: punctuators, `...`,
	 * numbers and strings have dedicated handlers, everything else is
	 * scanned as a word and then classified as keyword or name.
	 *
	 * Throws: `Exception` on malformed input.
	 */
	private void buildToken() @safe {
		this.eatWhitespace();

		if(this.stringPos >= this.input.length) {
			this.cur = Token(TokenType.undefined);
			return;
		}

		switch(this.input[this.stringPos]) {
			case ')': this.punctuator(TokenType.rparen); break;
			case '(': this.punctuator(TokenType.lparen); break;
			case ']': this.punctuator(TokenType.rbrack); break;
			case '[': this.punctuator(TokenType.lbrack); break;
			case '}': this.punctuator(TokenType.rcurly); break;
			case '{': this.punctuator(TokenType.lcurly); break;
			case '$': this.punctuator(TokenType.dollar); break;
			case '!': this.punctuator(TokenType.exclamation); break;
			case '|': this.punctuator(TokenType.pipe); break;
			case '@': this.punctuator(TokenType.at); break;
			case ',': this.punctuator(TokenType.comma); break;
			case '=': this.punctuator(TokenType.equal); break;
			case ':': this.punctuator(TokenType.colon); break;
			case '.':
				this.lexDots();
				break;
			case '-', '+':
				this.lexNumber();
				break;
			case '0': .. case '9':
				this.lexNumber();
				break;
			case '"':
				this.lexString();
				break;
			default:
				this.lexName();
				break;
		}
	}

	/// Emits a one-character token of the given type, positioned at the
	/// character itself, and consumes that character.
	private void punctuator(TokenType type) @safe {
		this.cur = Token(type, this.line, this.column);
		this.advance();
	}

	/**
	 * Lexes the spread operator `...`.
	 *
	 * The three dots must be followed by a token stop or by the first
	 * character of a name (letter, digit or underscore).
	 *
	 * Throws: `Exception` if that is not the case.
	 */
	private void lexDots() @safe {
		import std.ascii : isAlphaNum;

		// only needed because testStrAndInc wants an end index to maintain
		size_t consumed = this.stringPos;
		if(this.testStrAndInc!"..."(consumed)) {
			const bool nameFollows = this.stringPos < this.input.length
				&& (isAlphaNum(this.input[this.stringPos])
					|| this.input[this.stringPos] == '_');
			if(this.isTokenStop() || nameFollows) {
				this.cur = Token(TokenType.dots, this.line, this.column);
				return;
			}
		}
		throw new Exception(format(
				"failed to parse \"...\" at line %s column %s",
				this.line, this.column
			));
	}

	/**
	 * Lexes an integer or float literal with an optional leading `+`/`-`.
	 *
	 * A literal becomes a `floatValue` when its integer digits are directly
	 * followed by a `.`; the digits after the dot are optional. The token
	 * value is the literal exactly as written, sign included.
	 *
	 * Throws: `Exception` if a sign is not followed by a digit.
	 */
	private void lexNumber() @safe {
		const size_t begin = this.stringPos;

		const char first = this.input[this.stringPos];
		if(first == '-' || first == '+') {
			this.advance();
			if(!this.atDigit()) {
				throw new Exception(format(
						"expected a digit after '%s' at line %s column %s",
						first, this.line, this.column
					));
			}
		}

		// a digit is guaranteed here: either we were dispatched on one or
		// the sign check above found one
		while(this.atDigit()) {
			this.advance();
		}

		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '.')
		{
			this.cur = Token(TokenType.intValue,
					this.input[begin .. this.stringPos],
					this.line, this.column);
			return;
		}

		this.advance(); // the '.'
		while(this.atDigit()) {
			this.advance();
		}
		this.cur = Token(TokenType.floatValue,
				this.input[begin .. this.stringPos],
				this.line, this.column);
	}

	/**
	 * Lexes a double-quoted string.
	 *
	 * A backslash escapes the character after it, so `\"` does not end the
	 * string while `\\` followed by `"` does. The token value is the raw
	 * text between the quotes; escape sequences are not decoded.
	 *
	 * Throws: `Exception` if the closing quote is missing.
	 */
	private void lexString() @safe {
		this.advance(); // opening quote
		const size_t begin = this.stringPos;

		bool escaped = false;
		while(this.stringPos < this.input.length) {
			const c = this.input[this.stringPos];
			if(escaped) {
				escaped = false;
			} else if(c == '\\') {
				escaped = true;
			} else if(c == '"') {
				break;
			}
			this.advance();
		}

		if(this.stringPos >= this.input.length) {
			throw new Exception(format(
					"unterminated string at line %s column %s",
					this.line, this.column
				));
		}

		const size_t end = this.stringPos;
		this.advance(); // closing quote
		this.cur = Token(TokenType.stringValue, this.input[begin .. end],
				this.line, this.column);
	}

	/**
	 * Lexes a word (everything up to the next token stop) and emits either
	 * the matching keyword token or a `name` token carrying the word.
	 *
	 * The whole word is scanned before it is classified, so only an exact
	 * match is ever treated as a keyword.
	 */
	private void lexName() @safe {
		const size_t begin = this.stringPos;
		// buildToken guarantees at least one unread, non-stop character;
		// consuming it unconditionally also guarantees forward progress
		do {
			this.advance();
		} while(!this.isTokenStop());

		string word = this.input[begin .. this.stringPos];
		const TokenType type = this.classifyWord(word);
		if(type == TokenType.name) {
			this.cur = Token(TokenType.name, word, this.line, this.column);
		} else {
			this.cur = Token(type, this.line, this.column);
		}
	}

	/// Returns: the keyword type for `word`, or `TokenType.name` if `word`
	/// is not a keyword under the current `qp` setting.
	private TokenType classifyWord(string word) const @safe {
		switch(word) {
			case "mutation": return TokenType.mutation;
			case "on": return TokenType.on_;
			case "directive": return TokenType.directive;
			case "enum": return TokenType.enum_;
			case "extend": return TokenType.extend;
			case "interface": return TokenType.interface_;
			case "implements": return TokenType.implements;
			case "false": return TokenType.false_;
			case "fragment": return TokenType.fragment;
			case "query": return TokenType.query;
			case "true": return TokenType.true_;
			case "null": return TokenType.null_;
			case "union": return TokenType.union_;
			case "subscription":
				return this.nonQueryKeyword(TokenType.subscription);
			case "scalar":
				return this.nonQueryKeyword(TokenType.scalar);
			case "schema":
				return this.nonQueryKeyword(TokenType.schema);
			case "input":
				return this.nonQueryKeyword(TokenType.input);
			case "type":
				return this.nonQueryKeyword(TokenType.type);
			default:
				return TokenType.name;
		}
	}

	/// Returns: `keyword` when lexing with `QueryParser.no`, otherwise
	/// `TokenType.name`.
	private TokenType nonQueryKeyword(TokenType keyword) const @safe {
		return this.isNotQueryParser() ? keyword : TokenType.name;
	}

	/**
	 * Consumes the next character if it equals `c`.
	 *
	 * Params:
	 *   c = the character to match
	 *   e = incremented together with `stringPos` on a match
	 *
	 * Returns: `true` if the character matched and was consumed.
	 */
	bool testCharAndInc(const(char) c, ref size_t e) @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != c)
		{
			return false;
		}
		++this.column;
		++this.stringPos;
		++e;
		return true;
	}

	/**
	 * Consumes the characters of `s` one by one for as long as they match.
	 *
	 * Note that characters matched before a mismatch stay consumed, so after
	 * a `false` result the position may have moved.
	 *
	 * Params:
	 *   e = incremented once for every consumed character
	 *
	 * Returns: `true` if all of `s` was matched.
	 */
	bool testStrAndInc(string s)(ref size_t e) @safe {
		foreach(char c; s) {
			if(!this.testCharAndInc(c, e)) {
				return false;
			}
		}
		return true;
	}

	/// Returns: `true` once the input is consumed and the end-of-input
	/// token has been reached.
	@property bool empty() const @safe {
		return this.stringPos >= this.input.length
			&& this.cur.type == TokenType.undefined;
	}

	/// Returns: the current token.
	@property Token front() const @safe @nogc pure {
		return this.cur;
	}

	/// Advances to the next token.
	/// Throws: `Exception` if the next token is malformed.
	void popFront() @safe {
		this.buildToken();
	}

	/// Returns: the part of the input that has not been consumed yet.
	string getRestOfInput() const @safe {
		return this.input[this.stringPos .. $];
	}
}
500 
unittest {
	// a single-letter identifier followed by a blank is a name token
	auto lexer = Lexer("f ");
	assert(!lexer.empty);

	auto tok = lexer.front;
	assert(tok.type == TokenType.name);
	assert(tok.value == "f", format("'%s'", tok.value));
}
508 
unittest {
	// the spread operator is one token, the trailing blank is skipped
	auto lexer = Lexer("... ");

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.dots);

	lexer.popFront();
	assert(lexer.empty);
}
518 
unittest {
	// '!' terminates the preceding name and is a token of its own
	auto lexer = Lexer("name! ");

	assert(!lexer.empty);
	auto first = lexer.front;
	assert(first.type == TokenType.name);
	assert(first.value == "name", format("'%s'", first.value));
	lexer.popFront();

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.exclamation);
	lexer.popFront();

	assert(lexer.empty);
}
531 
unittest {
	// `empty` and `front` have to work on a const lexer, and a keyword
	// that ends exactly at the end of the input must be recognized
	const lexer = Lexer("fragment");
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.fragment);
}
538 
unittest {
	// token types of a small mutation document, in order
	string f = `
		mutation {
		  likeStory(storyID: 12345) {
		    story {
		      likeCount
		    }
		  }
		}`;

	immutable TokenType[] expected = [
		TokenType.mutation,
		TokenType.lcurly,
		TokenType.name,     // likeStory
		TokenType.lparen,
		TokenType.name,     // storyID
		TokenType.colon,
		TokenType.intValue, // 12345
		TokenType.rparen,
		TokenType.lcurly,
		TokenType.name,     // story
		TokenType.lcurly,
		TokenType.name,     // likeCount
		TokenType.rcurly,
		TokenType.rcurly,
		TokenType.rcurly
	];

	auto lexer = Lexer(f);
	foreach(type; expected) {
		assert(!lexer.empty);
		assert(lexer.front.type == type, format("%s", lexer.front.type));
		lexer.popFront();
	}
	assert(lexer.empty);
}
596 
unittest {
	// query with fragments: signed numbers, a comment, the spread operator
	// and the literal keywords
	string f = `
		query withFragments {
		  user(id: +4) {
			# super cool comment
friends(first: -10.3) {
		      ...friendFields
			  null false true
		    }
		    mutualFriends(first: 10) {
		      ...friendFields
		    }
		  }
		}

		fragment friendFields on User {
		  id
		  name
		  profilePic(size: 50)
		}`;
	auto lexer = Lexer(f);

	// checks the current token without consuming it
	void expect(TokenType type, string value = null) {
		assert(!lexer.empty);
		assert(lexer.front.type == type, format("%s", lexer.front.type));
		if(value !is null) {
			assert(lexer.front.value == value, lexer.front.value);
		}
	}

	// consumes `count` tokens without looking at them
	void skip(size_t count = 1) {
		foreach(_; 0 .. count) {
			lexer.popFront();
		}
	}

	expect(TokenType.query);
	skip();
	expect(TokenType.name, "withFragments");
	skip(2); // also '{'
	expect(TokenType.name, "user");
	skip(2); // also '('
	expect(TokenType.name, "id");
	skip();
	expect(TokenType.colon);
	skip();
	expect(TokenType.intValue, "+4");
	skip();
	expect(TokenType.rparen);
	skip(2); // also '{'
	expect(TokenType.name, "friends");
	skip();
	expect(TokenType.lparen);
	skip();
	expect(TokenType.name, "first");
	skip(2); // also ':'
	expect(TokenType.floatValue, "-10.3");
	skip(3); // also ')' and '{'
	expect(TokenType.dots);
	skip();
	expect(TokenType.name, "friendFields");
	skip();
	expect(TokenType.null_);
	skip();
	expect(TokenType.false_);
	skip();
	expect(TokenType.true_);

	// the remainder only has to lex without errors
	for(; !lexer.empty; lexer.popFront()) {
	}
}
683 
unittest {
	// a quoted argument is a stringValue whose value excludes the quotes
	string f = `
		query withFragments {
		  user(id: "hello") {
		  }
		}`;

	auto lexer = Lexer(f);

	// checks the type of the current token without consuming it
	void expectType(TokenType type) {
		assert(!lexer.empty);
		assert(lexer.front.type == type);
	}

	// consumes `count` tokens without looking at them
	void skip(size_t count = 1) {
		foreach(_; 0 .. count) {
			lexer.popFront();
		}
	}

	expectType(TokenType.query);
	skip();

	expectType(TokenType.name);
	assert(lexer.front.value == "withFragments");
	skip(2); // also '{'

	expectType(TokenType.name);
	assert(lexer.front.value == "user");
	skip(2); // also '('

	expectType(TokenType.name);
	assert(lexer.front.value == "id", lexer.front.value);
	skip();

	expectType(TokenType.colon);
	skip();

	expectType(TokenType.stringValue);
	assert(lexer.front.value == "hello",
		format("'%s' '%s'", lexer.front.value, "hello"));
	skip();

	expectType(TokenType.rparen);
}
719 
720 // Issue #20
unittest {
	// leading comment lines, including an empty '#' line, are skipped
	string f = `# asldf
#
{ foo }
`;

	immutable TokenType[] expected = [
		TokenType.lcurly, TokenType.name, TokenType.rcurly
	];

	auto lexer = Lexer(f);
	foreach(type; expected) {
		assert(!lexer.empty);
		assert(lexer.front.type == type, lexer.front.toString());
		lexer.popFront();
	}
	assert(lexer.empty);
}