1 module graphql.lexer;
2 
3 version(LDC) {
4 	import std.experimental.logger;
5 } else {
6 	import std.logger;
7 }
8 
9 import std.format : format;
10 import std.typecons : Flag;
11 import std.stdio;
12 
13 import graphql.tokenmodule;
14 
/// Typed yes/no flag selecting the lexer dialect; `Lexer`'s constructor
/// defaults to `QueryParser.yes`.
alias QueryParser = Flag!("QueryParser");
16 
/**
 * Hand-written lexer that splits GraphQL source text into `Token`s.
 *
 * The lexer offers the range-style primitives `empty`, `front` and
 * `popFront`; the constructor already lexes the first token.
 *
 * Position conventions (kept from the original implementation):
 * $(UL
 *   $(LI punctuation tokens carry the line/column of the character itself)
 *   $(LI every other token carries the line/column just past its last
 *     character)
 *   $(LI the end of the input is reported as a `TokenType.undefined` token
 *     without position)
 * )
 *
 * With `QueryParser.no` the words `subscription`, `scalar`, `schema`,
 * `input` and `type` are keywords and `"""` starts a block string; with
 * `QueryParser.yes` those words are plain names and `"""` is not special.
 */
struct Lexer {
	/// Dialect switch, see the struct documentation.
	const QueryParser qp;
	/// The complete text being lexed.
	string input;
	/// Index into `input` of the next unread character; never exceeds
	/// `input.length`.
	size_t stringPos;

	/// Current line, starting at 1.
	size_t line;
	/// Current column; reset to 1 whenever whitespace, comment or block
	/// string handling consumes a line break.
	size_t column;

	/// The most recently lexed token, handed out by `front`.
	Token cur;

	/**
	 * Stores the input and lexes the first token.
	 *
	 * Params:
	 *   input = the text to lex
	 *   qp = the dialect to lex, see the struct documentation
	 *
	 * Throws: `Exception` if the first token is malformed.
	 */
	this(string input, QueryParser qp = QueryParser.yes) @safe {
		this.input = input;
		this.stringPos = 0;
		this.line = 1;
		this.column = 1;
		this.qp = qp;
		this.buildToken();
	}

	/// Returns: `true` if the lexer was constructed with `QueryParser.no`.
	bool isNotQueryParser() @safe const {
		return this.qp == QueryParser.no;
	}

	/// Returns: `true` at the end of the input or if the next character
	/// terminates a name.
	private bool isTokenStop() const @safe {
		return this.stringPos >= this.input.length
			|| this.isTokenStop(this.input[this.stringPos]);
	}

	/// Returns: `true` if `c` is whitespace or one of the punctuation
	/// characters that terminate a name.
	private bool isTokenStop(const(char) c) const @safe {
		import std.ascii : isWhite;
		import std.algorithm.searching : canFind;
		return isWhite(c) || "(){}!=|[:],@$".canFind(c);
	}

	/// Returns: `true` if the unread input starts with `s`. Nothing is
	/// consumed, so a failed probe leaves the lexer untouched.
	private bool lookingAt(string s) const @safe {
		const size_t end = this.stringPos + s.length;
		return end <= this.input.length
			&& this.input[this.stringPos .. end] == s;
	}

	/**
	 * Skips a `#` comment including its terminating line break.
	 *
	 * Returns: `true` if a comment was skipped, `false` if the next
	 * character does not start a comment.
	 */
	private bool eatComment() @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '#')
		{
			return false;
		}

		++this.stringPos;
		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '\n')
		{
			++this.stringPos;
		}

		// A comment on the last line may end at the end of the input without
		// a line break. Only consume (and count) a line break that is really
		// there, otherwise stringPos would run past input.length.
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.line;
			this.column = 1;
		}
		return true;
	}

	/// Skips whitespace and comments while keeping `line` and `column`
	/// up to date.
	private void eatWhitespace() @safe {
		import std.ascii : isWhite;
		while(this.stringPos < this.input.length) {
			if(this.eatComment()) {
				continue;
			} else if(this.input[this.stringPos] == '\n') {
				this.column = 1;
				++this.line;
			} else if(this.input[this.stringPos].isWhite) {
				++this.column;
			} else {
				break;
			}
			++this.stringPos;
		}
	}

	/**
	 * Lexes the next token into `cur`.
	 *
	 * Throws: `Exception` on a malformed `...`, on a sign that is not
	 * followed by a digit, or on an unterminated block string.
	 */
	private void buildToken() @safe {
		this.eatWhitespace();

		if(this.stringPos >= this.input.length) {
			this.cur = Token(TokenType.undefined);
			return;
		}

		switch(this.input[this.stringPos]) {
			case ')': this.lexPunctuation(TokenType.rparen); break;
			case '(': this.lexPunctuation(TokenType.lparen); break;
			case ']': this.lexPunctuation(TokenType.rbrack); break;
			case '[': this.lexPunctuation(TokenType.lbrack); break;
			case '}': this.lexPunctuation(TokenType.rcurly); break;
			case '{': this.lexPunctuation(TokenType.lcurly); break;
			case '$': this.lexPunctuation(TokenType.dollar); break;
			case '!': this.lexPunctuation(TokenType.exclamation); break;
			case '|': this.lexPunctuation(TokenType.pipe); break;
			case '@': this.lexPunctuation(TokenType.at); break;
			case ',': this.lexPunctuation(TokenType.comma); break;
			case '=': this.lexPunctuation(TokenType.equal); break;
			case ':': this.lexPunctuation(TokenType.colon); break;
			case '.':
				this.lexDots();
				break;
			case '-':
			case '+':
			case '0': .. case '9':
				this.lexNumber();
				break;
			case '"':
				this.lexString();
				break;
			default:
				this.lexWord();
				break;
		}
	}

	/// Emits a single character token positioned at the character itself.
	private void lexPunctuation(TokenType type) @safe {
		this.cur = Token(type, this.line, this.column);
		++this.column;
		++this.stringPos;
	}

	/**
	 * Lexes `...`. The dots must be followed by the end of the input, a
	 * token stop or the first character of a name.
	 *
	 * Throws: `Exception` if fewer than three dots are present or the
	 * character after them is not allowed.
	 */
	private void lexDots() @safe {
		import std.ascii : isAlphaNum;

		size_t consumed = 0;
		if(this.testStrAndInc!"..."(consumed)
				&& (this.isTokenStop()
					|| isAlphaNum(this.input[this.stringPos])
					// names may start with an underscore, e.g. `..._frag`
					|| this.input[this.stringPos] == '_'))
		{
			this.cur = Token(TokenType.dots, this.line, this.column);
			return;
		}
		throw new Exception(format(
				"failed to parse \"...\" at line %s column %s",
				this.line, this.column
			));
	}

	/// Consumes a possibly empty run of decimal digits.
	private void eatDigits() @safe {
		import std.ascii : isDigit;
		while(this.stringPos < this.input.length
				&& isDigit(this.input[this.stringPos]))
		{
			++this.stringPos;
			++this.column;
		}
	}

	/**
	 * Lexes an optionally signed integer or float. A float is an integer
	 * directly followed by `.` and a possibly empty run of digits.
	 *
	 * Throws: `Exception` if a sign is not followed by a digit.
	 */
	private void lexNumber() @safe {
		import std.ascii : isDigit;

		const size_t begin = this.stringPos;
		const char first = this.input[this.stringPos];
		if(first == '-' || first == '+') {
			++this.stringPos;
			++this.column;
			// Without this check the character after the sign would be
			// swallowed unconditionally, or the slice below would run past
			// the input for a sign at the very end.
			if(this.stringPos >= this.input.length
					|| !isDigit(this.input[this.stringPos]))
			{
				throw new Exception(format(
						"failed to parse number at line %s column %s",
						this.line, this.column
					));
			}
		}

		this.eatDigits();

		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == '.')
		{
			++this.stringPos;
			++this.column;
			this.eatDigits();
			this.cur = Token(TokenType.floatValue,
					this.input[begin .. this.stringPos], this.line, this.column);
		} else {
			this.cur = Token(TokenType.intValue,
					this.input[begin .. this.stringPos], this.line, this.column);
		}
	}

	/**
	 * Lexes a string. The token value is the raw text between the quotes;
	 * escape sequences are not decoded.
	 *
	 * With `QueryParser.no` a string starting with `"""` is a block string
	 * that runs to the next `"""` and may span lines. A regular string that
	 * is never closed extends to the end of the input.
	 *
	 * Throws: `Exception` if a block string is never closed.
	 */
	private void lexString() @safe {
		const size_t begin = this.stringPos;

		// The probe must not consume anything on failure, otherwise the
		// closing quote of an empty string `""` would be lost.
		if(this.isNotQueryParser() && this.lookingAt(`"""`)) {
			const size_t startLine = this.line;
			const size_t startColumn = this.column;
			this.stringPos += 3;
			this.column += 3;

			while(!this.lookingAt(`"""`)) {
				if(this.stringPos >= this.input.length) {
					throw new Exception(format(
							"unterminated block string starting at line %s column %s",
							startLine, startColumn
						));
				}
				if(this.input[this.stringPos] == '\n') {
					this.column = 1;
					++this.line;
				} else {
					++this.column;
				}
				++this.stringPos;
			}

			const size_t contentEnd = this.stringPos;
			this.stringPos += 3;
			this.column += 3;
			this.cur = Token(TokenType.stringValue,
					this.input[begin + 3 .. contentEnd], this.line, this.column);
			return;
		}

		// opening quote
		++this.stringPos;
		++this.column;

		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '"')
		{
			// A backslash escapes the character after it. Skipping both at
			// once keeps `\"` inside the string and lets `\\"` end it.
			const bool escaped = this.input[this.stringPos] == '\\'
				&& this.stringPos + 1 < this.input.length;
			const size_t step = escaped ? 2 : 1;
			this.stringPos += step;
			this.column += step;
		}

		const size_t contentEnd = this.stringPos;
		// closing quote, absent if the string is unterminated
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.column;
		}
		this.cur = Token(TokenType.stringValue,
				this.input[begin + 1 .. contentEnd], this.line, this.column);
	}

	/// Lexes everything up to the next token stop and emits it either as a
	/// keyword token (without value) or as a name token.
	private void lexWord() @safe {
		const size_t begin = this.stringPos;
		while(!this.isTokenStop()) {
			++this.stringPos;
			++this.column;
		}

		const string word = this.input[begin .. this.stringPos];
		const TokenType type = this.classifyWord(word);
		if(type == TokenType.name) {
			this.cur = Token(TokenType.name, word, this.line, this.column);
		} else {
			this.cur = Token(type, this.line, this.column);
		}
	}

	/// Returns: the keyword type of `word` for the active dialect, or
	/// `TokenType.name` if `word` is not a keyword. The whole word is
	/// compared, so a name that merely resembles a keyword stays a name.
	private TokenType classifyWord(string word) const @safe {
		switch(word) {
			case "mutation": return TokenType.mutation;
			case "on": return TokenType.on_;
			case "directive": return TokenType.directive;
			case "enum": return TokenType.enum_;
			case "extend": return TokenType.extend;
			case "interface": return TokenType.interface_;
			case "implements": return TokenType.implements;
			case "false": return TokenType.false_;
			case "fragment": return TokenType.fragment;
			case "query": return TokenType.query;
			case "true": return TokenType.true_;
			case "null": return TokenType.null_;
			case "union": return TokenType.union_;
			default: break;
		}

		// these words are keywords only outside of the query dialect
		if(this.isNotQueryParser()) {
			switch(word) {
				case "subscription": return TokenType.subscription;
				case "scalar": return TokenType.scalar;
				case "schema": return TokenType.schema;
				case "input": return TokenType.input;
				case "type": return TokenType.type;
				default: break;
			}
		}
		return TokenType.name;
	}

	/**
	 * Consumes the next character if it equals `c`.
	 *
	 * Params:
	 *   c = the expected character
	 *   e = incremented by one if the character was consumed
	 *
	 * Returns: `true` if the character was consumed.
	 */
	bool testCharAndInc(const(char) c, ref size_t e) @safe {
		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == c)
		{
			++this.column;
			++this.stringPos;
			++e;
			return true;
		} else {
			return false;
		}
	}

	/**
	 * Consumes the characters of `s` one by one for as long as they match
	 * the input.
	 *
	 * Note that the characters matched before a mismatch stay consumed even
	 * though `false` is returned.
	 *
	 * Params:
	 *   e = incremented by one for every consumed character
	 *
	 * Returns: `true` if all of `s` was matched and consumed.
	 */
	bool testStrAndInc(string s)(ref size_t e) @safe {
		for(size_t i = 0; i < s.length; ++i) {
			if(this.stringPos < this.input.length
					&& this.input[this.stringPos] == s[i])
			{
				++this.column;
				++this.stringPos;
				++e;
			} else {
				return false;
			}
		}

		return true;
	}

	/// Returns: `true` once the input is exhausted and the current token is
	/// the `TokenType.undefined` end marker.
	@property bool empty() const @safe {
		return this.stringPos >= this.input.length
			&& this.cur.type == TokenType.undefined;
	}

	/// Returns: the current token.
	Token front() @property @safe {
		return this.cur;
	}

	/// ditto
	@property Token front() const @safe @nogc pure {
		return this.cur;
	}

	/// Advances to the next token.
	/// Throws: `Exception` if the next token is malformed.
	void popFront() @safe {
		this.buildToken();
	}

	/// Returns: the part of the input that has not been consumed yet.
	string getRestOfInput() const @safe {
		return this.input[this.stringPos .. $];
	}
}
510 
unittest {
	// a lone identifier followed by a blank lexes as one name token
	auto lexer = Lexer("f ");
	assert(!lexer.empty);

	auto tok = lexer.front;
	assert(tok.type == TokenType.name);
	assert(tok.value == "f", format("'%s'", tok.value));
}
518 
unittest {
	// three dots followed by a blank are the only token of the input
	auto lexer = Lexer("... ");
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.dots);

	lexer.popFront();
	assert(lexer.empty);
}
528 
unittest {
	// '!' ends the name and is a token of its own
	auto lexer = Lexer("name! ");

	assert(!lexer.empty);
	auto first = lexer.front;
	assert(first.type == TokenType.name);
	assert(first.value == "name", format("'%s'", first.value));

	lexer.popFront();
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.exclamation);

	lexer.popFront();
	assert(lexer.empty);
}
541 
unittest {
	// empty and front have to work on a const Lexer, and a keyword at the
	// very end of the input has to be recognized
	const lexer = Lexer("fragment");
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.fragment);
}
548 
unittest {
	string source = `
		mutation {
		  likeStory(storyID: 12345) {
		    story {
		      likeCount
		    }
		  }
		}`;

	// the token types in the order the lexer has to produce them
	immutable TokenType[] expected = [
		TokenType.mutation,
		TokenType.lcurly,
		TokenType.name,
		TokenType.lparen,
		TokenType.name,
		TokenType.colon,
		TokenType.intValue,
		TokenType.rparen,
		TokenType.lcurly,
		TokenType.name,
		TokenType.lcurly,
		TokenType.name,
		TokenType.rcurly,
		TokenType.rcurly,
		TokenType.rcurly,
	];

	auto lexer = Lexer(source);
	foreach(want; expected) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, format("%s", lexer.front.type));
		lexer.popFront();
	}
	assert(lexer.empty);
}
606 
unittest {
	string source = `
		query withFragments {
		  user(id: +4) {
			# super cool comment
friends(first: -10.3) {
		      ...friendFields
			  null false true
		    }
		    mutualFriends(first: 10) {
		      ...friendFields
		    }
		  }
		}

		fragment friendFields on User {
		  id
		  name
		  profilePic(size: 50)
		}`;
	auto lexer = Lexer(source);

	// checks the current token; the value is only compared when given
	void expect(TokenType want, string text = null) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, format("%s", lexer.front.type));
		if(text !is null) {
			assert(lexer.front.value == text, lexer.front.value);
		}
	}

	// advances the lexer by count tokens without looking at them
	void advance(size_t count = 1) {
		foreach(_; 0 .. count) {
			lexer.popFront();
		}
	}

	expect(TokenType.query);
	advance();
	expect(TokenType.name, "withFragments");
	advance(2); // steps over '{'
	expect(TokenType.name, "user");
	advance(2); // steps over '('
	expect(TokenType.name, "id");
	advance();
	expect(TokenType.colon);
	advance();
	expect(TokenType.intValue, "+4");
	advance();
	expect(TokenType.rparen);
	advance(2); // steps over '{', the comment yields no token
	expect(TokenType.name, "friends");
	advance();
	expect(TokenType.lparen);
	advance();
	expect(TokenType.name, "first");
	advance(2); // steps over ':'
	expect(TokenType.floatValue, "-10.3");
	advance(3); // steps over ')' and '{'
	expect(TokenType.dots);
	advance();
	expect(TokenType.name, "friendFields");
	advance();
	expect(TokenType.null_);
	advance();
	expect(TokenType.false_);
	advance();
	expect(TokenType.true_);

	// the remainder only has to lex without throwing
	while(!lexer.empty) {
		lexer.popFront();
	}
}
693 
unittest {
	string source = `
		query withFragments {
		  user(id: "hello") {
		  }
		}`;
	auto lexer = Lexer(source);

	// checks type and value of the current name token
	void expectName(string text) {
		assert(!lexer.empty);
		assert(lexer.front.type == TokenType.name);
		assert(lexer.front.value == text, lexer.front.value);
	}

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.query);

	lexer.popFront();
	expectName("withFragments");

	// steps over '{'
	lexer.popFront();
	lexer.popFront();
	expectName("user");

	// steps over '('
	lexer.popFront();
	lexer.popFront();
	expectName("id");

	lexer.popFront();
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.colon);

	// the quotes are not part of the string value
	lexer.popFront();
	assert(!lexer.empty);
	auto str = lexer.front;
	assert(str.type == TokenType.stringValue);
	assert(str.value == "hello", format("'%s' '%s'", str.value, "hello"));

	lexer.popFront();
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.rparen);
}
729 
// Issue #20: comment lines, including an empty one, in front of the first
// token
unittest {
	string source = `# asldf
#
{ foo }
`;

	auto lexer = Lexer(source);
	foreach(want; [TokenType.lcurly, TokenType.name, TokenType.rcurly]) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, lexer.front.toString());
		lexer.popFront();
	}
	assert(lexer.empty);
}
749 
unittest {
	// with QueryParser.no a triple quoted string is a single string token
	// whose value excludes the quotes
	auto lexer = Lexer(`""" a long comment """ `, QueryParser.no);
	assert(!lexer.empty);

	auto tok = lexer.front;
	assert(tok.type == TokenType.stringValue, tok.toString());
	assert(tok.value == " a long comment ", tok.value);

	lexer.popFront();
	assert(lexer.empty);
}
760 
unittest {
	import std.string : indexOf;

	string source = `""" a

		long

		comment """ `;

	// a triple quoted string may span several lines
	auto lexer = Lexer(source, QueryParser.no);
	assert(!lexer.empty);

	auto tok = lexer.front;
	assert(tok.type == TokenType.stringValue, tok.toString());
	foreach(needle; ["a", "long", "comment", "\n"]) {
		assert(tok.value.indexOf(needle) != -1);
	}

	lexer.popFront();
	assert(lexer.empty);
}