1 module graphql.lexer;
2 
3 import std.experimental.logger;
4 import std.format : format;
5 import std.typecons : Flag;
6 import std.stdio;
7 
8 import graphql.tokenmodule;
9 
// Flag handed to the `Lexer` constructor. `Lexer.isNotQueryParser` compares
// it against `QueryParser.no`; in that mode the lexer additionally
// recognises the keywords `subscription`, `scalar`, `schema`, `input` and
// `type`.
alias QueryParser = Flag!"QueryParser";
11 
/**
Hand-written lexer that turns GraphQL source text into `Token`s.

The lexer is used like an input range: `front` yields the current token,
`popFront` scans the next one, and `empty` becomes `true` once the input is
exhausted and the current token is `TokenType.undefined`.

Position bookkeeping: single-character punctuation tokens are stamped with
the column of the character itself; every other token is stamped with the
column just past its last character.
*/
struct Lexer {
	/// Keyword mode, see `isNotQueryParser`.
	const QueryParser qp;
	/// The complete text being tokenised.
	string input;
	/// Index into `input` of the next character that has not been consumed.
	size_t stringPos;

	/// Line of the scan position, starting at 1.
	size_t line;
	/// Column of the scan position, starting at 1.
	size_t column;

	/// The token most recently produced by `buildToken`.
	Token cur;

	/**
	Creates a lexer over `input` and immediately scans the first token.

	Params:
		input = the text to tokenise
		qp = `QueryParser.no` additionally enables the keywords
			`subscription`, `scalar`, `schema`, `input` and `type`

	Throws: `Exception` if the first token is malformed.
	*/
	this(string input, QueryParser qp = QueryParser.yes) @safe {
		this.input = input;
		this.stringPos = 0;
		this.line = 1;
		this.column = 1;
		this.qp = qp;
		this.buildToken();
	}

	/// Returns: `true` if the lexer was constructed with `QueryParser.no`.
	bool isNotQueryParser() @safe const {
		return this.qp == QueryParser.no;
	}

	/// Returns: `true` at the end of the input or when the next character
	/// terminates a word (see the overload taking a character).
	private bool isTokenStop() const @safe {
		return this.stringPos >= this.input.length
			|| this.isTokenStop(this.input[this.stringPos]);
	}

	/// Returns: `true` if `c` is a space, tab, newline or one of the
	/// single-character punctuation tokens.
	private bool isTokenStop(const(char) c) const @safe {
		return
			c == ' ' || c == '\t' || c == '\n' || c == '(' || c == ')'
			|| c == '{' || c == '}' || c == '!' || c == '=' || c == '|'
			|| c == '[' || c == ':' || c == ']' || c == ',' || c == '@'
			|| c == '$';
	}

	/**
	Skips a `#` comment starting at the current position, including the
	newline that terminates it.

	Returns: `true` if a comment was consumed, `false` if the current
	character does not start a comment.
	*/
	private bool eatComment() @safe {
		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '#')
		{
			return false;
		}

		++this.stringPos;
		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '\n')
		{
			++this.stringPos;
		}

		// A comment may be terminated by the end of the input instead of a
		// newline. Only step over the newline (and start a new line) when
		// there is one, so that stringPos never exceeds input.length.
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.line;
			this.column = 1;
		}
		return true;
	}

	/// Skips spaces, tabs, newlines and comments while keeping `line` and
	/// `column` up to date.
	private void eatWhitespace() @safe {
		while(this.stringPos < this.input.length) {
			if(this.eatComment()) {
				continue;
			} else if(this.input[this.stringPos] == ' ') {
				++this.column;
			} else if(this.input[this.stringPos] == '\t') {
				++this.column;
			} else if(this.input[this.stringPos] == '\n') {
				this.column = 1;
				++this.line;
			} else {
				break;
			}
			++this.stringPos;
		}
	}

	/**
	Maps a single-character punctuation token to its `TokenType`.

	Params:
		c = the character to classify
		type = receives the token type when `c` is punctuation

	Returns: `true` if `c` is a punctuation token.
	*/
	private static bool punctuationType(const(char) c, out TokenType type)
			@safe
	{
		switch(c) {
			case ')': type = TokenType.rparen; return true;
			case '(': type = TokenType.lparen; return true;
			case ']': type = TokenType.rbrack; return true;
			case '[': type = TokenType.lbrack; return true;
			case '}': type = TokenType.rcurly; return true;
			case '{': type = TokenType.lcurly; return true;
			case '$': type = TokenType.dollar; return true;
			case '!': type = TokenType.exclamation; return true;
			case '|': type = TokenType.pipe; return true;
			case '@': type = TokenType.at; return true;
			case ',': type = TokenType.comma; return true;
			case '=': type = TokenType.equal; return true;
			case ':': type = TokenType.colon; return true;
			default: return false;
		}
	}

	/**
	Maps a complete word to its keyword `TokenType`.

	Returns: the keyword type, or `TokenType.name` if `word` is not a
	keyword in the current mode.
	*/
	private TokenType keywordType(string word) const @safe {
		switch(word) {
			case "mutation": return TokenType.mutation;
			case "on": return TokenType.on_;
			case "directive": return TokenType.directive;
			case "enum": return TokenType.enum_;
			case "extend": return TokenType.extend;
			case "interface": return TokenType.interface_;
			case "implements": return TokenType.implements;
			case "false": return TokenType.false_;
			case "fragment": return TokenType.fragment;
			case "query": return TokenType.query;
			case "true": return TokenType.true_;
			case "null": return TokenType.null_;
			case "union": return TokenType.union_;
			// The remaining keywords are only active with QueryParser.no;
			// otherwise they are ordinary names.
			case "subscription":
				return this.isNotQueryParser()
					? TokenType.subscription : TokenType.name;
			case "scalar":
				return this.isNotQueryParser()
					? TokenType.scalar : TokenType.name;
			case "schema":
				return this.isNotQueryParser()
					? TokenType.schema : TokenType.name;
			case "input":
				return this.isNotQueryParser()
					? TokenType.input : TokenType.name;
			case "type":
				return this.isNotQueryParser()
					? TokenType.type : TokenType.name;
			default:
				return TokenType.name;
		}
	}

	/**
	Scans the next token into `cur`.

	Leading whitespace and comments are skipped first. At the end of the
	input `cur` becomes `Token(TokenType.undefined)`.

	Throws: `Exception` on a malformed `...` or on a sign that is not
	followed by a digit.
	*/
	private void buildToken() @safe {
		this.eatWhitespace();

		if(this.stringPos >= this.input.length) {
			this.cur = Token(TokenType.undefined);
			return;
		}

		const char first = this.input[this.stringPos];

		TokenType punct;
		if(punctuationType(first, punct)) {
			// Punctuation is stamped with the column of the character itself.
			this.cur = Token(punct, this.line, this.column);
			++this.column;
			++this.stringPos;
			return;
		}

		switch(first) {
			case '.':
				this.lexDots();
				break;
			case '-', '+':
				this.lexNumber();
				break;
			case '0': .. case '9':
				this.lexNumber();
				break;
			case '"':
				this.lexString();
				break;
			default:
				this.lexWord();
				break;
		}
	}

	/// Scans `...`, which must be followed by a token stop or an
	/// alphanumeric character. Throws `Exception` otherwise.
	private void lexDots() @safe {
		import std.ascii : isAlphaNum;

		size_t e = this.stringPos;
		++this.stringPos;
		++this.column;
		++e;
		if(this.testStrAndInc!".."(e)) {
			if(this.isTokenStop()
					|| (this.stringPos < this.input.length
						&& isAlphaNum(this.input[this.stringPos])
						)
				)
			{
				this.cur = Token(TokenType.dots, this.line, this.column);
				return;
			}
		}
		throw new Exception(format(
				"failed to parse \"...\" at line %s column %s",
				this.line, this.column
			));
	}

	/**
	Scans an optionally signed integer or, if the digits are followed by a
	`.`, a float (the `.` and any digits after it).

	Throws: `Exception` if a leading sign is not followed by a digit.
	*/
	private void lexNumber() @safe {
		import std.ascii : isDigit;

		const size_t b = this.stringPos;
		const char lead = this.input[b];

		if(lead == '-' || lead == '+') {
			++this.stringPos;
			++this.column;
			// A sign on its own is not a number. Without this check the
			// character after the sign would be swallowed unconditionally,
			// running past the end of the input when the sign is last.
			if(this.stringPos >= this.input.length
					|| !isDigit(this.input[this.stringPos]))
			{
				throw new Exception(format(
						"expected a digit after '%s' at line %s column %s",
						lead, this.line, this.column
					));
			}
		}

		while(this.stringPos < this.input.length
				&& isDigit(this.input[this.stringPos]))
		{
			++this.stringPos;
			++this.column;
		}

		if(this.stringPos >= this.input.length
				|| this.input[this.stringPos] != '.')
		{
			this.cur = Token(TokenType.intValue,
					this.input[b .. this.stringPos], this.line, this.column);
			return;
		}

		// Step over the '.' and take the fraction digits.
		++this.stringPos;
		++this.column;
		while(this.stringPos < this.input.length
				&& isDigit(this.input[this.stringPos]))
		{
			++this.stringPos;
			++this.column;
		}

		this.cur = Token(TokenType.floatValue,
				this.input[b .. this.stringPos], this.line, this.column);
	}

	/**
	Scans a double-quoted string. The token value is the raw text between
	the quotes; escape sequences are kept as written.

	A string that is not closed before the end of the input yields the
	text up to the end of the input.
	*/
	private void lexString() @safe {
		const size_t b = this.stringPos;

		// opening quote
		++this.stringPos;
		++this.column;

		while(this.stringPos < this.input.length
				&& this.input[this.stringPos] != '"')
		{
			// A backslash escapes the character after it, so both are
			// skipped together. This keeps `\"` inside the string while
			// letting `\\` followed by a quote end it.
			const size_t step = (this.input[this.stringPos] == '\\'
					&& this.stringPos + 1 < this.input.length) ? 2 : 1;
			this.stringPos += step;
			this.column += step;
		}

		const size_t e = this.stringPos;

		// Only step over the closing quote if there is one, so stringPos
		// never exceeds input.length on an unterminated string.
		if(this.stringPos < this.input.length) {
			++this.stringPos;
			++this.column;
		}

		this.cur = Token(TokenType.stringValue, this.input[b + 1 .. e],
				this.line, this.column);
	}

	/// Scans a word up to the next token stop and emits either the matching
	/// keyword token or a `name` token carrying the word.
	private void lexWord() @safe {
		const size_t b = this.stringPos;
		while(!this.isTokenStop()) {
			++this.stringPos;
			++this.column;
		}

		// The whole word is compared against the keyword list; a word that
		// merely contains keyword fragments stays a name.
		string word = this.input[b .. this.stringPos];
		auto kw = this.keywordType(word);
		if(kw == TokenType.name) {
			this.cur = Token(TokenType.name, word, this.line, this.column);
		} else {
			this.cur = Token(kw, this.line, this.column);
		}
	}

	/**
	Consumes the next character if it equals `c`.

	Params:
		c = the expected character
		e = end index maintained by the caller; incremented on a match

	Returns: `true` if the character matched and was consumed.
	*/
	bool testCharAndInc(const(char) c, ref size_t e) @safe {
		if(this.stringPos < this.input.length
				&& this.input[this.stringPos] == c)
		{
			++this.column;
			++this.stringPos;
			++e;
			return true;
		} else {
			return false;
		}
	}

	/**
	Consumes the characters of `s` one by one for as long as they match
	the input.

	On a mismatch the characters matched so far stay consumed.

	Params:
		e = end index maintained by the caller; incremented per matched
			character

	Returns: `true` if all of `s` matched.
	*/
	bool testStrAndInc(string s)(ref size_t e) @safe {
		for(size_t i = 0; i < s.length; ++i) {
			if(this.stringPos < this.input.length
					&& this.input[this.stringPos] == s[i])
			{
				++this.column;
				++this.stringPos;
				++e;
			} else {
				return false;
			}
		}
		return true;
	}

	/// Returns: `true` once the whole input is consumed and the current
	/// token is `TokenType.undefined`.
	@property bool empty() const @safe {
		return this.stringPos >= this.input.length
			&& this.cur.type == TokenType.undefined;
	}

	/// Returns: the current token.
	Token front() @property @safe {
		return this.cur;
	}

	/// ditto
	@property Token front() const @safe @nogc pure {
		return this.cur;
	}

	/// Scans the next token. Throws `Exception` if it is malformed.
	void popFront() @safe {
		this.buildToken();
	}

	/// Returns: the part of the input that has not been consumed yet.
	string getRestOfInput() const @safe {
		return this.input[this.stringPos .. $];
	}
}
490 
// A lone identifier followed by a space is lexed as a name token.
unittest {
	auto lexer = Lexer("f ");
	assert(!lexer.empty);

	auto first = lexer.front;
	assert(first.type == TokenType.name);
	assert(first.value == "f", format("'%s'", first.value));
}
498 
// "..." is a single dots token and nothing follows it.
unittest {
	auto lexer = Lexer("... ");

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.dots);

	lexer.popFront();
	assert(lexer.empty);
}
508 
// '!' ends the preceding name and is a token of its own.
unittest {
	auto lexer = Lexer("name! ");

	assert(!lexer.empty);
	auto ident = lexer.front;
	assert(ident.type == TokenType.name);
	assert(ident.value == "name", format("'%s'", ident.value));

	lexer.popFront();
	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.exclamation);

	lexer.popFront();
	assert(lexer.empty);
}
521 
// A keyword that ends at the end of the input is recognised; `empty` and
// `front` are callable on a const lexer.
unittest {
	const Lexer lexer = Lexer("fragment");

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.fragment);
}
528 
// Token type sequence of a small mutation document.
unittest {
	string source = `
		mutation {
		  likeStory(storyID: 12345) {
		    story {
		      likeCount
		    }
		  }
		}`;
	auto lexer = Lexer(source);

	// Checks that the upcoming tokens have exactly the given types and
	// consumes them.
	void expectTypes(TokenType[] expected...) {
		foreach(want; expected) {
			assert(!lexer.empty);
			assert(lexer.front.type == want);
			lexer.popFront();
		}
	}

	expectTypes(TokenType.mutation, TokenType.lcurly, TokenType.name,
			TokenType.lparen, TokenType.name);

	assert(!lexer.empty);
	assert(lexer.front.type == TokenType.colon,
			format("%s", lexer.front.type));
	lexer.popFront();

	expectTypes(TokenType.intValue, TokenType.rparen, TokenType.lcurly,
			TokenType.name, TokenType.lcurly, TokenType.name,
			TokenType.rcurly, TokenType.rcurly, TokenType.rcurly);

	assert(lexer.empty);
}
586 
// Query with a comment, signed int and float values, fragment spreads and
// the literals null / false / true.
unittest {
	string source = `
		query withFragments {
		  user(id: +4) {
			# super cool comment
friends(first: -10.3) {
		      ...friendFields
			  null false true
		    }
		    mutualFriends(first: 10) {
		      ...friendFields
		    }
		  }
		}

		fragment friendFields on User {
		  id
		  name
		  profilePic(size: 50)
		}`;
	auto lexer = Lexer(source);

	// Current token must exist and have the given type.
	void expectType(TokenType want) {
		assert(!lexer.empty);
		assert(lexer.front.type == want);
	}

	// Same check, but reports the actual type on failure.
	void expectTypeVerbose(TokenType want) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, format("%s", lexer.front.type));
	}

	// Advances the lexer by `count` tokens.
	void skip(size_t count) {
		foreach(_; 0 .. count) {
			lexer.popFront();
		}
	}

	expectType(TokenType.query);
	skip(1);

	expectType(TokenType.name);
	assert(lexer.front.value == "withFragments");
	skip(2);

	expectType(TokenType.name);
	assert(lexer.front.value == "user");
	skip(2);

	expectType(TokenType.name);
	assert(lexer.front.value == "id", lexer.front.value);
	skip(1);

	expectType(TokenType.colon);
	skip(1);

	expectType(TokenType.intValue);
	assert(lexer.front.value == "+4");
	skip(1);

	expectType(TokenType.rparen);
	skip(2);

	expectType(TokenType.name);
	assert(lexer.front.value == "friends");
	skip(1);

	expectType(TokenType.lparen);
	skip(1);

	expectType(TokenType.name);
	assert(lexer.front.value == "first");
	skip(2);

	expectTypeVerbose(TokenType.floatValue);
	assert(lexer.front.value == "-10.3", lexer.front.value);
	skip(3);

	expectTypeVerbose(TokenType.dots);
	skip(1);

	expectTypeVerbose(TokenType.name);
	assert(lexer.front.value == "friendFields");
	skip(1);

	expectTypeVerbose(TokenType.null_);
	skip(1);

	expectTypeVerbose(TokenType.false_);
	skip(1);

	expectTypeVerbose(TokenType.true_);

	// The rest of the document only has to lex without throwing.
	while(!lexer.empty) {
		lexer.popFront();
	}
}
673 
// A double-quoted argument is a stringValue token without the quotes.
unittest {
	string source = `
		query withFragments {
		  user(id: "hello") {
		  }
		}`;

	auto lexer = Lexer(source);

	// Current token must exist and have the given type.
	void expectType(TokenType want) {
		assert(!lexer.empty);
		assert(lexer.front.type == want);
	}

	expectType(TokenType.query);
	lexer.popFront();

	expectType(TokenType.name);
	assert(lexer.front.value == "withFragments");
	lexer.popFront();
	lexer.popFront();

	expectType(TokenType.name);
	assert(lexer.front.value == "user");
	lexer.popFront();
	lexer.popFront();

	expectType(TokenType.name);
	assert(lexer.front.value == "id", lexer.front.value);
	lexer.popFront();

	expectType(TokenType.colon);
	lexer.popFront();

	expectType(TokenType.stringValue);
	assert(lexer.front.value == "hello",
			format("'%s' '%s'", lexer.front.value, "hello"));
	lexer.popFront();

	expectType(TokenType.rparen);
}
709 
// Issue #20: comment lines, including an empty "#" line, in front of the
// first token are skipped.
unittest {
	string source = `# asldf
#
{ foo }
`;

	auto lexer = Lexer(source);

	foreach(want; [TokenType.lcurly, TokenType.name, TokenType.rcurly]) {
		assert(!lexer.empty);
		assert(lexer.front.type == want, lexer.front.toString());
		lexer.popFront();
	}

	assert(lexer.empty);
}