1 module http.HttpRequestParser;
2
3 import std.container;
4
5 import std.algorithm.comparison : equal;
6 import std.conv : to;
7 import std.algorithm.iteration : filter;
8 import std.uni : icmp, isAlpha;
9 import std.array : array;
10
11 import core.stdc.ctype : isalnum;
12 import core.stdc.stdlib : strtol;
13
14 import http.HttpRequest;
15
/**
 * Incremental HTTP request parser implemented as a byte-driven state machine.
 *
 * Bytes are fed through `parse`; the request line, the headers and an optional
 * body (sized by Content-Length or sent with chunked transfer coding) are
 * written into the supplied `HttpRequest`.  The parser keeps its position
 * between calls, so a request may be delivered in several pieces.
 *
 * Note: nothing in this class resets the internal state once a request has
 * completed or failed, so use a fresh parser instance for every request.
 */
class HttpRequestParser
{
    private
    {
        State _state = State.RequestMethodStart; // current position in the grammar
        size_t _contentSize = 0;                 // body bytes still expected (Content-Length)
        string _chunkSizeStr;                    // hex digits of the chunk-size line being read
        size_t _chunkSize = 0;                   // data bytes left in the current chunk
        bool _chunked = false;                   // set when "Transfer-Encoding: chunked" is seen
    }

public:
    this()
    {
        // Nothing to do: every field carries its own default initialiser.
    }

    /// Outcome of feeding a buffer to the parser.
    enum ParseResult {
        ParsingCompleted,   /// a whole request has been read
        ParsingIncompleted, /// the buffer ended before the request did
        ParsingError        /// the input violates the expected grammar
    }

    /**
     * Feeds `text` to the state machine, filling in `req` as it goes.
     *
     * Params:
     *   req  = request object receiving method, URI, version, headers and body
     *   text = next slice of raw request bytes
     *
     * Returns: `ParsingCompleted` as soon as the request is complete,
     *          `ParsingError` on the first invalid byte, otherwise
     *          `ParsingIncompleted` when `text` is exhausted.
     */
    ParseResult parse(ref HttpRequest req, const ubyte[] text)
    {
        return consume(req, text);
    }

private:
    // True when the header's name is "Connection" (case-insensitive).
    static bool checkIfConnection(const HttpRequest.Header h)
    {
        return icmp(h.name[], "Connection") == 0;
    }

    // Converts the collected hexadecimal chunk-size digits into a number.
    // An empty digit string yields 0.  Returns false on a non-hex character
    // or when the value does not fit into size_t.
    static bool parseChunkSize(const(char)[] digits, out size_t value)
    {
        value = 0;
        foreach (c; digits)
        {
            uint digit;
            if (c >= '0' && c <= '9')
                digit = c - '0';
            else if (c >= 'a' && c <= 'f')
                digit = c - 'a' + 10;
            else if (c >= 'A' && c <= 'F')
                digit = c - 'A' + 10;
            else
                return false;

            // Shifting in another nibble would overflow size_t.
            if (value > (size_t.max >> 4))
                return false;

            value = (value << 4) | digit;
        }
        return true;
    }

    // Runs the state machine over every byte of `text`.
    ParseResult consume(ref HttpRequest req, const ubyte[] text)
    {
        import std.conv : ConvException;

        foreach(input; text)
        {
            switch (_state)
            {
                case State.RequestMethodStart:
                    if( !isChar(input) || isControl(input) || isSpecial(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        _state = State.RequestMethod;
                        req.method ~= input;
                    }
                    break;
                case State.RequestMethod:
                    if( input == ' ' )
                    {
                        _state = State.RequestUriStart;
                    }
                    else if( !isChar(input) || isControl(input) || isSpecial(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        req.method ~= input;
                    }
                    break;
                case State.RequestUriStart:
                    if( isControl(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        _state = State.RequestUri;
                        req.uri ~= input;
                    }
                    break;
                case State.RequestUri:
                    if( input == ' ' )
                    {
                        _state = State.RequestHttpVersion_h;
                    }
                    else if (input == '\r')
                    {
                        // A request line without a version is reported as HTTP/0.9.
                        req.versionMajor = 0;
                        req.versionMinor = 9;

                        return ParseResult.ParsingCompleted;
                    }
                    else if( isControl(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        req.uri ~= input;
                    }
                    break;
                case State.RequestHttpVersion_h:
                    if( input == 'H' )
                    {
                        _state = State.RequestHttpVersion_ht;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_ht:
                    if( input == 'T' )
                    {
                        _state = State.RequestHttpVersion_htt;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_htt:
                    if( input == 'T' )
                    {
                        _state = State.RequestHttpVersion_http;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_http:
                    if( input == 'P' )
                    {
                        _state = State.RequestHttpVersion_slash;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_slash:
                    if( input == '/' )
                    {
                        req.versionMajor = 0;
                        req.versionMinor = 0;
                        _state = State.RequestHttpVersion_majorStart;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_majorStart:
                    if( isDigit(input) )
                    {
                        req.versionMajor = input - '0';
                        _state = State.RequestHttpVersion_major;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_major:
                    if( input == '.' )
                    {
                        _state = State.RequestHttpVersion_minorStart;
                    }
                    else if (isDigit(input))
                    {
                        req.versionMajor = req.versionMajor * 10 + input - '0';
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_minorStart:
                    if( isDigit(input) )
                    {
                        req.versionMinor = input - '0';
                        _state = State.RequestHttpVersion_minor;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.RequestHttpVersion_minor:
                    if( input == '\r' )
                    {
                        _state = State.ResponseHttpVersion_newLine;
                    }
                    else if( isDigit(input) )
                    {
                        req.versionMinor = req.versionMinor * 10 + input - '0';
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ResponseHttpVersion_newLine:
                    if( input == '\n' )
                    {
                        _state = State.HeaderLineStart;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.HeaderLineStart:
                    if( input == '\r' )
                    {
                        // Empty line: the header section is over.
                        _state = State.ExpectingNewline_3;
                    }
                    else if( !req.headers.empty() && (input == ' ' || input == '\t') )
                    {
                        // Leading whitespace continues the previous header's value.
                        _state = State.HeaderLws;
                    }
                    else if( !isChar(input) || isControl(input) || isSpecial(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        // Start a new header whose name begins with this byte.
                        HttpRequest.Header header;
                        header.name.put(input);
                        req.headers.insertBack(header);

                        _state = State.HeaderName;
                    }
                    break;
                case State.HeaderLws:
                    if( input == '\r' )
                    {
                        _state = State.ExpectingNewline_2;
                    }
                    else if( input == ' ' || input == '\t' )
                    {
                        // skip further leading whitespace
                    }
                    else if( isControl(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        _state = State.HeaderValue;
                        req.headers.back.value.put(input);
                    }
                    break;
                case State.HeaderName:
                    if( input == ':' )
                    {
                        _state = State.SpaceBeforeHeaderValue;
                    }
                    else if( !isChar(input) || isControl(input) || isSpecial(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        req.headers.back.name.put(input);
                    }
                    break;
                case State.SpaceBeforeHeaderValue:
                    if( input == ' ' )
                    {
                        _state = State.HeaderValue;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.HeaderValue:
                    if( input == '\r' )
                    {
                        // Body framing headers are only honoured for POST and PUT.
                        if( req.method == "POST" || req.method == "PUT" )
                        {
                            HttpRequest.Header h = req.headers.back;

                            if( icmp(h.name[], "Content-Length") == 0 )
                            {
                                // Parse as unsigned so that a negative length is
                                // rejected instead of wrapping to a huge size, and
                                // report malformed values as a parse error rather
                                // than letting the conversion exception escape.
                                try
                                {
                                    _contentSize = h.value.data().to!size_t;
                                }
                                catch (ConvException)
                                {
                                    return ParseResult.ParsingError;
                                }
                            }
                            else if( icmp(h.name[], "Transfer-Encoding") == 0 )
                            {
                                if( icmp(h.value[], "chunked") == 0 )
                                    _chunked = true;
                            }
                        }
                        _state = State.ExpectingNewline_2;
                    }
                    else if( isControl(input) )
                    {
                        return ParseResult.ParsingError;
                    }
                    else
                    {
                        req.headers.back.value.put(input);
                    }
                    break;
                case State.ExpectingNewline_2:
                    if( input == '\n' )
                    {
                        _state = State.HeaderLineStart;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ExpectingNewline_3: {
                    // The blank line closing the headers must end in LF whether
                    // or not a body follows.
                    if( input != '\n' )
                        return ParseResult.ParsingError;

                    auto it = filter!(a => checkIfConnection(a))(req.headers.array);

                    if( !it.empty() )
                    {
                        HttpRequest.Header header = it.front;
                        if( icmp(header.value.data(), "Keep-Alive") == 0 )
                        {
                            req.keepAlive = true;
                        }
                        else // == Close
                        {
                            req.keepAlive = false;
                        }
                    }
                    else
                    {
                        // Without a Connection header, HTTP/1.1 and later major
                        // versions are treated as keep-alive.
                        if( req.versionMajor > 1 || (req.versionMajor == 1 && req.versionMinor == 1) )
                            req.keepAlive = true;
                    }

                    if( _chunked )
                    {
                        _state = State.ChunkSize;
                    }
                    else if( _contentSize == 0 )
                    {
                        return ParseResult.ParsingCompleted;
                    }
                    else
                    {
                        _state = State.Post;
                    }
                    break;
                }
                case State.Post:
                    --_contentSize;
                    req.content ~= input;

                    if( _contentSize == 0 )
                    {
                        return ParseResult.ParsingCompleted;
                    }
                    break;
                case State.ChunkSize:
                    // Only hexadecimal digits may form a chunk size.
                    if( isHex(input) )
                    {
                        _chunkSizeStr ~= input;
                    }
                    else if( input == ';' )
                    {
                        _state = State.ChunkExtensionName;
                    }
                    else if( input == '\r' )
                    {
                        _state = State.ChunkSizeNewLine;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkExtensionName:
                    if( isalnum(input) || input == ' ' )
                    {
                        // skip
                    }
                    else if( input == '=' )
                    {
                        _state = State.ChunkExtensionValue;
                    }
                    else if( input == '\r' )
                    {
                        _state = State.ChunkSizeNewLine;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkExtensionValue:
                    if( isalnum(input) || input == ' ' )
                    {
                        // skip
                    }
                    else if( input == '\r' )
                    {
                        _state = State.ChunkSizeNewLine;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkSizeNewLine:
                    if( input == '\n' )
                    {
                        if( !parseChunkSize(_chunkSizeStr, _chunkSize) )
                            return ParseResult.ParsingError;

                        // Drop the digits so the next chunk-size line starts
                        // from scratch.  This state is also entered at the end
                        // of a trailer line, where the empty digit string
                        // yields 0 and leads back to ChunkSizeNewLine_2.
                        _chunkSizeStr = null;

                        if( _chunkSize == 0 )
                            _state = State.ChunkSizeNewLine_2;
                        else
                            _state = State.ChunkData;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkSizeNewLine_2:
                    if( input == '\r' )
                    {
                        _state = State.ChunkSizeNewLine_3;
                    }
                    else if( isAlpha(input) )
                    {
                        _state = State.ChunkTrailerName;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkSizeNewLine_3:
                    return input == '\n' ? ParseResult.ParsingCompleted
                                         : ParseResult.ParsingError;
                case State.ChunkTrailerName:
                    if( isalnum(input) )
                    {
                        // skip
                    }
                    else if( input == ':' )
                    {
                        _state = State.ChunkTrailerValue;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkTrailerValue:
                    if( isalnum(input) || input == ' ' )
                    {
                        // skip
                    }
                    else if( input == '\r' )
                    {
                        _state = State.ChunkSizeNewLine;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkData:
                    req.content ~= input;

                    if( --_chunkSize == 0 )
                    {
                        _state = State.ChunkDataNewLine_1;
                    }
                    break;
                case State.ChunkDataNewLine_1:
                    if( input == '\r' )
                    {
                        _state = State.ChunkDataNewLine_2;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                case State.ChunkDataNewLine_2:
                    if( input == '\n' )
                    {
                        _state = State.ChunkSize;
                    }
                    else
                    {
                        return ParseResult.ParsingError;
                    }
                    break;
                default:
                    return ParseResult.ParsingError;
            }
        }

        return ParseResult.ParsingIncompleted;
    }

    // Check if a byte is an HTTP character.
    bool isChar(int c)
    {
        return c >= 0 && c <= 127;
    }

    // Check if a byte is an HTTP control character.
    bool isControl(int c)
    {
        return (c >= 0 && c <= 31) || (c == 127);
    }

    // Check if a byte is defined as an HTTP special character.
    bool isSpecial(int c)
    {
        switch (c)
        {
            case '(': case ')': case '<': case '>': case '@':
            case ',': case ';': case ':': case '\\': case '"':
            case '/': case '[': case ']': case '?': case '=':
            case '{': case '}': case ' ': case '\t':
                return true;
            default:
                return false;
        }
    }

    // Check if a byte is a digit.
    bool isDigit(int c)
    {
        return c >= '0' && c <= '9';
    }

    // Check if a byte is a hexadecimal digit.
    bool isHex(int c)
    {
        return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    }

    // The current state of the parser.
    enum State
    {
        RequestMethodStart,
        RequestMethod,
        RequestUriStart,
        RequestUri,
        RequestHttpVersion_h,
        RequestHttpVersion_ht,
        RequestHttpVersion_htt,
        RequestHttpVersion_http,
        RequestHttpVersion_slash,
        RequestHttpVersion_majorStart,
        RequestHttpVersion_major,
        RequestHttpVersion_minorStart,
        RequestHttpVersion_minor,

        ResponseStatusStart,
        ResponseHttpVersion_ht,
        ResponseHttpVersion_htt,
        ResponseHttpVersion_http,
        ResponseHttpVersion_slash,
        ResponseHttpVersion_majorStart,
        ResponseHttpVersion_major,
        ResponseHttpVersion_minorStart,
        ResponseHttpVersion_minor,
        ResponseHttpVersion_spaceAfterVersion,
        ResponseHttpVersion_statusCodeStart,
        ResponseHttpVersion_spaceAfterStatusCode,
        ResponseHttpVersion_statusTextStart,
        ResponseHttpVersion_newLine,

        HeaderLineStart,
        HeaderLws,
        HeaderName,
        SpaceBeforeHeaderValue,
        HeaderValue,
        ExpectingNewline_2,
        ExpectingNewline_3,

        Post,
        ChunkSize,
        ChunkExtensionName,
        ChunkExtensionValue,
        ChunkSizeNewLine,
        ChunkSizeNewLine_2,
        ChunkSizeNewLine_3,
        ChunkTrailerName,
        ChunkTrailerValue,

        ChunkDataNewLine_1,
        ChunkDataNewLine_2,
        ChunkData,
    }
}
625
// Parses a complete body-less GET request in a single call and checks the
// fields the parser fills in.  Failures are reported through assert so that a
// regression actually fails the test run instead of only printing a message.
unittest
{
    import http.HttpRequest;

    enum string text = "GET /testuri HTTP/1.1\r\n" ~
        "User-Agent: Mozilla/5.0\r\n" ~
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" ~
        "Host: 127.0.0.1\r\n" ~
        "\r\n";

    HttpRequest request = new HttpRequest();

    auto parser = new HttpRequestParser;

    // Cast to const bytes: the literal is immutable and must not be exposed
    // as mutable data.
    const res = parser.parse(request, cast(const(ubyte)[]) text);

    assert(res == HttpRequestParser.ParseResult.ParsingCompleted,
           "a well-formed GET request must parse completely");
    assert(request.method == "GET");
    assert(request.versionMajor == 1);
    assert(request.versionMinor == 1);

    // No Connection header is sent, so HTTP/1.1 is treated as keep-alive.
    assert(request.keepAlive);

    // One entry per header line of the request.
    size_t headerCount;
    foreach (header; request.headers)
    {
        ++headerCount;
    }
    assert(headerCount == 3);
}