1 module http.HttpRequestParser;
2 
3 import std.container;
4 
5 import std.algorithm.comparison : equal;
6 import std.conv : to;
7 import std.algorithm.iteration : filter;
8 import std.uni : icmp, isAlpha;
9 import std.array : array;
10 
11 import core.stdc.ctype : isalnum;
12 import core.stdc.stdlib : strtol;
13 
14 import http.HttpRequest;
15 
/**
 * Incremental HTTP request parser implemented as a byte-driven state machine.
 *
 * The parser keeps its position in `_state` between calls, so a request may be
 * fed to `parse` in several pieces: each call consumes the given bytes and
 * reports whether the request is complete, still incomplete, or malformed.
 *
 * The parser state is not reset after `ParsingCompleted` or `ParsingError`;
 * use a fresh instance for every request.
 */
class HttpRequestParser
{
    private
    {
        // Current position in the state machine.
        State _state = State.RequestMethodStart;
        // Number of body bytes still expected (taken from Content-Length).
        size_t _contentSize = 0;
        // Hex digits of the chunk-size line currently being read.
        string _chunkSizeStr;
        // Number of bytes still expected in the current chunk.
        size_t _chunkSize = 0;
        // True once "Transfer-Encoding: chunked" has been seen.
        bool _chunked = false;
    }

    public:
        this()
        {
            // Nothing to initialise: all fields carry their defaults.
        }

        /// Outcome of feeding a piece of input to the parser.
        enum ParseResult {
            ParsingCompleted,
            ParsingIncompleted,
            ParsingError
        }

        /**
         * Feeds `text` to the parser, filling `req` as elements are recognised.
         *
         * Params:
         *   req  = request object that receives method, uri, version, headers,
         *          keep-alive flag and content.
         *   text = next bytes of the request.
         *
         * Returns: `ParsingCompleted` when the request ended inside `text`,
         *          `ParsingIncompleted` when more bytes are needed, and
         *          `ParsingError` when the input is malformed.
         */
        ParseResult parse(ref HttpRequest req, const ubyte[] text)
        {
            return consume(req, text);
        }

private:
    // True when the header's name is "Connection" (case-insensitive).
    static bool checkIfConnection(const HttpRequest.Header h)
    {
        return icmp(h.name[], "Connection") == 0;
    }

    /*
     * Converts the collected chunk-size digits to a number.
     *
     * An empty string yields 0: the trailer path re-enters ChunkSizeNewLine
     * with no digits collected and must continue as after the last chunk.
     * Returns false when the digits are not valid hexadecimal or the value
     * does not fit into size_t.
     */
    static bool parseChunkSize(string hex, out size_t size)
    {
        import std.conv : ConvException;

        if( hex.length == 0 )
        {
            size = 0;
            return true;
        }

        try
        {
            // Length-aware conversion: D strings are not NUL-terminated, so
            // handing `.ptr` to C's strtol could read past the buffer.
            size = to!size_t(hex, 16);
            return true;
        }
        catch( ConvException )
        {
            return false;
        }
    }

    // Runs the state machine over every byte of `text`; see `parse`.
    ParseResult consume(ref HttpRequest req, const ubyte[] text)
    {
        import std.conv : ConvException;

        foreach(input; text)
        {
            switch (_state)
            {
            // ---- request line: METHOD SP URI SP HTTP/major.minor CR LF ----
            case State.RequestMethodStart:
                if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    _state = State.RequestMethod;
                    req.method ~= input;
                }
                break;
            case State.RequestMethod:
                if( input == ' ' )
                {
                    _state = State.RequestUriStart;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.method ~= input;
                }
                break;
            case State.RequestUriStart:
                if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    _state = State.RequestUri;
                    req.uri ~= input;
                }
                break;
            case State.RequestUri:
                if( input == ' ' )
                {
                    _state = State.RequestHttpVersion_h;
                }
                else if (input == '\r')
                {
                    // Request line without a version: reported as HTTP/0.9
                    // and treated as complete right away.
                    req.versionMajor = 0;
                    req.versionMinor = 9;

                    return ParseResult.ParsingCompleted;
                }
                else if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.uri ~= input;
                }
                break;
            case State.RequestHttpVersion_h:
                if( input == 'H' )
                {
                    _state = State.RequestHttpVersion_ht;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_ht:
                if( input == 'T' )
                {
                    _state = State.RequestHttpVersion_htt;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_htt:
                if( input == 'T' )
                {
                    _state = State.RequestHttpVersion_http;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_http:
                if( input == 'P' )
                {
                    _state = State.RequestHttpVersion_slash;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_slash:
                if( input == '/' )
                {
                    req.versionMajor = 0;
                    req.versionMinor = 0;
                    _state = State.RequestHttpVersion_majorStart;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_majorStart:
                if( isDigit(input) )
                {
                    req.versionMajor = input - '0';
                    _state = State.RequestHttpVersion_major;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_major:
                if( input == '.' )
                {
                    _state = State.RequestHttpVersion_minorStart;
                }
                else if (isDigit(input))
                {
                    req.versionMajor = req.versionMajor * 10 + input - '0';
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_minorStart:
                if( isDigit(input) )
                {
                    req.versionMinor = input - '0';
                    _state = State.RequestHttpVersion_minor;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_minor:
                if( input == '\r' )
                {
                    _state = State.ResponseHttpVersion_newLine;
                }
                else if( isDigit(input) )
                {
                    req.versionMinor = req.versionMinor * 10 + input - '0';
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ResponseHttpVersion_newLine:
                // LF that terminates the request line.
                if( input == '\n' )
                {
                    _state = State.HeaderLineStart;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;

            // ---- header section ----
            case State.HeaderLineStart:
                if( input == '\r' )
                {
                    // Empty line: end of the header section.
                    _state = State.ExpectingNewline_3;
                }
                else if( !req.headers.empty() && (input == ' ' || input == '\t') )
                {
                    // Leading whitespace continues the previous header's value.
                    _state = State.HeaderLws;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    // First character of a new header name.
                    HttpRequest.Header header;
                    header.name.put(input);
                    req.headers.insertBack(header);

                    _state = State.HeaderName;
                }
                break;
            case State.HeaderLws:
                if( input == '\r' )
                {
                    _state = State.ExpectingNewline_2;
                }
                else if( input == ' ' || input == '\t' )
                {
                    // Skip further leading whitespace.
                }
                else if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    _state = State.HeaderValue;
                    req.headers.back.value.put(input);
                }
                break;
            case State.HeaderName:
                if( input == ':' )
                {
                    _state = State.SpaceBeforeHeaderValue;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.headers.back.name.put(input);
                }
                break;
            case State.SpaceBeforeHeaderValue:
                // Exactly one space is required after the colon.
                if( input == ' ' )
                {
                    _state = State.HeaderValue;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.HeaderValue:
                if( input == '\r' )
                {
                    // Body framing headers are only honoured for POST and PUT.
                    if( req.method == "POST" || req.method == "PUT" )
                    {
                        HttpRequest.Header h = req.headers.back;

                        if( icmp(h.name[], "Content-Length") == 0 )
                        {
                            // The value comes from the peer: a non-numeric,
                            // negative or oversized length must be reported as
                            // a parse error instead of escaping as an exception
                            // or wrapping around to a huge size_t.
                            try
                            {
                                _contentSize = h.value.data().to!size_t;
                            }
                            catch( ConvException )
                            {
                                return ParseResult.ParsingError;
                            }
                        }
                        else if( icmp(h.name[], "Transfer-Encoding") == 0 )
                        {
                            if( icmp(h.value[], "chunked") == 0 )
                                _chunked = true;
                        }
                    }
                    _state = State.ExpectingNewline_2;
                }
                else if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.headers.back.value.put(input);
                }
                break;
            case State.ExpectingNewline_2:
                // LF that terminates a header line.
                if( input == '\n' )
                {
                    _state = State.HeaderLineStart;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ExpectingNewline_3: {
                // LF of the empty line that ends the header section. It is
                // checked up front so a stray byte is rejected even when a
                // body follows.
                if( input != '\n' )
                {
                    return ParseResult.ParsingError;
                }

                auto it = filter!(a => checkIfConnection(a))(req.headers.array);

                if(!it.empty() )
                {
                    // The first Connection header decides keep-alive.
                    HttpRequest.Header header = it.front;
                    if( icmp(header.value.data(), "Keep-Alive") == 0 )
                    {
                        req.keepAlive = true;
                    }
                    else  // == Close
                    {
                        req.keepAlive = false;
                    }
                }
                else
                {
                    // No Connection header: keep-alive for HTTP/1.1 and for
                    // major versions above 1.
                    if( req.versionMajor > 1 || (req.versionMajor == 1 && req.versionMinor == 1) )
                        req.keepAlive = true;
                }

                if( _chunked )
                {
                    _state = State.ChunkSize;
                }
                else if( _contentSize == 0 )
                {
                    // No body expected.
                    return ParseResult.ParsingCompleted;
                }
                else
                {
                    _state = State.Post;
                }
                break;
            }

            // ---- body with Content-Length ----
            case State.Post:
                --_contentSize;
                req.content ~= input;

                if( _contentSize == 0 )
                {
                    return ParseResult.ParsingCompleted;
                }
                break;

            // ---- chunked body ----
            case State.ChunkSize:
                // Only hexadecimal digits may form the chunk size.
                if( isHexDigit(input) )
                {
                    _chunkSizeStr ~= input;
                }
                else if( input == ';' )
                {
                    _state = State.ChunkExtensionName;
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkExtensionName:
                if( isalnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '=' )
                {
                    _state = State.ChunkExtensionValue;
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkExtensionValue:
                if( isalnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkSizeNewLine:
                if( input == '\n' )
                {
                    if( !parseChunkSize(_chunkSizeStr, _chunkSize) )
                    {
                        return ParseResult.ParsingError;
                    }
                    // Start from scratch for the next chunk-size line;
                    // otherwise its digits would be appended to these.
                    _chunkSizeStr = null;

                    if( _chunkSize == 0 )
                        _state = State.ChunkSizeNewLine_2;
                    else
                        _state = State.ChunkData;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkSizeNewLine_2:
                // After the zero-sized chunk: final CR LF or a trailer line.
                if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine_3;
                }
                else if( isAlpha(input) )
                {
                    _state = State.ChunkTrailerName;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkSizeNewLine_3:
                if( input == '\n' )
                {
                    return ParseResult.ParsingCompleted;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
            case State.ChunkTrailerName:
                // Trailer names and values are validated but not stored.
                if( isalnum(input) )
                {
                    // skip
                }
                else if( input == ':' )
                {
                    _state = State.ChunkTrailerValue;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkTrailerValue:
                if( isalnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '\r' )
                {
                    // Re-uses ChunkSizeNewLine; with no digits collected the
                    // size evaluates to 0 and parsing continues in
                    // ChunkSizeNewLine_2.
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkData:
                req.content ~= input;

                if( --_chunkSize == 0 )
                {
                    _state = State.ChunkDataNewLine_1;
                }
                break;
            case State.ChunkDataNewLine_1:
                if( input == '\r' )
                {
                    _state = State.ChunkDataNewLine_2;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkDataNewLine_2:
                if( input == '\n' )
                {
                    _state = State.ChunkSize;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            default:
                // States that consume() does not handle.
                return ParseResult.ParsingError;
            }
        }

        // Input exhausted before the request ended.
        return ParseResult.ParsingIncompleted;
    }

    // Check if a byte is an HTTP character.
    bool isChar(int c)
    {
        return c >= 0 && c <= 127;
    }

    // Check if a byte is an HTTP control character.
    bool isControl(int c)
    {
        return (c >= 0 && c <= 31) || (c == 127);
    }

    // Check if a byte is defined as an HTTP special character.
    bool isSpecial(int c)
    {
        switch (c)
        {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '"':
        case '/': case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            return true;
        default:
            return false;
        }
    }

    // Check if a byte is a digit.
    bool isDigit(int c)
    {
        return c >= '0' && c <= '9';
    }

    // Check if a byte is a hexadecimal digit (0-9, a-f, A-F).
    bool isHexDigit(int c)
    {
        return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    }

    // The current state of the parser.
    enum State
    {
        RequestMethodStart,
        RequestMethod,
        RequestUriStart,
        RequestUri,
        RequestHttpVersion_h,
        RequestHttpVersion_ht,
        RequestHttpVersion_htt,
        RequestHttpVersion_http,
        RequestHttpVersion_slash,
        RequestHttpVersion_majorStart,
        RequestHttpVersion_major,
        RequestHttpVersion_minorStart,
        RequestHttpVersion_minor,

        // Of the Response* states only ResponseHttpVersion_newLine is used by
        // consume() (as the LF after the request line); the others end up in
        // the default branch.
        ResponseStatusStart,
        ResponseHttpVersion_ht,
        ResponseHttpVersion_htt,
        ResponseHttpVersion_http,
        ResponseHttpVersion_slash,
        ResponseHttpVersion_majorStart,
        ResponseHttpVersion_major,
        ResponseHttpVersion_minorStart,
        ResponseHttpVersion_minor,
        ResponseHttpVersion_spaceAfterVersion,
        ResponseHttpVersion_statusCodeStart,
        ResponseHttpVersion_spaceAfterStatusCode,
        ResponseHttpVersion_statusTextStart,
        ResponseHttpVersion_newLine,

        HeaderLineStart,
        HeaderLws,
        HeaderName,
        SpaceBeforeHeaderValue,
        HeaderValue,
        ExpectingNewline_2,
        ExpectingNewline_3,

        Post,
        ChunkSize,
        ChunkExtensionName,
        ChunkExtensionValue,
        ChunkSizeNewLine,
        ChunkSizeNewLine_2,
        ChunkSizeNewLine_3,
        ChunkTrailerName,
        ChunkTrailerValue,

        ChunkDataNewLine_1,
        ChunkDataNewLine_2,
        ChunkData,
    }
}
625 
// Parses a complete GET request in one call and verifies what the parser
// extracted. Assertions are used so that a regression actually fails the test
// run instead of merely printing a message.
unittest
{
    import http.HttpRequest;
    import http.HttpRequestParser;

    enum string text = "GET /testuri HTTP/1.1\r\n" ~
                        "User-Agent: Mozilla/5.0\r\n" ~
                        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" ~
                        "Host: 127.0.0.1\r\n" ~
                        "\r\n";

    HttpRequest request = new HttpRequest();

    auto parser = new HttpRequestParser;

    // Cast to const: the literal is immutable and the parser only reads it.
    HttpRequestParser.ParseResult res = parser.parse(request, cast(const(ubyte)[]) text);

    assert(res == HttpRequestParser.ParseResult.ParsingCompleted,
           "a well-formed GET request must parse completely");

    assert(request.method == "GET");
    assert(request.versionMajor == 1);
    assert(request.versionMinor == 1);

    // HTTP/1.1 without a Connection header is treated as keep-alive.
    assert(request.keepAlive);

    // Header names must come out in the order they were sent.
    immutable string[] expectedNames = ["User-Agent", "Accept", "Host"];
    size_t seen = 0;
    foreach (header; request.headers)
    {
        assert(seen < expectedNames.length, "more headers parsed than were sent");
        assert(icmp(header.name[], expectedNames[seen]) == 0);
        ++seen;
    }
    assert(seen == expectedNames.length, "not all headers were parsed");
}