module http.HttpRequestParser;

import std.container;

import std.algorithm.comparison : equal;
import std.conv : to;
import std.algorithm.iteration : filter;
import std.uni : icmp, isAlpha;
import std.array : array;

import core.stdc.ctype : isalnum;
import core.stdc.stdlib : strtol;

import http.HttpRequest;

/**
 * Incremental HTTP request parser implemented as a byte-driven state machine.
 *
 * The parser keeps its position in the private fields below, so `parse` may be
 * called repeatedly with successive fragments of the same request; it returns
 * `ParsingIncompleted` until the request is complete or malformed.
 *
 * The parser does not rewind itself after a request has been completed or
 * rejected: call `reset` before feeding it a new request.
 */
class HttpRequestParser
{
    private
    {
        State _state = State.RequestMethodStart; // current state of the machine
        size_t _contentSize = 0;                 // body bytes still expected (Content-Length)
        string _chunkSizeStr;                    // hex digits of the chunk size being read
        size_t _chunkSize = 0;                   // bytes still expected in the current chunk
        bool _chunked = false;                   // "Transfer-Encoding: chunked" was seen
    }

public:
    this()
    {
        // Nothing to do: every field carries its initial value.
    }

    /// Outcome of feeding a buffer to the parser.
    enum ParseResult {
        ParsingCompleted,
        ParsingIncompleted,
        ParsingError
    }

    /**
     * Feeds `text` to the state machine, filling `req` as fields are recognised.
     *
     * Params:
     *   req  = request object receiving method, uri, version, headers, content
     *          and the keep-alive flag
     *   text = next fragment of the raw request
     *
     * Returns: `ParsingCompleted` as soon as the request is complete (bytes
     *          after that point in `text` are not examined), `ParsingError` on
     *          the first malformed byte, `ParsingIncompleted` if `text` ended
     *          before the request did.
     */
    ParseResult parse(ref HttpRequest req, const ubyte[] text)
    {
        return consume(req, text);
    }

    /**
     * Restores the initial parser state so the instance can parse another
     * request. The caller is responsible for supplying a fresh `HttpRequest`.
     */
    void reset()
    {
        _state = State.RequestMethodStart;
        _contentSize = 0;
        _chunkSizeStr = null;
        _chunkSize = 0;
        _chunked = false;
    }

private:
    /// True when `h` is the "Connection" header (case-insensitive).
    static bool checkIfConnection(const HttpRequest.Header h)
    {
        return icmp(h.name[], "Connection") == 0;
    }

    /**
     * Parses an unsigned decimal number, ignoring surrounding spaces and tabs.
     *
     * Returns: false when `text` is empty, contains a non-digit, or does not
     *          fit in `size_t`; `value` is only meaningful when true is returned.
     */
    static bool parseDecimal(const(char)[] text, out size_t value)
    {
        while( text.length != 0 && (text[0] == ' ' || text[0] == '\t') )
            text = text[1 .. $];
        while( text.length != 0 && (text[$ - 1] == ' ' || text[$ - 1] == '\t') )
            text = text[0 .. $ - 1];

        if( text.length == 0 )
            return false;

        size_t result = 0;
        foreach(c; text)
        {
            if( c < '0' || c > '9' )
                return false;

            immutable size_t digit = c - '0';
            // Reject values that would overflow size_t instead of wrapping.
            if( result > (size_t.max - digit) / 10 )
                return false;

            result = result * 10 + digit;
        }

        value = result;
        return true;
    }

    /**
     * Parses an unsigned hexadecimal number (no prefix, no sign).
     *
     * Returns: false when `text` is empty, contains a non-hex character, or
     *          does not fit in `size_t`.
     */
    static bool parseHex(const(char)[] text, out size_t value)
    {
        if( text.length == 0 )
            return false;

        size_t result = 0;
        foreach(c; text)
        {
            size_t digit;
            if( c >= '0' && c <= '9' )
                digit = c - '0';
            else if( c >= 'a' && c <= 'f' )
                digit = c - 'a' + 10;
            else if( c >= 'A' && c <= 'F' )
                digit = c - 'A' + 10;
            else
                return false;

            // The top four bits must be free before shifting in another digit.
            if( result > (size_t.max >> 4) )
                return false;

            result = (result << 4) | digit;
        }

        value = result;
        return true;
    }

    /**
     * Sets `req.keepAlive` once all headers are known: an explicit
     * "Connection" header decides; otherwise HTTP/1.1 (or a major version
     * above 1) enables keep-alive and anything else leaves the flag untouched.
     */
    void updateKeepAlive(ref HttpRequest req)
    {
        auto it = filter!(a => checkIfConnection(a))(req.headers.array);

        if( !it.empty() )
        {
            HttpRequest.Header header = it.front;
            // Any value other than "Keep-Alive" is treated like "Close".
            req.keepAlive = icmp(header.value.data(), "Keep-Alive") == 0;
        }
        else
        {
            if( req.versionMajor > 1 || (req.versionMajor == 1 && req.versionMinor == 1) )
                req.keepAlive = true;
        }
    }

    /// Runs the state machine over every byte of `text`; see `parse`.
    ParseResult consume(ref HttpRequest req, const ubyte[] text)
    {
        foreach(input; text)
        {
            switch (_state)
            {
            case State.RequestMethodStart:
                if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    _state = State.RequestMethod;
                    req.method ~= input;
                }
                break;
            case State.RequestMethod:
                if( input == ' ' )
                {
                    _state = State.RequestUriStart;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.method ~= input;
                }
                break;
            case State.RequestUriStart:
                if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    _state = State.RequestUri;
                    req.uri ~= input;
                }
                break;
            case State.RequestUri:
                if( input == ' ' )
                {
                    _state = State.RequestHttpVersion_h;
                }
                else if( input == '\r' )
                {
                    // A request line without a version is reported as HTTP/0.9.
                    req.versionMajor = 0;
                    req.versionMinor = 9;

                    return ParseResult.ParsingCompleted;
                }
                else if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.uri ~= input;
                }
                break;
            case State.RequestHttpVersion_h:
                if( input == 'H' )
                {
                    _state = State.RequestHttpVersion_ht;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_ht:
                if( input == 'T' )
                {
                    _state = State.RequestHttpVersion_htt;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_htt:
                if( input == 'T' )
                {
                    _state = State.RequestHttpVersion_http;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_http:
                if( input == 'P' )
                {
                    _state = State.RequestHttpVersion_slash;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_slash:
                if( input == '/' )
                {
                    req.versionMajor = 0;
                    req.versionMinor = 0;
                    _state = State.RequestHttpVersion_majorStart;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_majorStart:
                if( isDigit(input) )
                {
                    req.versionMajor = input - '0';
                    _state = State.RequestHttpVersion_major;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_major:
                if( input == '.' )
                {
                    _state = State.RequestHttpVersion_minorStart;
                }
                else if( isDigit(input) )
                {
                    req.versionMajor = req.versionMajor * 10 + input - '0';
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_minorStart:
                if( isDigit(input) )
                {
                    req.versionMinor = input - '0';
                    _state = State.RequestHttpVersion_minor;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.RequestHttpVersion_minor:
                if( input == '\r' )
                {
                    _state = State.ResponseHttpVersion_newLine;
                }
                else if( isDigit(input) )
                {
                    req.versionMinor = req.versionMinor * 10 + input - '0';
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ResponseHttpVersion_newLine:
                if( input == '\n' )
                {
                    _state = State.HeaderLineStart;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.HeaderLineStart:
                if( input == '\r' )
                {
                    // Empty line: end of the header section.
                    _state = State.ExpectingNewline_3;
                }
                else if( !req.headers.empty() && (input == ' ' || input == '\t') )
                {
                    // Leading whitespace continues the previous header's value.
                    _state = State.HeaderLws;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    // Start a new header whose name begins with this byte.
                    HttpRequest.Header header;
                    header.name.put(input);
                    req.headers.insertBack(header);

                    _state = State.HeaderName;
                }
                break;
            case State.HeaderLws:
                if( input == '\r' )
                {
                    _state = State.ExpectingNewline_2;
                }
                else if( input == ' ' || input == '\t' )
                {
                    // skip further leading whitespace
                }
                else if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    _state = State.HeaderValue;
                    req.headers.back.value.put(input);
                }
                break;
            case State.HeaderName:
                if( input == ':' )
                {
                    _state = State.SpaceBeforeHeaderValue;
                }
                else if( !isChar(input) || isControl(input) || isSpecial(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.headers.back.name.put(input);
                }
                break;
            case State.SpaceBeforeHeaderValue:
                if( input == ' ' )
                {
                    _state = State.HeaderValue;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.HeaderValue:
                if( input == '\r' )
                {
                    // Body framing headers are only honoured for POST and PUT.
                    if( req.method == "POST" || req.method == "PUT" )
                    {
                        HttpRequest.Header h = req.headers.back;

                        if( icmp(h.name[], "Content-Length") == 0 )
                        {
                            // A malformed length is a protocol error, not an exception.
                            size_t length;
                            if( !parseDecimal(h.value.data(), length) )
                            {
                                return ParseResult.ParsingError;
                            }
                            _contentSize = length;
                        }
                        else if( icmp(h.name[], "Transfer-Encoding") == 0 )
                        {
                            if( icmp(h.value[], "chunked") == 0 )
                                _chunked = true;
                        }
                    }
                    _state = State.ExpectingNewline_2;
                }
                else if( isControl(input) )
                {
                    return ParseResult.ParsingError;
                }
                else
                {
                    req.headers.back.value.put(input);
                }
                break;
            case State.ExpectingNewline_2:
                if( input == '\n' )
                {
                    _state = State.HeaderLineStart;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ExpectingNewline_3:
                // The blank line ending the headers must be terminated by LF,
                // whether or not a body follows.
                if( input != '\n' )
                {
                    return ParseResult.ParsingError;
                }

                updateKeepAlive(req);

                if( _chunked )
                {
                    _state = State.ChunkSize;
                }
                else if( _contentSize == 0 )
                {
                    return ParseResult.ParsingCompleted;
                }
                else
                {
                    _state = State.Post;
                }
                break;
            case State.Post:
                --_contentSize;
                req.content ~= input;

                if( _contentSize == 0 )
                {
                    return ParseResult.ParsingCompleted;
                }
                break;
            case State.ChunkSize:
                if( isHexDigit(input) )
                {
                    _chunkSizeStr ~= cast(char) input;
                }
                else if( input == ';' )
                {
                    _state = State.ChunkExtensionName;
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkExtensionName:
                if( isalnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '=' )
                {
                    _state = State.ChunkExtensionValue;
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkExtensionValue:
                if( isalnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkSizeNewLine:
                if( input == '\n' )
                {
                    size_t size;
                    if( !parseHex(_chunkSizeStr, size) )
                    {
                        return ParseResult.ParsingError;
                    }
                    _chunkSize = size;
                    // Forget the digits so the next chunk size starts empty.
                    _chunkSizeStr = null;

                    if( _chunkSize == 0 )
                        _state = State.ChunkSizeNewLine_2;
                    else
                        _state = State.ChunkData;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkSizeNewLine_2:
                if( input == '\r' )
                {
                    _state = State.ChunkSizeNewLine_3;
                }
                else if( isAlpha(input) )
                {
                    _state = State.ChunkTrailerName;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkSizeNewLine_3:
                if( input == '\n' )
                {
                    return ParseResult.ParsingCompleted;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
            case State.ChunkTrailerName:
                if( isalnum(input) )
                {
                    // skip
                }
                else if( input == ':' )
                {
                    _state = State.ChunkTrailerValue;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkTrailerValue:
                if( isalnum(input) || input == ' ' )
                {
                    // skip
                }
                else if( input == '\r' )
                {
                    _state = State.ChunkTrailerNewLine;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkTrailerNewLine:
                if( input == '\n' )
                {
                    // Either another trailer line or the final empty line follows.
                    _state = State.ChunkSizeNewLine_2;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkData:
                req.content ~= input;

                if( --_chunkSize == 0 )
                {
                    _state = State.ChunkDataNewLine_1;
                }
                break;
            case State.ChunkDataNewLine_1:
                if( input == '\r' )
                {
                    _state = State.ChunkDataNewLine_2;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            case State.ChunkDataNewLine_2:
                if( input == '\n' )
                {
                    _state = State.ChunkSize;
                }
                else
                {
                    return ParseResult.ParsingError;
                }
                break;
            default:
                return ParseResult.ParsingError;
            }
        }

        return ParseResult.ParsingIncompleted;
    }

    // Check if a byte is an HTTP character.
    bool isChar(int c)
    {
        return c >= 0 && c <= 127;
    }

    // Check if a byte is an HTTP control character.
    bool isControl(int c)
    {
        return (c >= 0 && c <= 31) || (c == 127);
    }

    // Check if a byte is defined as an HTTP special character.
    bool isSpecial(int c)
    {
        switch (c)
        {
        case '(': case ')': case '<': case '>': case '@':
        case ',': case ';': case ':': case '\\': case '"':
        case '/': case '[': case ']': case '?': case '=':
        case '{': case '}': case ' ': case '\t':
            return true;
        default:
            return false;
        }
    }

    // Check if a byte is a digit.
    bool isDigit(int c)
    {
        return c >= '0' && c <= '9';
    }

    // Check if a byte is a hexadecimal digit.
    bool isHexDigit(int c)
    {
        return (c >= '0' && c <= '9')
            || (c >= 'a' && c <= 'f')
            || (c >= 'A' && c <= 'F');
    }

    // The current state of the parser.
    enum State
    {
        RequestMethodStart,
        RequestMethod,
        RequestUriStart,
        RequestUri,
        RequestHttpVersion_h,
        RequestHttpVersion_ht,
        RequestHttpVersion_htt,
        RequestHttpVersion_http,
        RequestHttpVersion_slash,
        RequestHttpVersion_majorStart,
        RequestHttpVersion_major,
        RequestHttpVersion_minorStart,
        RequestHttpVersion_minor,

        ResponseStatusStart,
        ResponseHttpVersion_ht,
        ResponseHttpVersion_htt,
        ResponseHttpVersion_http,
        ResponseHttpVersion_slash,
        ResponseHttpVersion_majorStart,
        ResponseHttpVersion_major,
        ResponseHttpVersion_minorStart,
        ResponseHttpVersion_minor,
        ResponseHttpVersion_spaceAfterVersion,
        ResponseHttpVersion_statusCodeStart,
        ResponseHttpVersion_spaceAfterStatusCode,
        ResponseHttpVersion_statusTextStart,
        ResponseHttpVersion_newLine,

        HeaderLineStart,
        HeaderLws,
        HeaderName,
        SpaceBeforeHeaderValue,
        HeaderValue,
        ExpectingNewline_2,
        ExpectingNewline_3,

        Post,
        ChunkSize,
        ChunkExtensionName,
        ChunkExtensionValue,
        ChunkSizeNewLine,
        ChunkSizeNewLine_2,
        ChunkSizeNewLine_3,
        ChunkTrailerName,
        ChunkTrailerValue,

        ChunkDataNewLine_1,
        ChunkDataNewLine_2,
        ChunkData,

        // LF expected after a trailer line's CR.
        ChunkTrailerNewLine,
    }
}

unittest
{
    import http.HttpRequest;
    import http.HttpRequestParser;

    import std.stdio;

    alias Result = HttpRequestParser.ParseResult;

    // Parses `text` into `request` with a fresh parser and returns the outcome.
    static Result run(string text, HttpRequest request)
    {
        auto parser = new HttpRequestParser;
        return parser.parse(request, cast(const(ubyte)[]) text);
    }

    // Plain GET request without a body.
    {
        enum string text = "GET /testuri HTTP/1.1\r\n" ~
            "User-Agent: Mozilla/5.0\r\n" ~
            "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" ~
            "Host: 127.0.0.1\r\n" ~
            "\r\n";

        HttpRequest request = new HttpRequest();

        Result res = run(text, request);

        assert(res == Result.ParsingCompleted);
        assert(request.method == "GET");

        writeln("method: ", request.method);
        writeln("uri", request.uri);
        foreach (header; request.headers)
        {
            writeln(header.name[], ": ", header.value[]);
        }
    }

    // Chunked body made of several chunks: every chunk size must be parsed
    // on its own for the terminating zero-size chunk to be recognised.
    {
        enum string text = "POST /upload HTTP/1.1\r\n" ~
            "Host: 127.0.0.1\r\n" ~
            "Transfer-Encoding: chunked\r\n" ~
            "\r\n" ~
            "5\r\nhello\r\n" ~
            "6\r\n world\r\n" ~
            "0\r\n" ~
            "\r\n";

        HttpRequest request = new HttpRequest();

        assert(run(text, request) == Result.ParsingCompleted);
    }

    // Body delimited by Content-Length.
    {
        enum string text = "POST /upload HTTP/1.1\r\n" ~
            "Content-Length: 5\r\n" ~
            "\r\n" ~
            "hello";

        HttpRequest request = new HttpRequest();

        assert(run(text, request) == Result.ParsingCompleted);
    }

    // A malformed Content-Length is reported as an error, not thrown.
    {
        enum string text = "POST /upload HTTP/1.1\r\n" ~
            "Content-Length: abc\r\n" ~
            "\r\n";

        HttpRequest request = new HttpRequest();

        assert(run(text, request) == Result.ParsingError);
    }
}