Libevhtp  1.2.13
parser.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stddef.h>
3 #include <ctype.h>
4 
5 #include "internal.h"
6 #include "evhtp/parser.h"
7 #include "evhtp/config.h"
8 #include "log.h"
9 
10 #if '\n' != '\x0a' || 'A' != 65
11 #error "You have somehow found a non-ASCII host. We can't build here."
12 #endif
13 
/* Upper bound checked by HTP_SET_BUF before a byte is stored in p->buf. */
#define PARSER_STACK_MAX 8192

/*
 * ASCII line feed and carriage return as unsigned char values.
 * The whole cast expression is parenthesised so the macros behave as a
 * single operand in every context (e.g. `sizeof LF`).
 */
#define LF               ((unsigned char)10)
#define CR               ((unsigned char)13)

/* The two-byte CR LF line terminator as a string literal. */
#define CRLF             "\x0d\x0a"
18 
27 };
28 
30  parser_flag_chunked = (1 << 0),
34 };
35 
37  s_start = 0,
80 };
81 
85 
86 
87 struct htparser {
88  htpparse_error error;
89  parser_state state;
90  parser_flags flags;
91  eval_hdr_val heval;
92 
93  htp_type type;
94  htp_scheme scheme;
95  htp_method method;
96 
97  unsigned char multipart;
98  unsigned char major;
99  unsigned char minor;
100  uint64_t content_len; /* this gets decremented as data passes through */
101  uint64_t orig_content_len; /* this contains the original length of the body */
102  uint64_t bytes_read;
104  unsigned int status; /* only for responses */
105  unsigned int status_count; /* only for responses */
106 
108  char * host_offset;
109  char * port_offset;
110  char * path_offset;
111  char * args_offset;
112 
113  void * userdata;
114 
115  size_t buf_idx;
116  /* Must be last since htparser_init memsets up to the offset of this buffer */
118 };
119 
#ifdef EVHTP_DEBUG
/**
 * @brief Write every field of a parser to the debug log.
 *
 * Only compiled when EVHTP_DEBUG is defined; otherwise the name expands to
 * nothing.
 *
 * Each argument is converted to exactly the type its conversion
 * specification expects:
 *   - enum-typed fields are cast to int for %d,
 *   - the numeric byte fields (multipart/major/minor) are printed with %u
 *     rather than as raw characters,
 *   - the 64-bit counters are cast to unsigned long long for %llu,
 *   - the offset pointers, which s_start resets to NULL, are replaced by
 *     the text "(null)" before being handed to %s.
 *
 * @param p parser to dump; must not be NULL.
 */
static void
log_htparser__s_(struct htparser * p)
{
    log_debug(
        "struct htparser {\n"
        " htpparse_error = %d\n"
        " parser_state = %d\n"
        " parser_flags = %d\n"
        " eval_hdr_val = %d\n"
        " htp_type = %d\n"
        " htp_scheme = %d\n"
        " htp_method = %d\n"
        " multipart = %u\n"
        " major = %u\n"
        " minor = %u\n"
        " content_len = %llu\n"
        " orig_clen = %llu\n"
        " bytes_read = %llu\n"
        " total_read = %llu\n"
        " status = %u\n"
        " status_count = %u\n"
        " scheme_offset = %s\n"
        " host_offset = %s\n"
        " port_offset = %s\n"
        " path_offset = %s\n"
        " args_offset = %s\n"
        " userdata = %p\n"
        " buf_idx = %zu\n"
        " buf = %s\n"
        "};",
        (int)p->error,
        (int)p->state,
        (int)p->flags,
        (int)p->heval,
        (int)p->type,
        (int)p->scheme,
        (int)p->method,
        (unsigned int)p->multipart,
        (unsigned int)p->major,
        (unsigned int)p->minor,
        (unsigned long long)p->content_len,
        (unsigned long long)p->orig_content_len,
        (unsigned long long)p->bytes_read,
        (unsigned long long)p->total_bytes_read,
        p->status,
        p->status_count,
        p->scheme_offset ? p->scheme_offset : "(null)",
        p->host_offset ? p->host_offset : "(null)",
        p->port_offset ? p->port_offset : "(null)",
        p->path_offset ? p->path_offset : "(null)",
        p->args_offset ? p->args_offset : "(null)",
        p->userdata,
        p->buf_idx,
        p->buf);
} /* log_htparser__s_ */

#else
#define log_htparser__s_(p)
#endif
180 
/*
 * 256-bit membership bitmap indexed by byte value: byte `ch` is a member
 * when bit (ch & 0x1f) of word usual[ch >> 5] is set.
 *
 * The cleared bits are NUL, LF and CR in the first word, and ' ', '#', '%',
 * '+', '.', '/' and '?' in the second; every other byte value (including
 * everything >= 0x80) is a member.
 *
 * The table is never written, so it is declared const.
 */
static const uint32_t usual[] = {
    0xffffdbfe, /* 0x00 - 0x1f */
    0x7fff37d6, /* 0x20 - 0x3f */
    0xffffffff, /* 0x40 - 0x5f */
    0xffffffff, /* 0x60 - 0x7f */
    0xffffffff, /* 0x80 - 0x9f */
    0xffffffff, /* 0xa0 - 0xbf */
    0xffffffff, /* 0xc0 - 0xdf */
    0xffffffff  /* 0xe0 - 0xff */
};
191 
/*
 * Hexadecimal digit lookup indexed by byte value: yields 0-15 for
 * '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other byte.
 *
 * All 256 entries are spelled out. With only the first 128 initialised, the
 * upper half of the array would be zero-filled and bytes >= 0x80 would be
 * reported as the valid digit 0 instead of -1.
 */
static const int8_t unhex[256] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xa0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xb0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xc0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xd0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xe0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xf0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
202 
/*
 * Printable names for parser error codes, indexed by the numeric error
 * value (the table is indexed directly with an htpparse_error).
 *
 * NOTE(review): the table holds 13 entries (indices 0-12); confirm that
 * every value allowed through by the range check in the strerror accessor
 * has an entry here.
 */
static const char * errstr_map[] = {
    [0]  = "htparse_error_none",
    [1]  = "htparse_error_too_big",
    [2]  = "htparse_error_invalid_method",
    [3]  = "htparse_error_invalid_requestline",
    [4]  = "htparse_error_invalid_schema",
    [5]  = "htparse_error_invalid_protocol",
    [6]  = "htparse_error_invalid_version",
    [7]  = "htparse_error_invalid_header",
    [8]  = "htparse_error_invalid_chunk_size",
    [9]  = "htparse_error_invalid_chunk",
    [10] = "htparse_error_invalid_state",
    [11] = "htparse_error_user",
    [12] = "htparse_error_unknown"
};
218 
/*
 * Canonical request-method names, indexed by htp_method value
 * (htparser_get_methodstr_m returns method_strmap[meth]).
 *
 * The WebDAV method is spelled "PROPPATCH" (two P's), matching both
 * RFC 4918 and the token that get_method() recognises.
 */
static const char * method_strmap[] = {
    "GET",
    "HEAD",
    "POST",
    "PUT",
    "DELETE",
    "MKCOL",
    "COPY",
    "MOVE",
    "OPTIONS",
    "PROPFIND",
    "PROPPATCH",
    "LOCK",
    "UNLOCK",
    "TRACE",
    "CONNECT",
    "PATCH",
};
237 
/* Smaller of two values; when they compare equal the second is produced.
 * Both arguments are evaluated more than once. */
#define _MIN_READ(a, b) ((b) > (a) ? (a) : (b))
239 
/*
 * Fixed-length token comparison helpers.
 *
 * Each macro compares the leading bytes of `m` against the listed character
 * constants and yields non-zero only if every listed byte matches.
 *
 * The comparison is done byte by byte, so it gives the same answer on any
 * byte order and does not depend on the alignment of `m` or on reading the
 * buffer through a uint32_t pointer. Each expansion is fully parenthesised
 * and can be negated or combined with other conditions safely.
 *
 * The 3- and 7-character variants take one extra constant (callers pass
 * '\0'), which is compared against the byte that follows the token; `m`
 * must therefore be readable for 4 and 8 bytes respectively.
 *
 * `m` is evaluated several times: pass a plain pointer expression without
 * side effects.
 */
#define _str3_cmp(m, c0, c1, c2, c3)                                         \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str3Ocmp(m, c0, c1, c2, c3)                                         \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str4cmp(m, c0, c1, c2, c3)                                          \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3))

#define _str5cmp(m, c0, c1, c2, c3, c4)                                      \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)    \
     && (m)[4] == (c4))

#define _str6cmp(m, c0, c1, c2, c3, c4, c5)                                  \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)    \
     && (m)[4] == (c4) && (m)[5] == (c5))

#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)                         \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)    \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7))

#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)                          \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)    \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7))

#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8)                      \
    ((m)[0] == (c0) && (m)[1] == (c1) && (m)[2] == (c2) && (m)[3] == (c3)    \
     && (m)[4] == (c4) && (m)[5] == (c5) && (m)[6] == (c6) && (m)[7] == (c7) \
     && (m)[8] == (c8))
302 
/*
 * Generates `hook_<name>_run(p, hooks)` for a callback that takes only the
 * parser.
 *
 * The generated function logs "enter", then returns 0 when `hooks` is NULL
 * or the `<name>` member is unset; otherwise it invokes the callback with
 * `p` and returns the callback's result.
 */
#define __HTPARSE_GENHOOK(__n)                                                    \
    static inline int hook_ ## __n ## _run(htparser * p, htparse_hooks * hooks) { \
        log_debug("enter");                                                       \
        if (hooks == NULL || hooks->__n == NULL)                                  \
        {                                                                         \
            return 0;                                                             \
        }                                                                         \
                                                                                  \
        return hooks->__n(p);                                                     \
    }
313 
/*
 * Generates `hook_<name>_run(p, hooks, s, l)` for a callback that also
 * receives a data pointer `s` and a length `l`.
 *
 * The generated function logs "enter", then returns 0 when `hooks` is NULL
 * or the `<name>` member is unset; otherwise it forwards `p`, `s` and `l`
 * to the callback and returns the callback's result.
 */
#define __HTPARSE_GENDHOOK(__n)                                 \
    static inline int hook_ ## __n ## _run(htparser * p,        \
                                           htparse_hooks * hooks, \
                                           const char * s, size_t l) { \
        log_debug("enter");                                     \
        if (hooks == NULL || hooks->__n == NULL)                \
        {                                                       \
            return 0;                                           \
        }                                                       \
                                                                \
        return hooks->__n(p, s, l);                             \
    }
326 
327 __HTPARSE_GENHOOK(on_msg_begin)
328 __HTPARSE_GENHOOK(on_hdrs_begin)
329 __HTPARSE_GENHOOK(on_hdrs_complete)
330 __HTPARSE_GENHOOK(on_new_chunk)
331 __HTPARSE_GENHOOK(on_chunk_complete)
332 __HTPARSE_GENHOOK(on_chunks_complete)
333 __HTPARSE_GENHOOK(on_msg_complete)
334 
337 __HTPARSE_GENDHOOK(host)
338 __HTPARSE_GENDHOOK(port)
339 __HTPARSE_GENDHOOK(path)
342 __HTPARSE_GENDHOOK(hdr_key)
343 __HTPARSE_GENDHOOK(hdr_val)
344 __HTPARSE_GENDHOOK(body)
345 __HTPARSE_GENDHOOK(hostname)
346 
347 
/**
 * @brief Parse an unsigned decimal number from a length-delimited buffer.
 *
 * Trailing blanks (space / tab) are ignored. After trimming, every remaining
 * byte must be an ASCII digit and the value must fit in a uint64_t.
 *
 * @param str first byte of the text; does not need to be NUL-terminated.
 * @param n   number of bytes available at `str`.
 * @param err set to 1 on failure. It is left untouched on success, so the
 *            caller must initialise it before the call.
 *
 * @return the parsed value, or 0 with `*err == 1` when the text holds a
 *         non-digit, is longer than 20 characters, or exceeds the range of
 *         uint64_t. An empty (or all-blank) input yields 0 without an error.
 */
static inline uint64_t
str_to_uint64(char * str, size_t n, int * err)
{
    const uint64_t limit = (uint64_t)-1; /* largest representable value */
    uint64_t       value = 0;

    /* Trim whitespace after value. The cast keeps the argument to isblank()
     * within the unsigned char range even where plain char is signed. */
    while (n && isblank((unsigned char)str[n - 1]))
    {
        n--;
    }

    if (n > 20)
    {
        /* 18446744073709551615 is 20 bytes */
        *err = 1;
        return 0;
    }

    for (; n--; str++)
    {
        uint64_t digit;

        if (*str < '0' || *str > '9')
        {
            *err = 1;
            return 0;
        }

        digit = (uint64_t)(*str - '0');

        /* Reject before multiplying. Comparing the wrapped product with the
         * previous value misses inputs such as "30000000000000000000",
         * whose wrapped result is still larger than the previous value. */
        if (value > (limit - digit) / 10)
        {
            *err = 1;
            return 0;
        }

        value = value * 10 + digit;
    }

    return value;
}
389 
/**
 * @brief Parse a non-negative decimal number from a length-delimited buffer.
 *
 * @param str first byte of the text; does not need to be NUL-terminated.
 * @param n   number of bytes to parse.
 *
 * @return the parsed value, or -1 when `n` is 0, when a byte is not an ASCII
 *         digit, or when the value does not fit in an ssize_t.
 */
static inline ssize_t
_str_to_ssize_t(char * str, size_t n)
{
    /* Largest ssize_t, derived without extra headers.
     * Assumes ssize_t is the signed counterpart of size_t (same width). */
    const ssize_t limit = (ssize_t)(((size_t)-1) >> 1);
    ssize_t       value = 0;

    if (n == 0)
    {
        return -1;
    }

    for (; n--; str++)
    {
        ssize_t digit;

        if (*str < '0' || *str > '9')
        {
            return -1;
        }

        digit = *str - '0';

        /* Check the range before multiplying: signed overflow is undefined
         * behaviour, so it cannot be detected after the fact. */
        if (value > (limit - digit) / 10)
        {
            return -1;
        }

        value = value * 10 + digit;
    }

    return value;
}
419 
420 htpparse_error
422 {
423  return p->error;
424 }
425 
426 const char *
428 {
429  htpparse_error e = htparser_get_error(p);
430 
431  if (e > htparse_error_generic)
432  {
433  return "htparse_no_such_error";
434  }
435 
436  return errstr_map[e];
437 }
438 
439 unsigned int
441 {
442  return p->status;
443 }
444 
445 int
447 {
448  if (p->major > 0 && p->minor > 0)
449  {
451  {
452  return 0;
453  } else {
454  return 1;
455  }
456  } else {
458  {
459  return 1;
460  } else {
461  return 0;
462  }
463  }
464 
465  return 0;
466 }
467 
468 htp_scheme
470 {
471  return p->scheme;
472 }
473 
474 htp_method
476 {
477  return p->method;
478 }
479 
480 const char *
481 htparser_get_methodstr_m(htp_method meth)
482 {
483  if (meth >= htp_method_UNKNOWN)
484  {
485  return NULL;
486  }
487 
488  return method_strmap[meth];
489 }
490 
491 const char *
493 {
494  return htparser_get_methodstr_m(p->method);
495 }
496 
497 void
498 htparser_set_major(htparser * p, unsigned char major)
499 {
500  p->major = major;
501 }
502 
503 void
504 htparser_set_minor(htparser * p, unsigned char minor)
505 {
506  p->minor = minor;
507 }
508 
509 unsigned char
511 {
512  return p->major;
513 }
514 
515 unsigned char
517 {
518  return p->minor;
519 }
520 
521 unsigned char
523 {
524  return p->multipart;
525 }
526 
527 void *
529 {
530  return p->userdata;
531 }
532 
533 void
535 {
536  p->userdata = ud;
537 }
538 
539 uint64_t
541 {
542  return p->content_len;
543 }
544 
545 uint64_t
547 {
548  return p->orig_content_len;
549 }
550 
551 uint64_t
553 {
554  return p->bytes_read;
555 }
556 
557 uint64_t
559 {
560  return p->total_bytes_read;
561 }
562 
563 void
565 {
566  /* Do not memset entire string buffer. */
567  memset(p, 0, offsetof(htparser, buf));
568  p->buf[0] = '\0';
569  p->state = s_start;
570  p->error = htparse_error_none;
571  p->method = htp_method_UNKNOWN;
572  p->type = type;
573 }
574 
575 htparser *
577 {
578  return malloc(sizeof(htparser));
579 }
580 
/**
 * @brief Tell whether a byte is accepted as part of a host name.
 *
 * @param ch byte to classify.
 *
 * @return 1 for an ASCII letter (either case), an ASCII digit, '.' or '-';
 *         0 for anything else.
 */
static int
is_host_char(unsigned char ch)
{
    /* Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z', so a single range test
     * covers both cases. */
    const unsigned char folded = (unsigned char)(ch | 0x20);

    switch (ch) {
        case '.':
        case '-':
            return 1;
        default:
            break;
    }

    if ('a' <= folded && folded <= 'z')
    {
        return 1;
    }

    return ('0' <= ch && ch <= '9') ? 1 : 0;
}
598 
599 static htp_method
600 get_method(const char * m, const size_t sz)
601 {
602  switch (sz) {
603  case 3:
604  if (_str3_cmp(m, 'G', 'E', 'T', '\0'))
605  {
606  return htp_method_GET;
607  }
608 
609  if (_str3_cmp(m, 'P', 'U', 'T', '\0'))
610  {
611  return htp_method_PUT;
612  }
613 
614  break;
615  case 4:
616  if (m[1] == 'O')
617  {
618  if (_str3Ocmp(m, 'P', 'O', 'S', 'T'))
619  {
620  return htp_method_POST;
621  }
622 
623  if (_str3Ocmp(m, 'C', 'O', 'P', 'Y'))
624  {
625  return htp_method_COPY;
626  }
627 
628  if (_str3Ocmp(m, 'M', 'O', 'V', 'E'))
629  {
630  return htp_method_MOVE;
631  }
632 
633  if (_str3Ocmp(m, 'L', 'O', 'C', 'K'))
634  {
635  return htp_method_LOCK;
636  }
637  } else {
638  if (_str4cmp(m, 'H', 'E', 'A', 'D'))
639  {
640  return htp_method_HEAD;
641  }
642  }
643 
644  break;
645  case 5:
646  if (_str5cmp(m, 'M', 'K', 'C', 'O', 'L'))
647  {
648  return htp_method_MKCOL;
649  }
650 
651  if (_str5cmp(m, 'T', 'R', 'A', 'C', 'E'))
652  {
653  return htp_method_TRACE;
654  }
655 
656  if (_str5cmp(m, 'P', 'A', 'T', 'C', 'H'))
657  {
658  return htp_method_PATCH;
659  }
660 
661  break;
662  case 6:
663  if (_str6cmp(m, 'D', 'E', 'L', 'E', 'T', 'E'))
664  {
665  return htp_method_DELETE;
666  }
667 
668  if (_str6cmp(m, 'U', 'N', 'L', 'O', 'C', 'K'))
669  {
670  return htp_method_UNLOCK;
671  }
672 
673  break;
674  case 7:
675  if (_str7_cmp(m, 'O', 'P', 'T', 'I', 'O', 'N', 'S', '\0'))
676  {
677  return htp_method_OPTIONS;
678  }
679 
680  if (_str7_cmp(m, 'C', 'O', 'N', 'N', 'E', 'C', 'T', '\0'))
681  {
682  return htp_method_CONNECT;
683  }
684 
685  break;
686  case 8:
687  if (_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D'))
688  {
689  return htp_method_PROPFIND;
690  }
691 
692  break;
693 
694  case 9:
695  if (_str9cmp(m, 'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H'))
696  {
697  return htp_method_PROPPATCH;
698  }
699 
700  break;
701  } /* switch */
702 
703  return htp_method_UNKNOWN;
704 } /* get_method */
705 
/*
 * Append one byte to the parser's scratch buffer and keep it NUL-terminated.
 *
 * Expands in place and relies on two names from the enclosing scope: `p`
 * (the parser) and `i` (the index of the current input byte). When the byte
 * plus its terminator would not fit below PARSER_STACK_MAX, the macro sets
 * p->error to htparse_error_too_big and makes the enclosing function
 * return i + 1.
 */
#define HTP_SET_BUF(CH) do {                                         \
        if (!evhtp_likely((p->buf_idx + 1) < PARSER_STACK_MAX)) {    \
            p->error = htparse_error_too_big;                        \
            return i + 1;                                            \
        }                                                            \
        p->buf[p->buf_idx++] = (CH);                                 \
        p->buf[p->buf_idx]   = '\0';                                 \
} while (0)
715 
716 
717 size_t
718 htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
719 {
720  unsigned char ch;
721  char c;
722  size_t i;
723 
724  log_debug("enter");
725  log_debug("p == %p", p);
726 
727  p->error = htparse_error_none;
728  p->bytes_read = 0;
729 
730  for (i = 0; i < len; i++)
731  {
732  int res;
733  int err;
734 
735  ch = data[i];
736 
737  log_debug("[%p] data[%zu] = %c (%x)", p, i, isprint(ch) ? ch : ' ', ch);
738 
739  p->total_bytes_read += 1;
740  p->bytes_read += 1;
741 
742  switch (p->state) {
743  case s_start:
744  log_debug("[%p] s_start", p);
745 
746  if (ch == CR || ch == LF)
747  {
748  break;
749  }
750 
751  if ((ch < 'A' || ch > 'Z') && ch != '_')
752  {
753  p->error = htparse_error_inval_reqline;
754 
755  log_debug("s_start invalid fist char '%c'", ch);
756  log_htparser__s_(p);
757 
758  return i + 1;
759  }
760 
761 
762  p->flags = 0;
763  p->error = htparse_error_none;
764  p->method = htp_method_UNKNOWN;
765  p->multipart = 0;
766  p->major = 0;
767  p->minor = 0;
768  p->content_len = 0;
769  p->orig_content_len = 0;
770  p->status = 0;
771  p->status_count = 0;
772  p->scheme_offset = NULL;
773  p->host_offset = NULL;
774  p->port_offset = NULL;
775  p->path_offset = NULL;
776  p->args_offset = NULL;
777 
778 
779  res = hook_on_msg_begin_run(p, hooks);
780 
781  HTP_SET_BUF(ch);
782 
783  if (evhtp_likely(p->type == htp_type_request)) {
784  p->state = s_method;
785  } else if (p->type == htp_type_response && ch == 'H') {
786  p->state = s_http_H;
787  } else {
788  log_debug("not type of request or response?");
789  log_htparser__s_(p);
790 
791  p->error = htparse_error_inval_reqline;
792  return i + 1;
793  }
794 
795  if (res)
796  {
797  p->error = htparse_error_user;
798  return i + 1;
799  }
800 
801  break;
802 
803  case s_method:
804  log_debug("[%p] s_method", p);
805 
806  do {
807  if (ch == ' ')
808  {
809  p->method = get_method(p->buf, p->buf_idx);
810  res = hook_method_run(p, hooks, p->buf, p->buf_idx);
811 
812  p->buf_idx = 0;
814 
815  if (res)
816  {
817  p->error = htparse_error_user;
818  return i + 1;
819  }
820 
821  break;
822  } else {
823  if ((ch < 'A' || ch > 'Z') && ch != '_')
824  {
825  p->error = htparse_error_inval_method;
826  return i + 1;
827  }
828 
829  HTP_SET_BUF(ch);
830  }
831 
832  ch = data[++i];
833  } while (i < len);
834 
835  break;
836  case s_spaces_before_uri:
837  log_debug("[%p] s_spaces_before_uri", p);
838 
839  /* CONNECT is special - RFC 2817 section 5.2:
840  * The Request-URI portion of the Request-Line is
841  * always an 'authority' as defined by URI Generic
842  * Syntax [2], which is to say the host name and port
843  * number destination of the requested connection
844  * separated by a colon
845  */
846  if (p->method == htp_method_CONNECT)
847  {
848  switch (ch) {
849  case ' ':
850  break;
851  case '[':
852  /* Literal IPv6 address start. */
853  HTP_SET_BUF(ch);
854 
855  p->host_offset = &p->buf[p->buf_idx];
856  p->state = s_host_ipv6;
857  break;
858  default:
859  if (!is_host_char(ch))
860  {
861  p->error = htparse_error_inval_reqline;
862  log_htparser__s_(p);
863 
864  return i + 1;
865  }
866 
867  p->host_offset = &p->buf[p->buf_idx];
868 
869  HTP_SET_BUF(ch);
870 
871  p->state = s_host;
872  break;
873  } /* switch */
874 
875  break;
876  }
877 
878  switch (ch) {
879  case ' ':
880  break;
881  case '/':
882  p->path_offset = &p->buf[p->buf_idx];
883 
884  HTP_SET_BUF(ch);
885 
887  break;
888  default:
889  c = (unsigned char)(ch | 0x20);
890 
891  if (c >= 'a' && c <= 'z') {
892  p->scheme_offset = &p->buf[p->buf_idx];
893 
894  HTP_SET_BUF(ch);
895 
896  p->state = s_schema;
897  break;
898  }
899 
900  p->error = htparse_error_inval_reqline;
901  log_htparser__s_(p);
902 
903  return i + 1;
904  } /* switch */
905 
906  break;
907  case s_schema:
908  log_debug("[%p] s_schema", p);
909 
910  c = (unsigned char)(ch | 0x20);
911 
912  if (c >= 'a' && c <= 'z') {
913  HTP_SET_BUF(ch);
914  break;
915  }
916 
917  switch (ch) {
918  case ':':
919  p->scheme = htp_scheme_unknown;
920 
921  switch (p->buf_idx) {
922  case 3:
923  if (_str3_cmp(p->scheme_offset, 'f', 't', 'p', '\0'))
924  {
925  p->scheme = htp_scheme_ftp;
926  break;
927  }
928 
929  if (_str3_cmp(p->scheme_offset, 'n', 'f', 's', '\0'))
930  {
931  p->scheme = htp_scheme_nfs;
932  break;
933  }
934 
935  break;
936  case 4:
937  if (_str4cmp(p->scheme_offset, 'h', 't', 't', 'p'))
938  {
939  p->scheme = htp_scheme_http;
940  break;
941  }
942  break;
943  case 5:
944  if (_str5cmp(p->scheme_offset, 'h', 't', 't', 'p', 's'))
945  {
946  p->scheme = htp_scheme_https;
947  break;
948  }
949  break;
950  } /* switch */
951 
952  res = hook_scheme_run(p, hooks,
953  p->scheme_offset,
954  (&p->buf[p->buf_idx] - p->scheme_offset));
955 
956  HTP_SET_BUF(ch);
957 
958  p->state = s_schema_slash;
959 
960  if (res) {
961  p->error = htparse_error_user;
962  return i + 1;
963  }
964 
965  break;
966  default:
967  p->error = htparse_error_inval_schema;
968  return i + 1;
969  } /* switch */
970 
971  break;
972  case s_schema_slash:
973  log_debug("[%p] s_schema_slash", p);
974 
975  switch (ch) {
976  case '/':
977  HTP_SET_BUF(ch);
978 
980  break;
981  default:
982  p->error = htparse_error_inval_schema;
983  return i + 1;
984  }
985  break;
987  log_debug("[%p] s_schema_slash_slash", p);
988 
989  switch (ch) {
990  case '/':
991  HTP_SET_BUF(ch);
992  p->host_offset = &p->buf[p->buf_idx];
993 
994  p->state = s_host;
995  break;
996  default:
997  p->error = htparse_error_inval_schema;
998  return i + 1;
999  }
1000  break;
1001  case s_host:
1002  if (ch == '[') {
1003  /* Literal IPv6 address start. */
1004  HTP_SET_BUF(ch);
1005  p->host_offset = &p->buf[p->buf_idx];
1006 
1007  p->state = s_host_ipv6;
1008  break;
1009  }
1010 
1011  if (is_host_char(ch)) {
1012  HTP_SET_BUF(ch);
1013  break;
1014  }
1015 
1016  res = hook_host_run(p, hooks,
1017  p->host_offset,
1018  (&p->buf[p->buf_idx] - p->host_offset));
1019 
1020  if (res)
1021  {
1022  p->error = htparse_error_user;
1023  return i + 1;
1024  }
1025 
1026  /* successfully parsed a NON-IPV6 hostname, knowing this, the
1027  * current character in 'ch' is actually the next state, so we
1028  * fall through to avoid another loop.
1029  */
1030  case s_host_done:
1031  res = 0;
1032 
1033  switch (ch) {
1034  case ':':
1035  HTP_SET_BUF(ch);
1036 
1037  p->port_offset = &p->buf[p->buf_idx];
1038  p->state = s_port;
1039  break;
1040  case ' ':
1041  /* this technically should never happen, but we should
1042  * check anyway
1043  */
1044  if (i == 0)
1045  {
1046  p->error = htparse_error_inval_state;
1047  return i + 1;
1048  }
1049 
1050  i--;
1051  ch = '/';
1052  /* to accept requests like <method> <proto>://<host> <ver>
1053  * we fallthrough to the next case.
1054  */
1055  case '/':
1056  p->path_offset = &p->buf[p->buf_idx];
1057 
1058  HTP_SET_BUF(ch);
1059 
1061  break;
1062  default:
1063  p->error = htparse_error_inval_schema;
1064  return i + 1;
1065  } /* switch */
1066 
1067  if (res)
1068  {
1069  p->error = htparse_error_user;
1070  return i + 1;
1071  }
1072 
1073  break;
1074  case s_host_ipv6:
1075  c = (unsigned char)(ch | 0x20);
1076 
1077  if ((c >= 'a' && c <= 'f')
1078  || (ch >= '0' && ch <= '9')
1079  || ch == ':'
1080  || ch == '.') {
1081  HTP_SET_BUF(ch);
1082  break;
1083  }
1084 
1085  switch (ch) {
1086  case ']':
1087  res = hook_host_run(p, hooks, p->host_offset,
1088  (&p->buf[p->buf_idx] - p->host_offset));
1089  if (res) {
1090  p->error = htparse_error_user;
1091  return i + 1;
1092  }
1093 
1094  HTP_SET_BUF(ch);
1095 
1096  p->state = s_host_done;
1097  break;
1098  default:
1099  p->error = htparse_error_inval_schema;
1100  return i + 1;
1101  } /* switch */
1102  break;
1103  case s_port:
1104  if (ch >= '0' && ch <= '9') {
1105  HTP_SET_BUF(ch);
1106  break;
1107  }
1108 
1109  res = hook_port_run(p, hooks, p->port_offset,
1110  (&p->buf[p->buf_idx] - p->port_offset));
1111 
1112  switch (ch) {
1113  case ' ':
1114  /* this technically should never happen, but we should
1115  * check anyway
1116  */
1117  if (i == 0)
1118  {
1119  p->error = htparse_error_inval_state;
1120  return i + 1;
1121  }
1122 
1123  i--;
1124  ch = '/';
1125  /* to accept requests like <method> <proto>://<host> <ver>
1126  * we fallthrough to the next case.
1127  */
1128  case '/':
1129  HTP_SET_BUF(ch);
1130  p->path_offset = &p->buf[p->buf_idx - 1];
1131 
1133  break;
1134  default:
1135  p->error = htparse_error_inval_reqline;
1136  log_debug("[s_port] inval_reqline");
1137  log_htparser__s_(p);
1138 
1139  return i + 1;
1140  } /* switch */
1141 
1142  if (res)
1143  {
1144  p->error = htparse_error_user;
1145  return i + 1;
1146  }
1147 
1148  break;
1149  case s_after_slash_in_uri:
1150  log_debug("[%p] s_after_slash_in_uri", p);
1151 
1152  res = 0;
1153 
1154  if (usual[ch >> 5] & (1 << (ch & 0x1f)))
1155  {
1156  HTP_SET_BUF(ch);
1157 
1158  p->state = s_check_uri;
1159  break;
1160  }
1161 
1162  switch (ch) {
1163  case ' ':
1164  {
1165  int r1 = hook_path_run(p, hooks, p->path_offset,
1166  (&p->buf[p->buf_idx] - p->path_offset));
1167  int r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
1168 
1169  p->state = s_http_09;
1170  p->buf_idx = 0;
1171 
1172  if (r1 || r2)
1173  {
1174  res = 1;
1175  }
1176  }
1177 
1178  break;
1179  case CR:
1180  p->minor = 9;
1181  p->state = s_almost_done;
1182  break;
1183  case LF:
1184  p->minor = 9;
1185  p->state = s_hdrline_start;
1186  break;
1187  case '.':
1188  case '%':
1189  case '/':
1190  case '#':
1191  HTP_SET_BUF(ch);
1192  p->state = s_uri;
1193  break;
1194  case '?':
1195  res = hook_path_run(p, hooks, p->path_offset,
1196  (&p->buf[p->buf_idx] - p->path_offset));
1197 
1198  HTP_SET_BUF(ch);
1199 
1200  p->args_offset = &p->buf[p->buf_idx];
1201  p->state = s_uri;
1202 
1203  break;
1204  default:
1205  HTP_SET_BUF(ch);
1206 
1207  p->state = s_check_uri;
1208  break;
1209  } /* switch */
1210 
1211  if (res)
1212  {
1213  p->error = htparse_error_user;
1214  return i + 1;
1215  }
1216 
1217  break;
1218 
1219  case s_check_uri:
1220 
1221  res = 0;
1222 
1223  do {
1224  log_debug("[%p] s_check_uri", p);
1225 
1226  if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
1227  HTP_SET_BUF(ch);
1228  } else {
1229  break;
1230  }
1231 
1232  ch = data[++i];
1233  } while (i < len);
1234 
1235  switch (ch) {
1236  case ' ':
1237  {
1238  int r1 = 0;
1239  int r2 = 0;
1240 
1241  if (p->args_offset)
1242  {
1243  r1 = hook_args_run(p, hooks, p->args_offset,
1244  (&p->buf[p->buf_idx] - p->args_offset));
1245  } else {
1246  r1 = hook_path_run(p, hooks, p->path_offset,
1247  (&p->buf[p->buf_idx] - p->path_offset));
1248  }
1249 
1250  r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
1251  p->buf_idx = 0;
1252  p->state = s_http_09;
1253 
1254  if (r1 || r2)
1255  {
1256  res = 1;
1257  }
1258  }
1259  break;
1260  case '/':
1261  HTP_SET_BUF(ch);
1262 
1264  break;
1265  case CR:
1266  p->minor = 9;
1267  p->buf_idx = 0;
1268  p->state = s_almost_done;
1269  break;
1270  case LF:
1271  p->minor = 9;
1272  p->buf_idx = 0;
1273 
1274  p->state = s_hdrline_start;
1275  break;
1276  case '?':
1277  res = hook_path_run(p, hooks,
1278  p->path_offset,
1279  (&p->buf[p->buf_idx] - p->path_offset));
1280  HTP_SET_BUF(ch);
1281 
1282  p->args_offset = &p->buf[p->buf_idx];
1283  p->state = s_uri;
1284  break;
1285  default:
1286  HTP_SET_BUF(ch);
1287 
1288  p->state = s_uri;
1289  break;
1290  } /* switch */
1291 
1292  if (res)
1293  {
1294  p->error = htparse_error_user;
1295  return i + 1;
1296  }
1297 
1298  break;
1299 
1300  case s_uri:
1301  log_debug("[%p] s_uri", p);
1302 
1303  res = 0;
1304 
1305  do {
1306  if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
1307  HTP_SET_BUF(ch);
1308  } else {
1309  break;
1310  }
1311 
1312  ch = data[++i];
1313  } while (i < len);
1314 
1315  switch (ch) {
1316  case ' ':
1317  {
1318  int r1 = 0;
1319  int r2 = 0;
1320 
1321  if (p->args_offset)
1322  {
1323  r1 = hook_args_run(p, hooks, p->args_offset,
1324  (&p->buf[p->buf_idx] - p->args_offset));
1325  } else {
1326  r1 = hook_path_run(p, hooks, p->path_offset,
1327  (&p->buf[p->buf_idx] - p->path_offset));
1328  }
1329 
1330  p->buf_idx = 0;
1331  p->state = s_http_09;
1332 
1333  if (r1 || r2)
1334  {
1335  res = 1;
1336  }
1337  }
1338  break;
1339  case CR:
1340  p->minor = 9;
1341  p->buf_idx = 0;
1342  p->state = s_almost_done;
1343  break;
1344  case LF:
1345  p->minor = 9;
1346  p->buf_idx = 0;
1347  p->state = s_hdrline_start;
1348  break;
1349  case '?':
1350  /* RFC 3986 section 3.4:
1351  * The query component is indicated by the
1352  * first question mark ("?") character and
1353  * terminated by a number sign ("#") character
1354  * or by the end of the URI. */
1355  if (!p->args_offset) {
1356  res = hook_path_run(p, hooks, p->path_offset,
1357  (&p->buf[p->buf_idx] - p->path_offset));
1358 
1359  HTP_SET_BUF(ch);
1360  p->args_offset = &p->buf[p->buf_idx];
1361  break;
1362  }
1363  /* Fall through. */
1364  default:
1365  HTP_SET_BUF(ch);
1366  break;
1367  } /* switch */
1368 
1369  if (res)
1370  {
1371  p->error = htparse_error_user;
1372  return i + 1;
1373  }
1374 
1375  break;
1376 
1377  case s_http_09:
1378  log_debug("[%p] s_http_09", p);
1379 
1380  switch (ch) {
1381  case ' ':
1382  break;
1383  case CR:
1384  p->minor = 9;
1385  p->buf_idx = 0;
1386  p->state = s_almost_done;
1387  break;
1388  case LF:
1389  p->minor = 9;
1390  p->buf_idx = 0;
1391  p->state = s_hdrline_start;
1392  break;
1393  case 'H':
1394  p->buf_idx = 0;
1395  p->state = s_http_H;
1396  break;
1397  default:
1398  p->error = htparse_error_inval_proto;
1399  return i + 1;
1400  } /* switch */
1401 
1402  break;
1403  case s_http_H:
1404  log_debug("[%p] s_http_H", p);
1405 
1406  switch (ch) {
1407  case 'T':
1408  p->state = s_http_HT;
1409  break;
1410  default:
1411  p->error = htparse_error_inval_proto;
1412  return i + 1;
1413  }
1414  break;
1415  case s_http_HT:
1416  switch (ch) {
1417  case 'T':
1418  p->state = s_http_HTT;
1419  break;
1420  default:
1421  p->error = htparse_error_inval_proto;
1422  return i + 1;
1423  }
1424  break;
1425  case s_http_HTT:
1426  switch (ch) {
1427  case 'P':
1428  p->state = s_http_HTTP;
1429  break;
1430  default:
1431  p->error = htparse_error_inval_proto;
1432  return i + 1;
1433  }
1434  break;
1435  case s_http_HTTP:
1436  switch (ch) {
1437  case '/':
1439  break;
1440  default:
1441  p->error = htparse_error_inval_proto;
1442  return i + 1;
1443  }
1444  break;
1445  case s_first_major_digit:
1446  if (ch < '1' || ch > '9')
1447  {
1448  p->error = htparse_error_inval_ver;
1449  return i + 1;
1450  }
1451 
1452  p->major = ch - '0';
1453  p->state = s_major_digit;
1454  break;
1455  case s_major_digit:
1456  if (ch == '.')
1457  {
1459  break;
1460  }
1461 
1462  if (ch < '0' || ch > '9')
1463  {
1464  p->error = htparse_error_inval_ver;
1465  return i + 1;
1466  }
1467 
1468  p->major = p->major * 10 + ch - '0';
1469  break;
1470  case s_first_minor_digit:
1471  if (ch < '0' || ch > '9')
1472  {
1473  p->error = htparse_error_inval_ver;
1474  return i + 1;
1475  }
1476 
1477  p->minor = ch - '0';
1478  p->state = s_minor_digit;
1479  break;
1480  case s_minor_digit:
1481  switch (ch) {
1482  case ' ':
1483  if (evhtp_likely(p->type == htp_type_request))
1484  {
1486  } else if (p->type == htp_type_response)
1487  {
1488  p->state = s_status;
1489  }
1490 
1491  break;
1492  case CR:
1493  p->state = s_almost_done;
1494  break;
1495  case LF:
1496  /* LF without a CR? error.... */
1497  p->error = htparse_error_inval_reqline;
1498  log_debug("[s_minor_digit] LF without CR!");
1499  log_htparser__s_(p);
1500 
1501  return i + 1;
1502  default:
1503  if (ch < '0' || ch > '9')
1504  {
1505  p->error = htparse_error_inval_ver;
1506  return i + 1;
1507  }
1508 
1509  p->minor = p->minor * 10 + ch - '0';
1510  break;
1511  } /* switch */
1512  break;
1513  case s_status:
1514  /* http response status code */
1515  if (ch == ' ')
1516  {
1517  if (p->status)
1518  {
1519  p->state = s_status_text;
1520  }
1521  break;
1522  }
1523 
1524  if (ch < '0' || ch > '9')
1525  {
1526  p->error = htparse_error_status;
1527  return i + 1;
1528  }
1529 
1530  p->status = p->status * 10 + ch - '0';
1531 
1532  if (++p->status_count == 3)
1533  {
1535  }
1536 
1537  break;
1538  case s_space_after_status:
1539  switch (ch) {
1540  case ' ':
1541  p->state = s_status_text;
1542  break;
1543  case CR:
1544  p->state = s_almost_done;
1545  break;
1546  case LF:
1547  p->state = s_hdrline_start;
1548  break;
1549  default:
1550  p->error = htparse_error_generic;
1551  return i + 1;
1552  }
1553  break;
1554  case s_status_text:
1555  switch (ch) {
1556  case CR:
1557  p->state = s_almost_done;
1558  break;
1559  case LF:
1560  p->state = s_hdrline_start;
1561  break;
1562  default:
1563  break;
1564  }
1565  break;
1566  case s_spaces_after_digit:
1567  switch (ch) {
1568  case ' ':
1569  break;
1570  case CR:
1571  p->state = s_almost_done;
1572  break;
1573  case LF:
1574  p->state = s_hdrline_start;
1575  break;
1576  default:
1577  p->error = htparse_error_inval_ver;
1578  return i + 1;
1579  }
1580  break;
1581 
1582  case s_almost_done:
1583  switch (ch) {
1584  case LF:
1585  if (p->type == htp_type_response && p->status >= 100 && p->status < 200)
1586  {
1587  res = hook_on_hdrs_begin_run(p, hooks);
1588 
1589  if (res)
1590  {
1591  p->error = htparse_error_user;
1592  return i + 1;
1593  }
1594 
1595  p->status = 0;
1596  p->status_count = 0;
1597  p->state = s_start;
1598  break;
1599  }
1600 
1601  p->state = s_done;
1602  res = hook_on_hdrs_begin_run(p, hooks);
1603  if (res)
1604  {
1605  p->error = htparse_error_user;
1606  return i + 1;
1607  }
1608  break;
1609  default:
1610  p->error = htparse_error_inval_reqline;
1611  log_htparser__s_(p);
1612 
1613  return i + 1;
1614  } /* switch */
1615  break;
1616  case s_done:
1617  switch (ch) {
1618  case CR:
1620  break;
1621  case LF:
1622  return i + 1;
1623  default:
1624  goto hdrline_start;
1625  }
1626  break;
1627 hdrline_start:
1628  case s_hdrline_start:
1629  log_debug("[%p] s_hdrline_start", p);
1630 
1631  p->buf_idx = 0;
1632 
1633  switch (ch) {
1634  case CR:
1636  break;
1637  case LF:
1639  break;
1640  default:
1641  HTP_SET_BUF(ch);
1642 
1643  p->state = s_hdrline_hdr_key;
1644  break;
1645  }
1646 
1647  break;
1648  case s_hdrline_hdr_key:
1649  log_debug("[%p] s_hdrline_hdr_key", p);
1650 
1651  do {
1652  if (evhtp_unlikely(ch == ':'))
1653  {
1654  res = hook_hdr_key_run(p, hooks, p->buf, p->buf_idx);
1655 
1656  /* figure out if the value of this header is valuable */
1657  p->heval = eval_hdr_val_none;
1658 
1659  switch (p->buf_idx + 1) {
1660  case 5:
1661  if (!strcasecmp(p->buf, "host"))
1662  {
1664  }
1665  break;
1666  case 11:
1667  if (!strcasecmp(p->buf, "connection"))
1668  {
1670  }
1671  break;
1672  case 13:
1673  if (!strcasecmp(p->buf, "content-type"))
1674  {
1676  }
1677  break;
1678  case 15:
1679  if (!strcasecmp(p->buf, "content-length"))
1680  {
1682  }
1683  break;
1684  case 17:
1685  if (!strcasecmp(p->buf, "proxy-connection"))
1686  {
1688  }
1689  break;
1690  case 18:
1691  if (!strcasecmp(p->buf, "transfer-encoding"))
1692  {
1694  }
1695  break;
1696  } /* switch */
1697 
1698  p->buf_idx = 0;
1700 
1701  if (res)
1702  {
1703  p->error = htparse_error_user;
1704  return i + 1;
1705  }
1706 
1707  break;
1708  }
1709 
1710  switch (ch) {
1711  case CR:
1713  break;
1714  case LF:
1716  break;
1717  default:
1718  HTP_SET_BUF(ch);
1719  break;
1720  }
1721 
1722  if (p->state != s_hdrline_hdr_key)
1723  {
1724  break;
1725  }
1726 
1727  ch = data[++i];
1728  } while (i < len);
1729 
1730  break;
1731 
1733  log_debug("[%p] s_hdrline_hdr_space_before_val", p);
1734 
1735  switch (ch) {
1736  case ' ':
1737  break;
1738  case CR:
1739  /*
1740  * we have an empty header value here, so we set the buf
1741  * to empty, set the state to hdrline_hdr_val, and
1742  * decrement the start byte counter.
1743  */
1744  HTP_SET_BUF(' ');
1745  p->state = s_hdrline_hdr_val;
1746 
1747  /*
1748  * make sure the next pass comes back to this CR byte,
1749  * so it matches in s_hdrline_hdr_val.
1750  */
1751  i--;
1752  break;
1753  case LF:
1754  /* never got a CR for an empty header, this is an
1755  * invalid state.
1756  */
1757  p->error = htparse_error_inval_hdr;
1758  return i + 1;
1759  default:
1760  HTP_SET_BUF(ch);
1761  p->state = s_hdrline_hdr_val;
1762  break;
1763  } /* switch */
1764  break;
1765  case s_hdrline_hdr_val:
1766  err = 0;
1767 
1768  do {
1769  log_debug("[%p] s_hdrline_hdr_val", p);
1770  if (ch == CR)
1771  {
1772  switch (p->heval) {
1773  case eval_hdr_val_none:
1774  break;
1775  case eval_hdr_val_hostname:
1776  if (hook_hostname_run(p, hooks, p->buf, p->buf_idx))
1777  {
1779  p->error = htparse_error_user;
1780  return i + 1;
1781  }
1782 
1783  break;
1785  p->content_len = str_to_uint64(p->buf, p->buf_idx, &err);
1786  p->orig_content_len = p->content_len;
1787 
1788  log_debug("[%p] s_hdrline_hdr_val content-lenth = %zu", p, p->content_len);
1789 
1790  if (err == 1)
1791  {
1792  p->error = htparse_error_too_big;
1793  return i + 1;
1794  }
1795 
1796  break;
1798  switch (p->buf[0]) {
1799  char A_case;
1800  char C_case;
1801  const char * S_buf;
1802 
1803  case 'K':
1804  case 'k':
1805  if (p->buf_idx != 10)
1806  {
1807  break;
1808  }
1809 
1810  A_case = (p->buf[5] == 'A') ? 'A' : 'a';
1811  S_buf = (const char *)(p->buf + 1);
1812 
1813  if (_str9cmp(S_buf,
1814  'e', 'e', 'p', '-', A_case, 'l', 'i', 'v', 'e'))
1815  {
1817  }
1818  break;
1819  case 'c':
1820  case 'C':
1821  if (p->buf_idx != 5)
1822  {
1823  break;
1824  }
1825 
1826  C_case = (p->buf[0] == 'C') ? 'C' : 'c';
1827  S_buf = (const char *)p->buf;
1828 
1829  if (_str5cmp(S_buf, C_case, 'l', 'o', 's', 'e'))
1830  {
1832  }
1833  break;
1834  } /* switch */
1835  break;
1837  if (p->buf_idx != 7)
1838  {
1839  break;
1840  }
1841 
1842  switch (p->buf[0]) {
1843  const char * S_buf;
1844 
1845  case 'c':
1846  case 'C':
1847  if (p->buf_idx != 7)
1848  {
1849  break;
1850  }
1851 
1852  S_buf = (const char *)(p->buf + 1);
1853 
1854  if (_str6cmp(S_buf, 'h', 'u', 'n', 'k', 'e', 'd'))
1855  {
1856  p->flags |= parser_flag_chunked;
1857  }
1858 
1859  break;
1860  }
1861 
1862  break;
1864  if (p->buf_idx != 9)
1865  {
1866  break;
1867  }
1868 
1869  switch (p->buf[0]) {
1870  const char * S_buf;
1871 
1872  case 'm':
1873  case 'M':
1874  S_buf = (const char *)(p->buf + 1);
1875 
1876  if (_str8cmp(S_buf, 'u', 'l', 't', 'i', 'p', 'a', 'r', 't'))
1877  {
1878  p->multipart = 1;
1879  }
1880 
1881  break;
1882  }
1883 
1884  break;
1886  default:
1887  break;
1888  } /* switch */
1889 
1891 
1892  break;
1893  }
1894 
1895  switch (ch) {
1896  case LF:
1897  /* LF before CR? invalid */
1898  p->error = htparse_error_inval_hdr;
1899  return i + 1;
1900  default:
1901  HTP_SET_BUF(ch);
1902  break;
1903  } /* switch */
1904 
1905  if (p->state != s_hdrline_hdr_val)
1906  {
1907  break;
1908  }
1909 
1910  ch = data[++i];
1911  } while (i < len);
1912 
1913  break;
1915  log_debug("[%p] s_hdrline_hdr_almost_done", p);
1916 
1917  res = 0;
1918  switch (ch) {
1919  case LF:
1920  if (p->flags & parser_flag_trailing)
1921  {
1922  res = hook_on_msg_complete_run(p, hooks);
1923  p->state = s_start;
1924  break;
1925  }
1926 
1928  break;
1929  default:
1930  p->error = htparse_error_inval_hdr;
1931  return i + 1;
1932  }
1933 
1934  if (res)
1935  {
1936  p->error = htparse_error_user;
1937  return i + 1;
1938  }
1939 
1940  break;
1941  case s_hdrline_hdr_done:
1942  log_debug("[%p] s_hdrline_hdr_done", p);
1943 
1944  switch (ch) {
1945  case CR:
1946  res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
1948 
1949  if (res)
1950  {
1951  p->error = htparse_error_user;
1952  return i + 1;
1953  }
1954 
1955  break;
1956  case LF:
1957  /* got LFLF? is this valid? */
1958  p->error = htparse_error_inval_hdr;
1959 
1960  return i + 1;
1961  case '\t':
1962  /* this is a multiline header value, we must go back to
1963  * reading as a header value */
1964  p->state = s_hdrline_hdr_val;
1965  break;
1966  default:
1967  res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
1968  p->buf_idx = 0;
1969 
1970  HTP_SET_BUF(ch);
1971 
1972  p->state = s_hdrline_hdr_key;
1973 
1974  if (res) {
1975  p->error = htparse_error_user;
1976  return i + 1;
1977  }
1978 
1979  break;
1980  } /* switch */
1981  break;
1982  case s_hdrline_almost_done:
1983  log_debug("[%p] s_hdrline_almost_done", p);
1984 
1985  switch (ch) {
1986  case LF:
1987  res = hook_on_hdrs_complete_run(p, hooks);
1988 
1989  if (res != 0)
1990  {
1991  p->error = htparse_error_user;
1992  return i + 1;
1993  }
1994 
1995  p->buf_idx = 0;
1996 
1997  if (p->flags & parser_flag_trailing)
1998  {
1999  res = hook_on_msg_complete_run(p, hooks);
2000  p->state = s_start;
2001  } else if (p->flags & parser_flag_chunked)
2002  {
2004  } else if (p->content_len > 0)
2005  {
2006  p->state = s_body_read;
2007  } else if (p->content_len == 0)
2008  {
2009  res = hook_on_msg_complete_run(p, hooks);
2010  p->state = s_start;
2011  } else {
2012  p->state = s_hdrline_done;
2013  }
2014 
2015  if (res != 0)
2016  {
2017  p->error = htparse_error_user;
2018  return i + 1;
2019  }
2020  break;
2021 
2022  default:
2023  p->error = htparse_error_inval_hdr;
2024  return i + 1;
2025  } /* switch */
2026 
2027  if (res != 0)
2028  {
2029  p->error = htparse_error_user;
2030  return i + 1;
2031  }
2032 
2033  break;
2034  case s_hdrline_done:
2035  log_debug("[%p] s_hdrline_done", p);
2036 
2037  res = 0;
2038 
2039  if (p->flags & parser_flag_trailing)
2040  {
2041  res = hook_on_msg_complete_run(p, hooks);
2042  p->state = s_start;
2043  } else if (p->flags & parser_flag_chunked)
2044  {
2046  i--;
2047  } else if (p->content_len > 0)
2048  {
2049  p->state = s_body_read;
2050  i--;
2051  } else if (p->content_len == 0)
2052  {
2053  res = hook_on_msg_complete_run(p, hooks);
2054  p->state = s_start;
2055  }
2056 
2057  if (res)
2058  {
2059  p->error = htparse_error_user;
2060  return i + 1;
2061  }
2062 
2063  break;
2064  case s_chunk_size_start:
2065  c = unhex[(unsigned char)ch];
2066 
2067  if (c == -1)
2068  {
2069  p->error = htparse_error_inval_chunk_sz;
2070  return i + 1;
2071  }
2072 
2073  p->content_len = c;
2074  p->state = s_chunk_size;
2075  break;
2076  case s_chunk_size:
2077  if (ch == CR)
2078  {
2080  break;
2081  }
2082 
2083  c = unhex[(unsigned char)ch];
2084 
2085  if (c == -1)
2086  {
2087  p->error = htparse_error_inval_chunk_sz;
2088  return i + 1;
2089  }
2090 
2091  p->content_len *= 16;
2092  p->content_len += c;
2093  break;
2094 
2096  if (ch != LF)
2097  {
2098  p->error = htparse_error_inval_chunk_sz;
2099  return i + 1;
2100  }
2101 
2102  p->orig_content_len = p->content_len;
2103 
2104  if (p->content_len == 0)
2105  {
2106  res = hook_on_chunks_complete_run(p, hooks);
2107 
2109  p->state = s_hdrline_start;
2110  } else {
2111  res = hook_on_new_chunk_run(p, hooks);
2112 
2113  p->state = s_chunk_data;
2114  }
2115 
2116  if (res)
2117  {
2118  p->error = htparse_error_user;
2119  return i + 1;
2120  }
2121 
2122  break;
2123 
2124  case s_chunk_data:
2125  res = 0;
2126  {
2127  const char * pp = &data[i];
2128  const char * pe = (const char *)(data + len);
2129  size_t to_read = _MIN_READ(pe - pp, p->content_len);
2130 
2131  if (to_read > 0)
2132  {
2133  res = hook_body_run(p, hooks, pp, to_read);
2134 
2135  i += to_read - 1;
2136  }
2137 
2138  if (to_read == p->content_len)
2139  {
2141  }
2142 
2143  p->content_len -= to_read;
2144  }
2145 
2146  if (res)
2147  {
2148  p->error = htparse_error_user;
2149  return i + 1;
2150  }
2151 
2152  break;
2153 
2155  if (ch != CR)
2156  {
2157  p->error = htparse_error_inval_chunk;
2158  return i + 1;
2159  }
2160 
2161  p->state = s_chunk_data_done;
2162  break;
2163 
2164  case s_chunk_data_done:
2165  if (ch != LF)
2166  {
2167  p->error = htparse_error_inval_chunk;
2168  return i + 1;
2169  }
2170 
2171  p->orig_content_len = 0;
2173 
2174  if (hook_on_chunk_complete_run(p, hooks))
2175  {
2176  p->error = htparse_error_user;
2177  return i + 1;
2178  }
2179 
2180  break;
2181 
2182  case s_body_read:
2183  res = 0;
2184 
2185  {
2186  const char * pp = &data[i];
2187  const char * pe = (const char *)(data + len);
2188  size_t to_read = _MIN_READ(pe - pp, p->content_len);
2189 
2190  if (to_read > 0)
2191  {
2192  res = hook_body_run(p, hooks, pp, to_read);
2193 
2194  i += to_read - 1;
2195  p->content_len -= to_read;
2196  }
2197 
2198  if (p->content_len == 0)
2199  {
2200  res = hook_on_msg_complete_run(p, hooks);
2201  p->state = s_start;
2202  }
2203 
2204  if (res)
2205  {
2206  p->error = htparse_error_user;
2207  return i + 1;
2208  }
2209  }
2210 
2211  break;
2212 
2213  default:
2214  log_debug("[%p] This is a silly state....", p);
2215  p->error = htparse_error_inval_state;
2216  return i + 1;
2217  } /* switch */
2218 
2219  /* If we successfully completed a request/response we return
2220  * to caller, and leave it up to him to call us again if
2221  * parsing should continue. */
2222  if (p->state == s_start)
2223  {
2224  return i + 1;
2225  }
2226  } /* switch */
2227 
2228  return i;
2229 } /* htparser_run */
htp_type type
Definition: parser.c:93
uint64_t content_len
Definition: parser.c:100
Definition: parser.c:61
#define HTP_SET_BUF(CH)
Definition: parser.c:706
unsigned char multipart
Definition: parser.c:97
htp_scheme htparser_get_scheme(htparser *p)
Definition: parser.c:469
#define CR
Definition: parser.c:16
unsigned int status
Definition: parser.c:104
void * args
Definition: thread.c:119
uint64_t orig_content_len
Definition: parser.c:101
static int8_t unhex[256]
Definition: parser.c:192
char * args_offset
Definition: parser.c:111
char buf[PARSER_STACK_MAX]
Definition: parser.c:117
unsigned char major
Definition: parser.c:98
htpparse_error error
Definition: parser.c:88
static const char * errstr_map[]
Definition: parser.c:203
#define __HTPARSE_GENHOOK(__n)
Definition: parser.c:303
static int is_host_char(unsigned char ch)
Definition: parser.c:582
#define _str4cmp(m, c0, c1, c2, c3)
Definition: parser.c:248
static uint32_t usual[]
Definition: parser.c:181
#define _MIN_READ(a, b)
Definition: parser.c:238
htp_method method
Definition: parser.c:95
const char * htparser_get_methodstr_m(htp_method meth)
Definition: parser.c:481
unsigned char htparser_get_multipart(htparser *p)
Definition: parser.c:522
char * port_offset
Definition: parser.c:109
Definition: parser.c:37
void htparser_set_minor(htparser *p, unsigned char minor)
Definition: parser.c:504
#define __HTPARSE_GENDHOOK(__n)
Definition: parser.c:314
#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8)
Definition: parser.c:267
uint64_t htparser_get_total_bytes_read(htparser *p)
Definition: parser.c:558
parser_state state
Definition: parser.c:89
void * userdata
Definition: parser.c:113
htp_method htparser_get_method(htparser *p)
Definition: parser.c:475
uint64_t htparser_get_bytes_read(htparser *p)
Definition: parser.c:552
#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)
Definition: parser.c:263
#define _str6cmp(m, c0, c1, c2, c3, c4, c5)
Definition: parser.c:255
size_t htparser_run(htparser *p, htparse_hooks *hooks, const char *data, size_t len)
Definition: parser.c:718
Definition: parser.c:43
const char * htparser_get_strerror(htparser *p)
Definition: parser.c:427
#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)
Definition: parser.c:259
#define _str3Ocmp(m, c0, c1, c2, c3)
Definition: parser.c:245
size_t buf_idx
Definition: parser.c:115
#define _str5cmp(m, c0, c1, c2, c3, c4)
Definition: parser.c:251
eval_hdr_val heval
Definition: parser.c:91
parser_state
Definition: parser.c:36
static const char * method_strmap[]
Definition: parser.c:219
uint64_t bytes_read
Definition: parser.c:102
#define PARSER_STACK_MAX
Definition: parser.c:14
#define log_htparser__s_(p)
Definition: parser.c:178
char * host_offset
Definition: parser.c:108
void htparser_init(htparser *p, htp_type type)
Definition: parser.c:564
uint64_t total_bytes_read
Definition: parser.c:103
const char * htparser_get_methodstr(htparser *p)
Definition: parser.c:492
parser_flags flags
Definition: parser.c:90
unsigned char htparser_get_major(htparser *p)
Definition: parser.c:510
char * scheme_offset
Definition: parser.c:107
static htp_method get_method(const char *m, const size_t sz)
Definition: parser.c:600
unsigned int status_count
Definition: parser.c:105
Definition: parser.c:46
void htparser_set_userdata(htparser *p, void *ud)
Definition: parser.c:534
static uint64_t str_to_uint64(char *str, size_t n, int *err)
Definition: parser.c:349
parser_flags
Definition: parser.c:29
void * htparser_get_userdata(htparser *p)
Definition: parser.c:528
static ssize_t _str_to_ssize_t(char *str, size_t n)
Definition: parser.c:391
htp_scheme scheme
Definition: parser.c:94
htpparse_error htparser_get_error(htparser *p)
Definition: parser.c:421
uint64_t htparser_get_content_pending(htparser *p)
Definition: parser.c:540
Definition: parser.c:49
htparser * htparser_new(void)
Definition: parser.c:576
unsigned int htparser_get_status(htparser *p)
Definition: parser.c:440
uint64_t htparser_get_content_length(htparser *p)
Definition: parser.c:546
eval_hdr_val
Definition: parser.c:19
unsigned char minor
Definition: parser.c:99
unsigned char htparser_get_minor(htparser *p)
Definition: parser.c:516
#define log_debug(M,...)
Definition: log.h:25
#define evhtp_unlikely(x)
Definition: internal.h:18
#define _str3_cmp(m, c0, c1, c2, c3)
Definition: parser.c:242
int htparser_should_keep_alive(htparser *p)
Definition: parser.c:446
#define LF
Definition: parser.c:15
void htparser_set_major(htparser *p, unsigned char major)
Definition: parser.c:498
#define evhtp_likely(x)
Definition: internal.h:17
char * path_offset
Definition: parser.c:110