Libevhtp  1.2.13
parser.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stddef.h>
3 #include <ctype.h>
4 #include <string.h>
5 
6 #ifdef EVHTP_HAS_SYS_TYPES
7 #include <sys/types.h>
8 #endif
9 
10 #include "internal.h"
11 #include "evhtp/parser.h"
12 #include "evhtp/config.h"
13 
14 #if '\n' != '\x0a' || 'A' != 65
15 #error "You have somehow found a non-ASCII host. We can't build here."
16 #endif
17 
18 #define PARSER_STACK_MAX 8192
19 #define LF (unsigned char)10
20 #define CR (unsigned char)13
21 #define CRLF "\x0d\x0a"
22 
31 };
32 
34  parser_flag_chunked = (1 << 0),
38 };
39 
41  s_start = 0,
84 };
85 
89 
90 
91 struct htparser {
92  htpparse_error error;
96 
97  htp_type type;
98  htp_scheme scheme;
99  htp_method method;
100 
101  unsigned char multipart;
102  unsigned char major;
103  unsigned char minor;
104  uint64_t content_len; /* this gets decremented as data passes through */
105  uint64_t orig_content_len; /* this contains the original length of the body */
106  uint64_t bytes_read;
108  unsigned int status; /* only for responses */
109  unsigned int status_count; /* only for responses */
110 
112  char * host_offset;
113  char * port_offset;
114  char * path_offset;
115  char * args_offset;
116 
117  void * userdata;
118 
119  size_t buf_idx;
120  /* Must be last since htparser_init memsets up to the offset of this buffer */
122 };
123 
124 #ifdef EVHTP_DEBUG
125 static void
126 log_htparser__s_(struct htparser * p)
127 {
128  log_debug(
129  "struct htparser {\n"
130  " htpparse_error = %d\n"
131  " parser_state = %d\n"
132  " parser_flags = %d\n"
133  " eval_hdr_val = %d\n"
134  " htp_type = %d\n"
135  " htp_scheme = %d\n"
136  " htp_method = %d\n"
137  " multipart = %c\n"
138  " major = %c\n"
139  " minor = %c\n"
140  " content_len = %zu\n"
141  " orig_clen = %zu\n"
142  " bytes_read = %zu\n"
143  " total_read = %zu\n"
144  " status = %d\n"
145  " status_count = %d\n"
146  " scheme_offset = %s\n"
147  " host_offset = %s\n"
148  " port_offset = %s\n"
149  " path_offset = %s\n"
150  " args_offset = %s\n"
151  " userdata = %p\n"
152  " buf_idx = %zu\n"
153  " buf = %s\n"
154  "};",
155  p->error,
156  p->state,
157  p->flags,
158  p->heval,
159  p->type,
160  p->scheme,
161  p->method,
162  p->multipart,
163  p->major,
164  p->minor,
165  p->content_len,
166  p->orig_content_len,
167  p->bytes_read,
168  p->total_bytes_read,
169  p->status,
170  p->status_count,
171  p->scheme_offset,
172  p->host_offset,
173  p->port_offset,
174  p->path_offset,
175  p->args_offset,
176  p->userdata,
177  p->buf_idx,
178  p->buf);
179 } /* log_htparser__s_ */
180 
181 #else
182 #define log_htparser__s_(p)
183 #endif
184 
185 static uint32_t usual[] = {
186  0xffffdbfe,
187  0x7fff37d6,
188  0xffffffff,
189  0xffffffff,
190  0xffffffff,
191  0xffffffff,
192  0xffffffff,
193  0xffffffff
194 };
195 
/*
 * Byte -> hexadecimal digit value lookup table; -1 marks "not a hex digit".
 *
 * All 256 entries are spelled out on purpose: elements left out of an
 * initializer list are zero-initialized, which would make every byte in the
 * 0x80-0xff range look like the valid digit 0.
 */
static int8_t unhex[256] = {
    /* 0x00 - 0x0f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 - 0x1f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 - 0x2f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x30 - 0x3f: '0'..'9' */
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  -1, -1, -1, -1, -1, -1,
    /* 0x40 - 0x4f: 'A'..'F' */
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 - 0x5f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x60 - 0x6f: 'a'..'f' */
    -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 - 0x7f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 - 0xff: never hexadecimal digits */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
206 
207 static const char * errstr_map[] = {
208  "htparse_error_none",
209  "htparse_error_too_big",
210  "htparse_error_invalid_method",
211  "htparse_error_invalid_requestline",
212  "htparse_error_invalid_schema",
213  "htparse_error_invalid_protocol",
214  "htparse_error_invalid_version",
215  "htparse_error_invalid_header",
216  "htparse_error_invalid_chunk_size",
217  "htparse_error_invalid_chunk",
218  "htparse_error_invalid_state",
219  "htparse_error_user",
220  "htparse_error_status",
221  "htparse_error_unknown"
222 };
223 
/*
 * Printable method names, indexed by htp_method value (see
 * htparser_get_methodstr_m). The spellings must match the tokens accepted by
 * get_method(); in particular the WebDAV method is "PROPPATCH".
 */
static const char * method_strmap[] = {
    "GET",
    "HEAD",
    "POST",
    "PUT",
    "DELETE",
    "MKCOL",
    "COPY",
    "MOVE",
    "OPTIONS",
    "PROPFIND",
    "PROPPATCH",
    "LOCK",
    "UNLOCK",
    "TRACE",
    "CONNECT",
    "PATCH",
};
242 
243 #define _MIN_READ(a, b) ((a) < (b) ? (a) : (b))
244 
245 #ifndef HOST_BIG_ENDIAN
246 /* Little-endian cmp macros */
247 #define _str3_cmp(m, c0, c1, c2, c3) \
248  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0)
249 
250 #define _str3Ocmp(m, c0, c1, c2, c3) \
251  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0)
252 
253 #define _str4cmp(m, c0, c1, c2, c3) \
254  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0)
255 
256 #define _str5cmp(m, c0, c1, c2, c3, c4) \
257  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
258  && m[4] == c4
259 
260 #define _str6cmp(m, c0, c1, c2, c3, c4, c5) \
261  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
262  && (((uint32_t *)m)[1] & 0xffff) == ((c5 << 8) | c4)
263 
264 #define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
265  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
266  && ((uint32_t *)m)[1] == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4)
267 
268 #define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
269  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
270  && ((uint32_t *)m)[1] == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4)
271 
272 #define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \
273  *(uint32_t *)m == ((c3 << 24) | (c2 << 16) | (c1 << 8) | c0) \
274  && ((uint32_t *)m)[1] == ((c7 << 24) | (c6 << 16) | (c5 << 8) | c4) \
275  && m[8] == c8
276 #else
277 /* Big endian cmp macros */
278 #define _str3_cmp(m, c0, c1, c2, c3) \
279  m[0] == c0 && m[1] == c1 && m[2] == c2
280 
281 #define _str3Ocmp(m, c0, c1, c2, c3) \
282  m[0] == c0 && m[2] == c2 && m[3] == c3
283 
284 #define _str4cmp(m, c0, c1, c2, c3) \
285  m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3
286 
287 #define _str5cmp(m, c0, c1, c2, c3, c4) \
288  m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3 && m[4] == c4
289 
290 #define _str6cmp(m, c0, c1, c2, c3, c4, c5) \
291  m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3 \
292  && m[4] == c4 && m[5] == c5
293 
294 #define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
295  m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3 \
296  && m[4] == c4 && m[5] == c5 && m[6] == c6
297 
298 #define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7) \
299  m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3 \
300  && m[4] == c4 && m[5] == c5 && m[6] == c6 && m[7] == c7
301 
302 #define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8) \
303  m[0] == c0 && m[1] == c1 && m[2] == c2 && m[3] == c3 \
304  && m[4] == c4 && m[5] == c5 && m[6] == c6 && m[7] == c7 && m[8] == c8
305 
306 #endif
307 
/*
 * Generates hook_<name>_run(p, hooks) for a callback that takes no data.
 * The generated function returns the callback's result, or 0 when either the
 * hook table or that particular callback is not set.
 */
#define __HTPARSE_GENHOOK(__n)                                                    \
    static inline int hook_ ## __n ## _run(htparser * p, htparse_hooks * hooks) { \
        log_debug("enter");                                                       \
        if (!hooks || !hooks->__n)                                                \
        {                                                                         \
            return 0;                                                             \
        }                                                                         \
                                                                                  \
        return hooks->__n(p);                                                     \
    }
318 
/*
 * Generates hook_<name>_run(p, hooks, s, l) for a callback that receives a
 * data span (pointer + length). The generated function returns the callback's
 * result, or 0 when either the hook table or that callback is not set.
 */
#define __HTPARSE_GENDHOOK(__n)                                  \
    static inline int hook_ ## __n ## _run(htparser * p,         \
                                           htparse_hooks * hooks, \
                                           const char * s, size_t l) { \
        log_debug("enter");                                      \
        if (!hooks || !hooks->__n)                               \
        {                                                        \
            return 0;                                            \
        }                                                        \
                                                                 \
        return hooks->__n(p, s, l);                              \
    }
331 
332 __HTPARSE_GENHOOK(on_msg_begin)
333 __HTPARSE_GENHOOK(on_hdrs_begin)
334 __HTPARSE_GENHOOK(on_hdrs_complete)
335 __HTPARSE_GENHOOK(on_new_chunk)
336 __HTPARSE_GENHOOK(on_chunk_complete)
337 __HTPARSE_GENHOOK(on_chunks_complete)
338 __HTPARSE_GENHOOK(on_msg_complete)
339 
340 __HTPARSE_GENDHOOK(method)
341 __HTPARSE_GENDHOOK(scheme)
342 __HTPARSE_GENDHOOK(host)
343 __HTPARSE_GENDHOOK(port)
344 __HTPARSE_GENDHOOK(path)
347 __HTPARSE_GENDHOOK(hdr_key)
348 __HTPARSE_GENDHOOK(hdr_val)
349 __HTPARSE_GENDHOOK(body)
350 __HTPARSE_GENDHOOK(hostname)
351 
352 
/**
 * @brief Parse an unsigned decimal number from a length-delimited buffer.
 *
 * Trailing blanks (space / tab) are ignored. Anything else that is not an
 * ASCII digit, more than 20 remaining characters, or a value that does not
 * fit in 64 bits is reported as an error. An empty (or all-blank) input
 * yields 0 without an error.
 *
 * @param str buffer to parse; does not need to be NUL terminated
 * @param n   number of bytes available at str
 * @param err set to 1 on failure; left untouched on success, so the caller
 *            is expected to clear it beforehand
 *
 * @return the parsed value, or 0 on failure
 */
static inline uint64_t
str_to_uint64(char * str, size_t n, int * err)
{
    const uint64_t max   = ~(uint64_t)0;
    uint64_t       value = 0;

    /* Trim whitespace after value; ctype functions need an unsigned char. */
    while (n && isblank((unsigned char)str[n - 1]))
    {
        n--;
    }

    if (n > 20)
    {
        /* 18446744073709551615 is 20 bytes */
        *err = 1;
        return 0;
    }

    for (; n--; str++)
    {
        uint64_t digit;

        if (*str < '0' || *str > '9')
        {
            *err = 1;
            return 0;
        }

        digit = (uint64_t)(*str - '0');

        /* Reject before multiplying: comparing the wrapped result against
         * the previous value does not catch every overflow. */
        if (value > (max - digit) / 10)
        {
            *err = 1;
            return 0;
        }

        value = value * 10 + digit;
    }

    return value;
}
394 
/**
 * @brief Parse a non-negative decimal number from a length-delimited buffer.
 *
 * @param str buffer to parse; does not need to be NUL terminated
 * @param n   number of bytes available at str
 *
 * @return the parsed value, or -1 when the input is empty, contains a
 *         non-digit character, or does not fit in a ssize_t
 */
static inline ssize_t
_str_to_ssize_t(char * str, size_t n)
{
    /* Largest ssize_t, derived from size_t (assumes both have equal width). */
    const ssize_t max   = (ssize_t)(((size_t)-1) >> 1);
    ssize_t       value = 0;

    if (n == 0)
    {
        return -1;
    }

    for (; n--; str++)
    {
        ssize_t digit;

        if (*str < '0' || *str > '9')
        {
            return -1;
        }

        digit = *str - '0';

        /* Signed overflow is undefined behaviour: check before computing. */
        if (value > (max - digit) / 10)
        {
            return -1;
        }

        value = value * 10 + digit;
    }

    return value;
}
424 
425 htpparse_error
427 {
428  return p->error;
429 }
430 
431 const char *
433 {
434  htpparse_error e = htparser_get_error(p);
435 
436  if (e > htparse_error_generic)
437  {
438  return "htparse_no_such_error";
439  }
440 
441  return errstr_map[e];
442 }
443 
444 unsigned int
446 {
447  return p->status;
448 }
449 
450 int
452 {
453  if (p->major > 0 && p->minor > 0)
454  {
456  {
457  return 0;
458  } else {
459  return 1;
460  }
461  } else {
463  {
464  return 1;
465  } else {
466  return 0;
467  }
468  }
469 
470  return 0;
471 }
472 
473 htp_scheme
475 {
476  return p->scheme;
477 }
478 
479 htp_method
481 {
482  return p->method;
483 }
484 
485 const char *
486 htparser_get_methodstr_m(htp_method meth)
487 {
488  if (meth >= htp_method_UNKNOWN)
489  {
490  return NULL;
491  }
492 
493  return method_strmap[meth];
494 }
495 
496 const char *
498 {
499  return htparser_get_methodstr_m(p->method);
500 }
501 
502 void
503 htparser_set_major(htparser * p, unsigned char major)
504 {
505  p->major = major;
506 }
507 
508 void
509 htparser_set_minor(htparser * p, unsigned char minor)
510 {
511  p->minor = minor;
512 }
513 
514 unsigned char
516 {
517  return p->major;
518 }
519 
520 unsigned char
522 {
523  return p->minor;
524 }
525 
526 unsigned char
528 {
529  return p->multipart;
530 }
531 
532 void *
534 {
535  return p->userdata;
536 }
537 
538 void
540 {
541  p->userdata = ud;
542 }
543 
544 uint64_t
546 {
547  return p->content_len;
548 }
549 
550 uint64_t
552 {
553  return p->orig_content_len;
554 }
555 
556 uint64_t
558 {
559  return p->bytes_read;
560 }
561 
562 uint64_t
564 {
565  return p->total_bytes_read;
566 }
567 
568 void
569 htparser_init(htparser * p, htp_type type)
570 {
571  /* Do not memset entire string buffer. */
572  memset(p, 0, offsetof(htparser, buf));
573  p->buf[0] = '\0';
574  p->state = s_start;
575  p->error = htparse_error_none;
576  p->method = htp_method_UNKNOWN;
577  p->type = type;
578 }
579 
580 htparser *
582 {
583  return malloc(sizeof(htparser));
584 }
585 
/**
 * @brief Tell whether a byte may appear in a (non IPv6-literal) host name.
 *
 * Accepted bytes are ASCII letters, ASCII digits, '.' and '-'.
 *
 * @param ch byte to classify
 *
 * @return 1 when ch is accepted, 0 otherwise
 */
static int
is_host_char(unsigned char ch)
{
    /* Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'. */
    const unsigned char folded = (unsigned char)(ch | 0x20);

    return (folded >= 'a' && folded <= 'z')
           || (ch >= '0' && ch <= '9')
           || ch == '.'
           || ch == '-';
}
603 
604 static htp_method
605 get_method(const char * m, const size_t sz)
606 {
607  switch (sz) {
608  case 3:
609  if (_str3_cmp(m, 'G', 'E', 'T', '\0'))
610  {
611  return htp_method_GET;
612  }
613 
614  if (_str3_cmp(m, 'P', 'U', 'T', '\0'))
615  {
616  return htp_method_PUT;
617  }
618 
619  break;
620  case 4:
621  if (m[1] == 'O')
622  {
623  if (_str3Ocmp(m, 'P', 'O', 'S', 'T'))
624  {
625  return htp_method_POST;
626  }
627 
628  if (_str3Ocmp(m, 'C', 'O', 'P', 'Y'))
629  {
630  return htp_method_COPY;
631  }
632 
633  if (_str3Ocmp(m, 'M', 'O', 'V', 'E'))
634  {
635  return htp_method_MOVE;
636  }
637 
638  if (_str3Ocmp(m, 'L', 'O', 'C', 'K'))
639  {
640  return htp_method_LOCK;
641  }
642  } else {
643  if (_str4cmp(m, 'H', 'E', 'A', 'D'))
644  {
645  return htp_method_HEAD;
646  }
647  }
648 
649  break;
650  case 5:
651  if (_str5cmp(m, 'M', 'K', 'C', 'O', 'L'))
652  {
653  return htp_method_MKCOL;
654  }
655 
656  if (_str5cmp(m, 'T', 'R', 'A', 'C', 'E'))
657  {
658  return htp_method_TRACE;
659  }
660 
661  if (_str5cmp(m, 'P', 'A', 'T', 'C', 'H'))
662  {
663  return htp_method_PATCH;
664  }
665 
666  break;
667  case 6:
668  if (_str6cmp(m, 'D', 'E', 'L', 'E', 'T', 'E'))
669  {
670  return htp_method_DELETE;
671  }
672 
673  if (_str6cmp(m, 'U', 'N', 'L', 'O', 'C', 'K'))
674  {
675  return htp_method_UNLOCK;
676  }
677 
678  break;
679  case 7:
680  if (_str7_cmp(m, 'O', 'P', 'T', 'I', 'O', 'N', 'S', '\0'))
681  {
682  return htp_method_OPTIONS;
683  }
684 
685  if (_str7_cmp(m, 'C', 'O', 'N', 'N', 'E', 'C', 'T', '\0'))
686  {
687  return htp_method_CONNECT;
688  }
689 
690  break;
691  case 8:
692  if (_str8cmp(m, 'P', 'R', 'O', 'P', 'F', 'I', 'N', 'D'))
693  {
694  return htp_method_PROPFIND;
695  }
696 
697  break;
698 
699  case 9:
700  if (_str9cmp(m, 'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H'))
701  {
702  return htp_method_PROPPATCH;
703  }
704 
705  break;
706  } /* switch */
707 
708  return htp_method_UNKNOWN;
709 } /* get_method */
710 
/*
 * Append one byte to the parser scratch buffer and keep it NUL terminated.
 *
 * This macro is not self-contained: it expects a parser pointer named `p`
 * and a loop index named `i` in the enclosing scope. When the buffer has no
 * room left it records htparse_error_too_big and RETURNS `i + 1` from the
 * enclosing function.
 *
 * The argument is parenthesized so that any expression can be passed safely.
 */
#define HTP_SET_BUF(CH) do {                                     \
        if (evhtp_likely((p->buf_idx + 1) < PARSER_STACK_MAX)) { \
            p->buf[p->buf_idx++] = (CH);                         \
            p->buf[p->buf_idx]   = '\0';                         \
        } else {                                                 \
            p->error = htparse_error_too_big;                    \
            return i + 1;                                        \
        }                                                        \
} while (0)
720 
721 
722 size_t
723 htparser_run(htparser * p, htparse_hooks * hooks, const char * data, size_t len)
724 {
725  unsigned char ch;
726  char c;
727  size_t i;
728 
729  log_debug("enter");
730  log_debug("p == %p", p);
731 
732  p->error = htparse_error_none;
733  p->bytes_read = 0;
734 
735  for (i = 0; i < len; i++)
736  {
737  int res;
738  int err;
739 
740  ch = data[i];
741 
742  log_debug("[%p] data[%zu] = %c (%x)", p, i, isprint(ch) ? ch : ' ', ch);
743 
744  p->total_bytes_read += 1;
745  p->bytes_read += 1;
746 
747  switch (p->state) {
748  case s_start:
749  log_debug("[%p] s_start", p);
750 
751  if (ch == CR || ch == LF)
752  {
753  break;
754  }
755 
756  if ((ch < 'A' || ch > 'Z') && ch != '_')
757  {
758  p->error = htparse_error_inval_reqline;
759 
760  log_debug("s_start invalid fist char '%c'", ch);
761  log_htparser__s_(p);
762 
763  return i + 1;
764  }
765 
766 
767  p->flags = 0;
768  p->error = htparse_error_none;
769  p->method = htp_method_UNKNOWN;
770  p->multipart = 0;
771  p->major = 0;
772  p->minor = 0;
773  p->content_len = 0;
774  p->orig_content_len = 0;
775  p->status = 0;
776  p->status_count = 0;
777  p->scheme_offset = NULL;
778  p->host_offset = NULL;
779  p->port_offset = NULL;
780  p->path_offset = NULL;
781  p->args_offset = NULL;
782 
783 
784  res = hook_on_msg_begin_run(p, hooks);
785 
786  HTP_SET_BUF(ch);
787 
788  if (evhtp_likely(p->type == htp_type_request)) {
789  p->state = s_method;
790  } else if (p->type == htp_type_response && ch == 'H') {
791  p->state = s_http_H;
792  } else {
793  log_debug("not type of request or response?");
794  log_htparser__s_(p);
795 
796  p->error = htparse_error_inval_reqline;
797  return i + 1;
798  }
799 
800  if (res)
801  {
802  p->error = htparse_error_user;
803  return i + 1;
804  }
805 
806  break;
807 
808  case s_method:
809  log_debug("[%p] s_method", p);
810 
811  do {
812  if (ch == ' ')
813  {
814  p->method = get_method(p->buf, p->buf_idx);
815  res = hook_method_run(p, hooks, p->buf, p->buf_idx);
816 
817  p->buf_idx = 0;
819 
820  if (res)
821  {
822  p->error = htparse_error_user;
823  return i + 1;
824  }
825 
826  break;
827  } else {
828  if ((ch < 'A' || ch > 'Z') && ch != '_')
829  {
830  p->error = htparse_error_inval_method;
831  return i + 1;
832  }
833 
834  HTP_SET_BUF(ch);
835  }
836 
837  ch = data[++i];
838  } while (i < len);
839 
840  break;
841  case s_spaces_before_uri:
842  log_debug("[%p] s_spaces_before_uri", p);
843 
844  /* CONNECT is special - RFC 2817 section 5.2:
845  * The Request-URI portion of the Request-Line is
846  * always an 'authority' as defined by URI Generic
847  * Syntax [2], which is to say the host name and port
848  * number destination of the requested connection
849  * separated by a colon
850  */
851  if (p->method == htp_method_CONNECT)
852  {
853  switch (ch) {
854  case ' ':
855  break;
856  case '[':
857  /* Literal IPv6 address start. */
858  HTP_SET_BUF(ch);
859 
860  p->host_offset = &p->buf[p->buf_idx];
861  p->state = s_host_ipv6;
862  break;
863  default:
864  if (!is_host_char(ch))
865  {
866  p->error = htparse_error_inval_reqline;
867  log_htparser__s_(p);
868 
869  return i + 1;
870  }
871 
872  p->host_offset = &p->buf[p->buf_idx];
873 
874  HTP_SET_BUF(ch);
875 
876  p->state = s_host;
877  break;
878  } /* switch */
879 
880  break;
881  }
882 
883  switch (ch) {
884  case ' ':
885  break;
886  case '/':
887  p->path_offset = &p->buf[p->buf_idx];
888 
889  HTP_SET_BUF(ch);
890 
892  break;
893  default:
894  c = (unsigned char)(ch | 0x20);
895 
896  if (c >= 'a' && c <= 'z') {
897  p->scheme_offset = &p->buf[p->buf_idx];
898 
899  HTP_SET_BUF(ch);
900 
901  p->state = s_schema;
902  break;
903  }
904 
905  p->error = htparse_error_inval_reqline;
906  log_htparser__s_(p);
907 
908  return i + 1;
909  } /* switch */
910 
911  break;
912  case s_schema:
913  log_debug("[%p] s_schema", p);
914 
915  c = (unsigned char)(ch | 0x20);
916 
917  if (c >= 'a' && c <= 'z') {
918  HTP_SET_BUF(ch);
919  break;
920  }
921 
922  switch (ch) {
923  case ':':
924  p->scheme = htp_scheme_unknown;
925 
926  switch (p->buf_idx) {
927  case 3:
928  if (_str3_cmp(p->scheme_offset, 'f', 't', 'p', '\0'))
929  {
930  p->scheme = htp_scheme_ftp;
931  break;
932  }
933 
934  if (_str3_cmp(p->scheme_offset, 'n', 'f', 's', '\0'))
935  {
936  p->scheme = htp_scheme_nfs;
937  break;
938  }
939 
940  break;
941  case 4:
942  if (_str4cmp(p->scheme_offset, 'h', 't', 't', 'p'))
943  {
944  p->scheme = htp_scheme_http;
945  break;
946  }
947  break;
948  case 5:
949  if (_str5cmp(p->scheme_offset, 'h', 't', 't', 'p', 's'))
950  {
951  p->scheme = htp_scheme_https;
952  break;
953  }
954  break;
955  } /* switch */
956 
957  res = hook_scheme_run(p, hooks,
958  p->scheme_offset,
959  (&p->buf[p->buf_idx] - p->scheme_offset));
960 
961  HTP_SET_BUF(ch);
962 
963  p->state = s_schema_slash;
964 
965  if (res) {
966  p->error = htparse_error_user;
967  return i + 1;
968  }
969 
970  break;
971  default:
972  p->error = htparse_error_inval_schema;
973  return i + 1;
974  } /* switch */
975 
976  break;
977  case s_schema_slash:
978  log_debug("[%p] s_schema_slash", p);
979 
980  switch (ch) {
981  case '/':
982  HTP_SET_BUF(ch);
983 
985  break;
986  default:
987  p->error = htparse_error_inval_schema;
988  return i + 1;
989  }
990  break;
992  log_debug("[%p] s_schema_slash_slash", p);
993 
994  switch (ch) {
995  case '/':
996  HTP_SET_BUF(ch);
997  p->host_offset = &p->buf[p->buf_idx];
998 
999  p->state = s_host;
1000  break;
1001  default:
1002  p->error = htparse_error_inval_schema;
1003  return i + 1;
1004  }
1005  break;
1006  case s_host:
1007  if (ch == '[') {
1008  /* Literal IPv6 address start. */
1009  HTP_SET_BUF(ch);
1010  p->host_offset = &p->buf[p->buf_idx];
1011 
1012  p->state = s_host_ipv6;
1013  break;
1014  }
1015 
1016  if (is_host_char(ch)) {
1017  HTP_SET_BUF(ch);
1018  break;
1019  }
1020 
1021  res = hook_host_run(p, hooks,
1022  p->host_offset,
1023  (&p->buf[p->buf_idx] - p->host_offset));
1024 
1025  if (res)
1026  {
1027  p->error = htparse_error_user;
1028  return i + 1;
1029  }
1030 
1031  /* successfully parsed a NON-IPV6 hostname, knowing this, the
1032  * current character in 'ch' is actually the next state, so we
1033  * we fall through to avoid another loop.
1034  */
1035  case s_host_done:
1036  res = 0;
1037 
1038  switch (ch) {
1039  case ':':
1040  HTP_SET_BUF(ch);
1041 
1042  p->port_offset = &p->buf[p->buf_idx];
1043  p->state = s_port;
1044  break;
1045  case ' ':
1046  /* this technically should never happen, but we should
1047  * check anyway
1048  */
1049  if (i == 0)
1050  {
1051  p->error = htparse_error_inval_state;
1052  return i + 1;
1053  }
1054 
1055  i--;
1056  ch = '/';
1057  /* to accept requests like <method> <proto>://<host> <ver>
1058  * we fallthrough to the next case.
1059  */
1060  case '/':
1061  p->path_offset = &p->buf[p->buf_idx];
1062 
1063  HTP_SET_BUF(ch);
1064 
1066  break;
1067  default:
1068  p->error = htparse_error_inval_schema;
1069  return i + 1;
1070  } /* switch */
1071 
1072  if (res)
1073  {
1074  p->error = htparse_error_user;
1075  return i + 1;
1076  }
1077 
1078  break;
1079  case s_host_ipv6:
1080  c = (unsigned char)(ch | 0x20);
1081 
1082  if ((c >= 'a' && c <= 'f')
1083  || (ch >= '0' && ch <= '9')
1084  || ch == ':'
1085  || ch == '.') {
1086  HTP_SET_BUF(ch);
1087  break;
1088  }
1089 
1090  switch (ch) {
1091  case ']':
1092  res = hook_host_run(p, hooks, p->host_offset,
1093  (&p->buf[p->buf_idx] - p->host_offset));
1094  if (res) {
1095  p->error = htparse_error_user;
1096  return i + 1;
1097  }
1098 
1099  HTP_SET_BUF(ch);
1100 
1101  p->state = s_host_done;
1102  break;
1103  default:
1104  p->error = htparse_error_inval_schema;
1105  return i + 1;
1106  } /* switch */
1107  break;
1108  case s_port:
1109  if (ch >= '0' && ch <= '9') {
1110  HTP_SET_BUF(ch);
1111  break;
1112  }
1113 
1114  res = hook_port_run(p, hooks, p->port_offset,
1115  (&p->buf[p->buf_idx] - p->port_offset));
1116 
1117  switch (ch) {
1118  case ' ':
1119  /* this technically should never happen, but we should
1120  * check anyway
1121  */
1122  if (i == 0)
1123  {
1124  p->error = htparse_error_inval_state;
1125  return i + 1;
1126  }
1127 
1128  i--;
1129  ch = '/';
1130  /* to accept requests like <method> <proto>://<host> <ver>
1131  * we fallthrough to the next case.
1132  */
1133  case '/':
1134  HTP_SET_BUF(ch);
1135  p->path_offset = &p->buf[p->buf_idx - 1];
1136 
1138  break;
1139  default:
1140  p->error = htparse_error_inval_reqline;
1141  log_debug("[s_port] inval_reqline");
1142  log_htparser__s_(p);
1143 
1144  return i + 1;
1145  } /* switch */
1146 
1147  if (res)
1148  {
1149  p->error = htparse_error_user;
1150  return i + 1;
1151  }
1152 
1153  break;
1154  case s_after_slash_in_uri:
1155  log_debug("[%p] s_after_slash_in_uri", p);
1156 
1157  res = 0;
1158 
1159  if (usual[ch >> 5] & (1 << (ch & 0x1f)))
1160  {
1161  HTP_SET_BUF(ch);
1162 
1163  p->state = s_check_uri;
1164  break;
1165  }
1166 
1167  switch (ch) {
1168  case ' ':
1169  {
1170  int r1 = hook_path_run(p, hooks, p->path_offset,
1171  (&p->buf[p->buf_idx] - p->path_offset));
1172  int r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
1173 
1174  p->state = s_http_09;
1175  p->buf_idx = 0;
1176 
1177  if (r1 || r2)
1178  {
1179  res = 1;
1180  }
1181  }
1182 
1183  break;
1184  case CR:
1185  p->minor = 9;
1186  p->state = s_almost_done;
1187  break;
1188  case LF:
1189  p->minor = 9;
1190  p->state = s_hdrline_start;
1191  break;
1192  case '.':
1193  case '%':
1194  case '/':
1195  case '#':
1196  HTP_SET_BUF(ch);
1197  p->state = s_uri;
1198  break;
1199  case '?':
1200  res = hook_path_run(p, hooks, p->path_offset,
1201  (&p->buf[p->buf_idx] - p->path_offset));
1202 
1203  HTP_SET_BUF(ch);
1204 
1205  p->args_offset = &p->buf[p->buf_idx];
1206  p->state = s_uri;
1207 
1208  break;
1209  default:
1210  HTP_SET_BUF(ch);
1211 
1212  p->state = s_check_uri;
1213  break;
1214  } /* switch */
1215 
1216  if (res)
1217  {
1218  p->error = htparse_error_user;
1219  return i + 1;
1220  }
1221 
1222  break;
1223 
1224  case s_check_uri:
1225 
1226  res = 0;
1227 
1228  do {
1229  log_debug("[%p] s_check_uri", p);
1230 
1231  if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
1232  HTP_SET_BUF(ch);
1233  } else {
1234  break;
1235  }
1236 
1237  ch = data[++i];
1238  } while (i < len);
1239 
1240  switch (ch) {
1241  case ' ':
1242  {
1243  int r1 = 0;
1244  int r2 = 0;
1245 
1246  if (p->args_offset)
1247  {
1248  r1 = hook_args_run(p, hooks, p->args_offset,
1249  (&p->buf[p->buf_idx] - p->args_offset));
1250  } else {
1251  r1 = hook_path_run(p, hooks, p->path_offset,
1252  (&p->buf[p->buf_idx] - p->path_offset));
1253  }
1254 
1255  r2 = hook_uri_run(p, hooks, p->buf, p->buf_idx);
1256  p->buf_idx = 0;
1257  p->state = s_http_09;
1258 
1259  if (r1 || r2)
1260  {
1261  res = 1;
1262  }
1263  }
1264  break;
1265  case '/':
1266  HTP_SET_BUF(ch);
1267 
1269  break;
1270  case CR:
1271  p->minor = 9;
1272  p->buf_idx = 0;
1273  p->state = s_almost_done;
1274  break;
1275  case LF:
1276  p->minor = 9;
1277  p->buf_idx = 0;
1278 
1279  p->state = s_hdrline_start;
1280  break;
1281  case '?':
1282  res = hook_path_run(p, hooks,
1283  p->path_offset,
1284  (&p->buf[p->buf_idx] - p->path_offset));
1285  HTP_SET_BUF(ch);
1286 
1287  p->args_offset = &p->buf[p->buf_idx];
1288  p->state = s_uri;
1289  break;
1290  default:
1291  HTP_SET_BUF(ch);
1292 
1293  p->state = s_uri;
1294  break;
1295  } /* switch */
1296 
1297  if (res)
1298  {
1299  p->error = htparse_error_user;
1300  return i + 1;
1301  }
1302 
1303  break;
1304 
1305  case s_uri:
1306  log_debug("[%p] s_uri", p);
1307 
1308  res = 0;
1309 
1310  do {
1311  if (usual[ch >> 5] & (1 << (ch & 0x1f))) {
1312  HTP_SET_BUF(ch);
1313  } else {
1314  break;
1315  }
1316 
1317  ch = data[++i];
1318  } while (i < len);
1319 
1320  switch (ch) {
1321  case ' ':
1322  {
1323  int r1 = 0;
1324  int r2 = 0;
1325 
1326  if (p->args_offset)
1327  {
1328  r1 = hook_args_run(p, hooks, p->args_offset,
1329  (&p->buf[p->buf_idx] - p->args_offset));
1330  } else {
1331  r1 = hook_path_run(p, hooks, p->path_offset,
1332  (&p->buf[p->buf_idx] - p->path_offset));
1333  }
1334 
1335  p->buf_idx = 0;
1336  p->state = s_http_09;
1337 
1338  if (r1 || r2)
1339  {
1340  res = 1;
1341  }
1342  }
1343  break;
1344  case CR:
1345  p->minor = 9;
1346  p->buf_idx = 0;
1347  p->state = s_almost_done;
1348  break;
1349  case LF:
1350  p->minor = 9;
1351  p->buf_idx = 0;
1352  p->state = s_hdrline_start;
1353  break;
1354  case '?':
1355  /* RFC 3986 section 3.4:
1356  * The query component is indicated by the
1357  * first question mark ("?") character and
1358  * terminated by a number sign ("#") character
1359  * or by the end of the URI. */
1360  if (!p->args_offset) {
1361  res = hook_path_run(p, hooks, p->path_offset,
1362  (&p->buf[p->buf_idx] - p->path_offset));
1363 
1364  HTP_SET_BUF(ch);
1365  p->args_offset = &p->buf[p->buf_idx];
1366  break;
1367  }
1368  /* Fall through. */
1369  default:
1370  HTP_SET_BUF(ch);
1371  break;
1372  } /* switch */
1373 
1374  if (res)
1375  {
1376  p->error = htparse_error_user;
1377  return i + 1;
1378  }
1379 
1380  break;
1381 
1382  case s_http_09:
1383  log_debug("[%p] s_http_09", p);
1384 
1385  switch (ch) {
1386  case ' ':
1387  break;
1388  case CR:
1389  p->minor = 9;
1390  p->buf_idx = 0;
1391  p->state = s_almost_done;
1392  break;
1393  case LF:
1394  p->minor = 9;
1395  p->buf_idx = 0;
1396  p->state = s_hdrline_start;
1397  break;
1398  case 'H':
1399  p->buf_idx = 0;
1400  p->state = s_http_H;
1401  break;
1402  default:
1403  p->error = htparse_error_inval_proto;
1404  return i + 1;
1405  } /* switch */
1406 
1407  break;
1408  case s_http_H:
1409  log_debug("[%p] s_http_H", p);
1410 
1411  switch (ch) {
1412  case 'T':
1413  p->state = s_http_HT;
1414  break;
1415  default:
1416  p->error = htparse_error_inval_proto;
1417  return i + 1;
1418  }
1419  break;
1420  case s_http_HT:
1421  switch (ch) {
1422  case 'T':
1423  p->state = s_http_HTT;
1424  break;
1425  default:
1426  p->error = htparse_error_inval_proto;
1427  return i + 1;
1428  }
1429  break;
1430  case s_http_HTT:
1431  switch (ch) {
1432  case 'P':
1433  p->state = s_http_HTTP;
1434  break;
1435  default:
1436  p->error = htparse_error_inval_proto;
1437  return i + 1;
1438  }
1439  break;
1440  case s_http_HTTP:
1441  switch (ch) {
1442  case '/':
1444  break;
1445  default:
1446  p->error = htparse_error_inval_proto;
1447  return i + 1;
1448  }
1449  break;
1450  case s_first_major_digit:
1451  if (ch < '1' || ch > '9')
1452  {
1453  p->error = htparse_error_inval_ver;
1454  return i + 1;
1455  }
1456 
1457  p->major = ch - '0';
1458  p->state = s_major_digit;
1459  break;
1460  case s_major_digit:
1461  if (ch == '.')
1462  {
1464  break;
1465  }
1466 
1467  if (ch < '0' || ch > '9')
1468  {
1469  p->error = htparse_error_inval_ver;
1470  return i + 1;
1471  }
1472 
1473  p->major = p->major * 10 + ch - '0';
1474  break;
1475  case s_first_minor_digit:
1476  if (ch < '0' || ch > '9')
1477  {
1478  p->error = htparse_error_inval_ver;
1479  return i + 1;
1480  }
1481 
1482  p->minor = ch - '0';
1483  p->state = s_minor_digit;
1484  break;
1485  case s_minor_digit:
1486  switch (ch) {
1487  case ' ':
1488  if (evhtp_likely(p->type == htp_type_request))
1489  {
1491  } else if (p->type == htp_type_response)
1492  {
1493  p->state = s_status;
1494  }
1495 
1496  break;
1497  case CR:
1498  p->state = s_almost_done;
1499  break;
1500  case LF:
1501  /* LF without a CR? error.... */
1502  p->error = htparse_error_inval_reqline;
1503  log_debug("[s_minor_digit] LF without CR!");
1504  log_htparser__s_(p);
1505 
1506  return i + 1;
1507  default:
1508  if (ch < '0' || ch > '9')
1509  {
1510  p->error = htparse_error_inval_ver;
1511  return i + 1;
1512  }
1513 
1514  p->minor = p->minor * 10 + ch - '0';
1515  break;
1516  } /* switch */
1517  break;
1518  case s_status:
1519  /* http response status code */
1520  if (ch == ' ')
1521  {
1522  if (p->status)
1523  {
1524  p->state = s_status_text;
1525  }
1526  break;
1527  }
1528 
1529  if (ch < '0' || ch > '9')
1530  {
1531  p->error = htparse_error_status;
1532  return i + 1;
1533  }
1534 
1535  p->status = p->status * 10 + ch - '0';
1536 
1537  if (++p->status_count == 3)
1538  {
1540  }
1541 
1542  break;
1543  case s_space_after_status:
1544  switch (ch) {
1545  case ' ':
1546  p->state = s_status_text;
1547  break;
1548  case CR:
1549  p->state = s_almost_done;
1550  break;
1551  case LF:
1552  p->state = s_hdrline_start;
1553  break;
1554  default:
1555  p->error = htparse_error_generic;
1556  return i + 1;
1557  }
1558  break;
1559  case s_status_text:
1560  switch (ch) {
1561  case CR:
1562  p->state = s_almost_done;
1563  break;
1564  case LF:
1565  p->state = s_hdrline_start;
1566  break;
1567  default:
1568  break;
1569  }
1570  break;
1571  case s_spaces_after_digit:
1572  switch (ch) {
1573  case ' ':
1574  break;
1575  case CR:
1576  p->state = s_almost_done;
1577  break;
1578  case LF:
1579  p->state = s_hdrline_start;
1580  break;
1581  default:
1582  p->error = htparse_error_inval_ver;
1583  return i + 1;
1584  }
1585  break;
1586 
1587  case s_almost_done:
1588  switch (ch) {
1589  case LF:
1590  if (p->type == htp_type_response && p->status >= 100 && p->status < 200)
1591  {
1592  res = hook_on_hdrs_begin_run(p, hooks);
1593 
1594  if (res)
1595  {
1596  p->error = htparse_error_user;
1597  return i + 1;
1598  }
1599 
1600  p->status = 0;
1601  p->status_count = 0;
1602  p->state = s_start;
1603  break;
1604  }
1605 
1606  p->state = s_done;
1607  res = hook_on_hdrs_begin_run(p, hooks);
1608  if (res)
1609  {
1610  p->error = htparse_error_user;
1611  return i + 1;
1612  }
1613  break;
1614  default:
1615  p->error = htparse_error_inval_reqline;
1616  log_htparser__s_(p);
1617 
1618  return i + 1;
1619  } /* switch */
1620  break;
1621  case s_done:
1622  switch (ch) {
1623  case CR:
1625  break;
1626  case LF:
1627  return i + 1;
1628  default:
1629  goto hdrline_start;
1630  }
1631  break;
1632 hdrline_start:
1633  case s_hdrline_start:
1634  log_debug("[%p] s_hdrline_start", p);
1635 
1636  p->buf_idx = 0;
1637 
1638  switch (ch) {
1639  case CR:
1641  break;
1642  case LF:
1644  break;
1645  default:
1646  HTP_SET_BUF(ch);
1647 
1648  p->state = s_hdrline_hdr_key;
1649  break;
1650  }
1651 
1652  break;
1653  case s_hdrline_hdr_key:
1654  log_debug("[%p] s_hdrline_hdr_key", p);
1655 
1656  do {
1657  if (evhtp_unlikely(ch == ':'))
1658  {
1659  res = hook_hdr_key_run(p, hooks, p->buf, p->buf_idx);
1660 
1661  /* figure out if the value of this header is valuable */
1662  p->heval = eval_hdr_val_none;
1663 
1664  switch (p->buf_idx + 1) {
1665  case 5:
1666  if (!strcasecmp(p->buf, "host"))
1667  {
1669  }
1670  break;
1671  case 11:
1672  if (!strcasecmp(p->buf, "connection"))
1673  {
1675  }
1676  break;
1677  case 13:
1678  if (!strcasecmp(p->buf, "content-type"))
1679  {
1681  }
1682  break;
1683  case 15:
1684  if (!strcasecmp(p->buf, "content-length"))
1685  {
1687  }
1688  break;
1689  case 17:
1690  if (!strcasecmp(p->buf, "proxy-connection"))
1691  {
1693  }
1694  break;
1695  case 18:
1696  if (!strcasecmp(p->buf, "transfer-encoding"))
1697  {
1699  }
1700  break;
1701  } /* switch */
1702 
1703  p->buf_idx = 0;
1705 
1706  if (res)
1707  {
1708  p->error = htparse_error_user;
1709  return i + 1;
1710  }
1711 
1712  break;
1713  }
1714 
1715  switch (ch) {
1716  case CR:
1718  break;
1719  case LF:
1721  break;
1722  default:
1723  HTP_SET_BUF(ch);
1724  break;
1725  }
1726 
1727  if (p->state != s_hdrline_hdr_key)
1728  {
1729  break;
1730  }
1731 
1732  ch = data[++i];
1733  } while (i < len);
1734 
1735  break;
1736 
1738  log_debug("[%p] s_hdrline_hdr_space_before_val", p);
1739 
1740  switch (ch) {
1741  case ' ':
1742  break;
1743  case CR:
1744  /*
1745  * we have an empty header value here, so we set the buf
1746  * to empty, set the state to hdrline_hdr_val, and
1747  * decrement the start byte counter.
1748  */
1749  HTP_SET_BUF(' ');
1750  p->state = s_hdrline_hdr_val;
1751 
1752  /*
1753  * make sure the next pass comes back to this CR byte,
1754  * so it matches in s_hdrline_hdr_val.
1755  */
1756  i--;
1757  break;
1758  case LF:
1759  /* never got a CR for an empty header, this is an
1760  * invalid state.
1761  */
1762  p->error = htparse_error_inval_hdr;
1763  return i + 1;
1764  default:
1765  HTP_SET_BUF(ch);
1766  p->state = s_hdrline_hdr_val;
1767  break;
1768  } /* switch */
1769  break;
1770  case s_hdrline_hdr_val:
1771  err = 0;
1772 
1773  do {
1774  log_debug("[%p] s_hdrline_hdr_val", p);
1775  if (ch == CR)
1776  {
1777  switch (p->heval) {
1778  case eval_hdr_val_none:
1779  break;
1780  case eval_hdr_val_hostname:
1781  if (hook_hostname_run(p, hooks, p->buf, p->buf_idx))
1782  {
1784  p->error = htparse_error_user;
1785  return i + 1;
1786  }
1787 
1788  break;
1790  p->content_len = str_to_uint64(p->buf, p->buf_idx, &err);
1791  p->orig_content_len = p->content_len;
1792 
1793  log_debug("[%p] s_hdrline_hdr_val content-lenth = %zu", p, p->content_len);
1794 
1795  if (err == 1)
1796  {
1797  p->error = htparse_error_too_big;
1798  return i + 1;
1799  }
1800 
1801  break;
1803  switch (p->buf[0]) {
1804  char A_case;
1805  char C_case;
1806  const char * S_buf;
1807 
1808  case 'K':
1809  case 'k':
1810  if (p->buf_idx != 10)
1811  {
1812  break;
1813  }
1814 
1815  A_case = (p->buf[5] == 'A') ? 'A' : 'a';
1816  S_buf = (const char *)(p->buf + 1);
1817 
1818  if (_str9cmp(S_buf,
1819  'e', 'e', 'p', '-', A_case, 'l', 'i', 'v', 'e'))
1820  {
1822  }
1823  break;
1824  case 'c':
1825  case 'C':
1826  if (p->buf_idx != 5)
1827  {
1828  break;
1829  }
1830 
1831  C_case = (p->buf[0] == 'C') ? 'C' : 'c';
1832  S_buf = (const char *)p->buf;
1833 
1834  if (_str5cmp(S_buf, C_case, 'l', 'o', 's', 'e'))
1835  {
1837  }
1838  break;
1839  } /* switch */
1840  break;
1842  if (p->buf_idx != 7)
1843  {
1844  break;
1845  }
1846 
1847  switch (p->buf[0]) {
1848  const char * S_buf;
1849 
1850  case 'c':
1851  case 'C':
1852  if (p->buf_idx != 7)
1853  {
1854  break;
1855  }
1856 
1857  S_buf = (const char *)(p->buf + 1);
1858 
1859  if (_str6cmp(S_buf, 'h', 'u', 'n', 'k', 'e', 'd'))
1860  {
1861  p->flags |= parser_flag_chunked;
1862  }
1863 
1864  break;
1865  }
1866 
1867  break;
1869  if (p->buf_idx != 9)
1870  {
1871  break;
1872  }
1873 
1874  switch (p->buf[0]) {
1875  const char * S_buf;
1876 
1877  case 'm':
1878  case 'M':
1879  S_buf = (const char *)(p->buf + 1);
1880 
1881  if (_str8cmp(S_buf, 'u', 'l', 't', 'i', 'p', 'a', 'r', 't'))
1882  {
1883  p->multipart = 1;
1884  }
1885 
1886  break;
1887  }
1888 
1889  break;
1891  default:
1892  break;
1893  } /* switch */
1894 
1896 
1897  break;
1898  }
1899 
1900  switch (ch) {
1901  case LF:
1902  /* LF before CR? invalid */
1903  p->error = htparse_error_inval_hdr;
1904  return i + 1;
1905  default:
1906  HTP_SET_BUF(ch);
1907  break;
1908  } /* switch */
1909 
1910  if (p->state != s_hdrline_hdr_val)
1911  {
1912  break;
1913  }
1914 
1915  ch = data[++i];
1916  } while (i < len);
1917 
1918  break;
1920  log_debug("[%p] s_hdrline_hdr_almost_done", p);
1921 
1922  res = 0;
1923  switch (ch) {
1924  case LF:
1925  if (p->flags & parser_flag_trailing)
1926  {
1927  res = hook_on_msg_complete_run(p, hooks);
1928  p->state = s_start;
1929  break;
1930  }
1931 
1933  break;
1934  default:
1935  p->error = htparse_error_inval_hdr;
1936  return i + 1;
1937  }
1938 
1939  if (res)
1940  {
1941  p->error = htparse_error_user;
1942  return i + 1;
1943  }
1944 
1945  break;
1946  case s_hdrline_hdr_done:
1947  log_debug("[%p] s_hdrline_hdr_done", p);
1948 
1949  switch (ch) {
1950  case CR:
1951  res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
1953 
1954  if (res)
1955  {
1956  p->error = htparse_error_user;
1957  return i + 1;
1958  }
1959 
1960  break;
1961  case LF:
1962  /* got LFLF? is this valid? */
1963  p->error = htparse_error_inval_hdr;
1964 
1965  return i + 1;
1966  case '\t':
1967  /* this is a multiline header value, we must go back to
1968  * reading as a header value */
1969  p->state = s_hdrline_hdr_val;
1970  break;
1971  default:
1972  res = hook_hdr_val_run(p, hooks, p->buf, p->buf_idx);
1973  p->buf_idx = 0;
1974 
1975  HTP_SET_BUF(ch);
1976 
1977  p->state = s_hdrline_hdr_key;
1978 
1979  if (res) {
1980  p->error = htparse_error_user;
1981  return i + 1;
1982  }
1983 
1984  break;
1985  } /* switch */
1986  break;
1987  case s_hdrline_almost_done:
1988  log_debug("[%p] s_hdrline_almost_done", p);
1989 
1990  switch (ch) {
1991  case LF:
1992  res = hook_on_hdrs_complete_run(p, hooks);
1993 
1994  if (res != 0)
1995  {
1996  p->error = htparse_error_user;
1997  return i + 1;
1998  }
1999 
2000  p->buf_idx = 0;
2001 
2002  if (p->flags & parser_flag_trailing)
2003  {
2004  res = hook_on_msg_complete_run(p, hooks);
2005  p->state = s_start;
2006  } else if (p->flags & parser_flag_chunked)
2007  {
2009  } else if (p->content_len > 0)
2010  {
2011  p->state = s_body_read;
2012  } else if (p->content_len == 0)
2013  {
2014  res = hook_on_msg_complete_run(p, hooks);
2015  p->state = s_start;
2016  } else {
2017  p->state = s_hdrline_done;
2018  }
2019 
2020  if (res != 0)
2021  {
2022  p->error = htparse_error_user;
2023  return i + 1;
2024  }
2025  break;
2026 
2027  default:
2028  p->error = htparse_error_inval_hdr;
2029  return i + 1;
2030  } /* switch */
2031 
2032  if (res != 0)
2033  {
2034  p->error = htparse_error_user;
2035  return i + 1;
2036  }
2037 
2038  break;
2039  case s_hdrline_done:
2040  log_debug("[%p] s_hdrline_done", p);
2041 
2042  res = 0;
2043 
2044  if (p->flags & parser_flag_trailing)
2045  {
2046  res = hook_on_msg_complete_run(p, hooks);
2047  p->state = s_start;
2048  } else if (p->flags & parser_flag_chunked)
2049  {
2051  i--;
2052  } else if (p->content_len > 0)
2053  {
2054  p->state = s_body_read;
2055  i--;
2056  } else if (p->content_len == 0)
2057  {
2058  res = hook_on_msg_complete_run(p, hooks);
2059  p->state = s_start;
2060  }
2061 
2062  if (res)
2063  {
2064  p->error = htparse_error_user;
2065  return i + 1;
2066  }
2067 
2068  break;
2069  case s_chunk_size_start:
2070  c = unhex[(unsigned char)ch];
2071 
2072  if (c == -1)
2073  {
2074  p->error = htparse_error_inval_chunk_sz;
2075  return i + 1;
2076  }
2077 
2078  p->content_len = c;
2079  p->state = s_chunk_size;
2080  break;
2081  case s_chunk_size:
2082  if (ch == CR)
2083  {
2085  break;
2086  }
2087 
2088  c = unhex[(unsigned char)ch];
2089 
2090  if (c == -1)
2091  {
2092  p->error = htparse_error_inval_chunk_sz;
2093  return i + 1;
2094  }
2095 
2096  p->content_len *= 16;
2097  p->content_len += c;
2098  break;
2099 
2101  if (ch != LF)
2102  {
2103  p->error = htparse_error_inval_chunk_sz;
2104  return i + 1;
2105  }
2106 
2107  p->orig_content_len = p->content_len;
2108 
2109  if (p->content_len == 0)
2110  {
2111  res = hook_on_chunks_complete_run(p, hooks);
2112 
2114  p->state = s_hdrline_start;
2115  } else {
2116  res = hook_on_new_chunk_run(p, hooks);
2117 
2118  p->state = s_chunk_data;
2119  }
2120 
2121  if (res)
2122  {
2123  p->error = htparse_error_user;
2124  return i + 1;
2125  }
2126 
2127  break;
2128 
2129  case s_chunk_data:
2130  res = 0;
2131  {
2132  const char * pp = &data[i];
2133  const char * pe = (const char *)(data + len);
2134  size_t to_read = _MIN_READ(pe - pp, p->content_len);
2135 
2136  if (to_read > 0)
2137  {
2138  res = hook_body_run(p, hooks, pp, to_read);
2139 
2140  i += to_read - 1;
2141  }
2142 
2143  if (to_read == p->content_len)
2144  {
2146  }
2147 
2148  p->content_len -= to_read;
2149  }
2150 
2151  if (res)
2152  {
2153  p->error = htparse_error_user;
2154  return i + 1;
2155  }
2156 
2157  break;
2158 
2160  if (ch != CR)
2161  {
2162  p->error = htparse_error_inval_chunk;
2163  return i + 1;
2164  }
2165 
2166  p->state = s_chunk_data_done;
2167  break;
2168 
2169  case s_chunk_data_done:
2170  if (ch != LF)
2171  {
2172  p->error = htparse_error_inval_chunk;
2173  return i + 1;
2174  }
2175 
2176  p->orig_content_len = 0;
2178 
2179  if (hook_on_chunk_complete_run(p, hooks))
2180  {
2181  p->error = htparse_error_user;
2182  return i + 1;
2183  }
2184 
2185  break;
2186 
2187  case s_body_read:
2188  res = 0;
2189 
2190  {
2191  const char * pp = &data[i];
2192  const char * pe = (const char *)(data + len);
2193  size_t to_read = _MIN_READ(pe - pp, p->content_len);
2194 
2195  if (to_read > 0) {
2196  res = hook_body_run(p, hooks, pp, to_read);
2197 
2198  i += to_read - 1;
2199  p->content_len -= to_read;
2200  }
2201 
2202  if (res) {
2203  p->error = htparse_error_user;
2204  return i + 1;
2205  }
2206 
2207  if (p->content_len == 0) {
2208  res = hook_on_msg_complete_run(p, hooks);
2209  p->state = s_start;
2210  }
2211 
2212  if (res)
2213  {
2214  p->error = htparse_error_user;
2215  return i + 1;
2216  }
2217  }
2218 
2219  break;
2220 
2221  default:
2222  log_debug("[%p] This is a silly state....", p);
2223  p->error = htparse_error_inval_state;
2224  return i + 1;
2225  } /* switch */
2226 
2227  /* If we successfully completed a request/response we return
2228  * to caller, and leave it up to him to call us again if
2229  * parsing should continue. */
2230  if (p->state == s_start)
2231  {
2232  return i + 1;
2233  }
2234  } /* switch */
2235 
2236  return i;
2237 } /* htparser_run */
htparser::major
unsigned char major
Definition: parser.c:102
s_chunk_size_almost_done
Definition: parser.c:77
log_debug
#define log_debug(M,...)
Definition: internal.h:47
htparser
Definition: parser.c:91
s_chunk_size_start
Definition: parser.c:75
s_spaces_after_digit
Definition: parser.c:63
s_host_done
Definition: parser.c:49
s_hdrline_hdr_space_before_val
Definition: parser.c:70
method_strmap
static const char * method_strmap[]
Definition: parser.c:224
eval_hdr_val
eval_hdr_val
Definition: parser.c:23
s_http_HTTP
Definition: parser.c:58
htparser_get_total_bytes_read
uint64_t htparser_get_total_bytes_read(htparser *p)
Definition: parser.c:563
_str7_cmp
#define _str7_cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)
Definition: parser.c:264
htparser::flags
parser_flags flags
Definition: parser.c:94
s_http_HT
Definition: parser.c:56
s_major_digit
Definition: parser.c:60
evhtp_likely
#define evhtp_likely(x)
Definition: internal.h:17
htparser_get_content_length
uint64_t htparser_get_content_length(htparser *p)
Definition: parser.c:551
htparser_get_major
unsigned char htparser_get_major(htparser *p)
Definition: parser.c:515
_str3_cmp
#define _str3_cmp(m, c0, c1, c2, c3)
Definition: parser.c:247
_str9cmp
#define _str9cmp(m, c0, c1, c2, c3, c4, c5, c6, c7, c8)
Definition: parser.c:272
s_schema_slash
Definition: parser.c:45
eval_hdr_val_transfer_encoding
Definition: parser.c:28
s_first_minor_digit
Definition: parser.c:61
get_method
static htp_method get_method(const char *m, const size_t sz)
Definition: parser.c:605
htparser_get_method
htp_method htparser_get_method(htparser *p)
Definition: parser.c:480
htparser_new
htparser * htparser_new(void)
Definition: parser.c:581
htparser::path_offset
char * path_offset
Definition: parser.c:114
s_minor_digit
Definition: parser.c:62
s_status_text
Definition: parser.c:83
s_body_read
Definition: parser.c:74
htparser_should_keep_alive
int htparser_should_keep_alive(htparser *p)
Definition: parser.c:451
htparser::content_len
uint64_t content_len
Definition: parser.c:104
s_done
Definition: parser.c:65
htparser_get_strerror
const char * htparser_get_strerror(htparser *p)
Definition: parser.c:432
s_hdrline_hdr_done
Definition: parser.c:68
htparser::multipart
unsigned char multipart
Definition: parser.c:101
htparser::args_offset
char * args_offset
Definition: parser.c:115
htparser::minor
unsigned char minor
Definition: parser.c:103
s_http_H
Definition: parser.c:55
htparser::error
htpparse_error error
Definition: parser.c:92
eval_hdr_val_content_type
Definition: parser.c:30
log_htparser__s_
#define log_htparser__s_(p)
Definition: parser.c:182
htparser_get_minor
unsigned char htparser_get_minor(htparser *p)
Definition: parser.c:521
s_uri
Definition: parser.c:53
htparser::scheme_offset
char * scheme_offset
Definition: parser.c:111
htparser_set_userdata
void htparser_set_userdata(htparser *p, void *ud)
Definition: parser.c:539
htparser_get_userdata
void * htparser_get_userdata(htparser *p)
Definition: parser.c:533
s_method
Definition: parser.c:42
s_chunk_data_done
Definition: parser.c:80
htparser_get_error
htpparse_error htparser_get_error(htparser *p)
Definition: parser.c:426
htparser::state
parser_state state
Definition: parser.c:93
htparser::status_count
unsigned int status_count
Definition: parser.c:109
s_hdrline_hdr_val
Definition: parser.c:71
htparser_get_status
unsigned int htparser_get_status(htparser *p)
Definition: parser.c:445
htparser::scheme
htp_scheme scheme
Definition: parser.c:98
_str4cmp
#define _str4cmp(m, c0, c1, c2, c3)
Definition: parser.c:253
evhtp_unlikely
#define evhtp_unlikely(x)
Definition: internal.h:18
__HTPARSE_GENHOOK
#define __HTPARSE_GENHOOK(__n)
Definition: parser.c:308
htparser::heval
eval_hdr_val heval
Definition: parser.c:95
s_schema
Definition: parser.c:44
htparser::status
unsigned int status
Definition: parser.c:108
htparser_get_content_pending
uint64_t htparser_get_content_pending(htparser *p)
Definition: parser.c:545
htparser::orig_content_len
uint64_t orig_content_len
Definition: parser.c:105
eval_hdr_val_connection
Definition: parser.c:25
parser_flag_connection_close
Definition: parser.c:36
htparser::total_bytes_read
uint64_t total_bytes_read
Definition: parser.c:107
is_host_char
static int is_host_char(unsigned char ch)
Definition: parser.c:587
htparser::buf
char buf[PARSER_STACK_MAX]
Definition: parser.c:121
_str8cmp
#define _str8cmp(m, c0, c1, c2, c3, c4, c5, c6, c7)
Definition: parser.c:268
parser_flag_trailing
Definition: parser.c:37
usual
static uint32_t usual[]
Definition: parser.c:185
htparser::buf_idx
size_t buf_idx
Definition: parser.c:119
parser_flags
parser_flags
Definition: parser.c:33
htparser_get_multipart
unsigned char htparser_get_multipart(htparser *p)
Definition: parser.c:527
htparser_get_bytes_read
uint64_t htparser_get_bytes_read(htparser *p)
Definition: parser.c:557
eval_hdr_val_hostname
Definition: parser.c:29
htparser_set_major
void htparser_set_major(htparser *p, unsigned char major)
Definition: parser.c:503
htparser_get_methodstr_m
const char * htparser_get_methodstr_m(htp_method meth)
Definition: parser.c:486
s_host_ipv6
Definition: parser.c:48
s_almost_done
Definition: parser.c:64
eval_hdr_val_content_length
Definition: parser.c:27
HTP_SET_BUF
#define HTP_SET_BUF(CH)
Definition: parser.c:711
internal.h
_str6cmp
#define _str6cmp(m, c0, c1, c2, c3, c4, c5)
Definition: parser.c:260
s_chunk_size
Definition: parser.c:76
s_check_uri
Definition: parser.c:52
htparser_set_minor
void htparser_set_minor(htparser *p, unsigned char minor)
Definition: parser.c:509
s_first_major_digit
Definition: parser.c:59
s_hdrline_start
Definition: parser.c:66
LF
#define LF
Definition: parser.c:19
s_hdrline_hdr_almost_done
Definition: parser.c:67
s_schema_slash_slash
Definition: parser.c:46
htparser::method
htp_method method
Definition: parser.c:99
htparser::port_offset
char * port_offset
Definition: parser.c:113
parser_flag_chunked
Definition: parser.c:34
s_hdrline_hdr_key
Definition: parser.c:69
_str3Ocmp
#define _str3Ocmp(m, c0, c1, c2, c3)
Definition: parser.c:250
errstr_map
static const char * errstr_map[]
Definition: parser.c:207
s_port
Definition: parser.c:50
unhex
static int8_t unhex[256]
Definition: parser.c:196
s_status
Definition: parser.c:81
htparser_get_methodstr
const char * htparser_get_methodstr(htparser *p)
Definition: parser.c:497
s_hdrline_almost_done
Definition: parser.c:72
args
void * args
Definition: thread.c:123
s_spaces_before_uri
Definition: parser.c:43
s_host
Definition: parser.c:47
eval_hdr_val_proxy_connection
Definition: parser.c:26
parser_flag_connection_keep_alive
Definition: parser.c:35
eval_hdr_val_none
Definition: parser.c:24
CR
#define CR
Definition: parser.c:20
htparser_get_scheme
htp_scheme htparser_get_scheme(htparser *p)
Definition: parser.c:474
htparser::userdata
void * userdata
Definition: parser.c:117
s_http_09
Definition: parser.c:54
s_start
Definition: parser.c:41
htparser::host_offset
char * host_offset
Definition: parser.c:112
s_space_after_status
Definition: parser.c:82
__HTPARSE_GENDHOOK
#define __HTPARSE_GENDHOOK(__n)
Definition: parser.c:319
s_chunk_data
Definition: parser.c:78
str_to_uint64
static uint64_t str_to_uint64(char *str, size_t n, int *err)
Definition: parser.c:354
htparser::bytes_read
uint64_t bytes_read
Definition: parser.c:106
htparser::type
htp_type type
Definition: parser.c:97
s_http_HTT
Definition: parser.c:57
_str5cmp
#define _str5cmp(m, c0, c1, c2, c3, c4)
Definition: parser.c:256
_str_to_ssize_t
static ssize_t _str_to_ssize_t(char *str, size_t n)
Definition: parser.c:396
s_after_slash_in_uri
Definition: parser.c:51
_MIN_READ
#define _MIN_READ(a, b)
Definition: parser.c:243
s_hdrline_done
Definition: parser.c:73
PARSER_STACK_MAX
#define PARSER_STACK_MAX
Definition: parser.c:18
s_chunk_data_almost_done
Definition: parser.c:79
htparser_init
void htparser_init(htparser *p, htp_type type)
Definition: parser.c:569
parser_state
parser_state
Definition: parser.c:40
htparser_run
size_t htparser_run(htparser *p, htparse_hooks *hooks, const char *data, size_t len)
Definition: parser.c:723