/* * Copyright (c) 2002-2018 Dovecot authors * Copyright (c) 2015-2018 Pali * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include "dovecot-parser.h" #ifndef SIZE_MAX #define SIZE_MAX ((size_t)-1) #endif void i_panic(const char *format, ...); #ifdef DEBUG #define i_assert(expr) \ do { if (!(expr)) \ i_panic("file %s: line %d (%s): assertion failed: (%s)", \ __FILE__, \ __LINE__, \ __FUNCTION__, \ #expr); \ } while ( 0 ) #else #define i_assert(expr) #endif typedef struct { char *buf; size_t len; size_t size; } string_t; struct rfc822_parser_context { const unsigned char *data, *end; string_t *last_comment; /* Replace NUL characters with this string */ const char *nul_replacement_str; }; struct message_address_parser_context { struct rfc822_parser_context parser; struct message_address *first_addr, *last_addr, addr; string_t *str; bool fill_missing, non_strict_dots, non_strict_dots_as_invalid; }; static string_t *str_new(size_t initial_size) { char *buf; string_t *str; if (!initial_size) initial_size = 1; if (initial_size >= SIZE_MAX / 2) i_panic("str_new() failed: %s", "initial_size is too big"); buf = malloc(initial_size); if (!buf) i_panic("malloc() failed: %s", strerror(errno)); str = malloc(sizeof(string_t)); if (!str) i_panic("malloc() failed: %s", strerror(errno)); buf[0] = 0; str->buf = buf; str->len = 0; str->size = initial_size; return str; } static void str_free(string_t **str) { free((*str)->buf); free(*str); *str = NULL; } static const char *str_c(string_t *str) { return str->buf; } static char *str_ccopy(string_t *str) { char *copy; copy = malloc(str->len+1); if (!copy) i_panic("malloc() failed: %s", strerror(errno)); memcpy(copy, str->buf, str->len); copy[str->len] = 0; return copy; } static size_t str_len(const string_t *str) { return str->len; } static void str_append_data(string_t *str, const void *data, size_t len) { char *new_buf; size_t need_size; need_size = str->len + len + 1; if (len >= SIZE_MAX / 2 || need_size >= SIZE_MAX / 2) i_panic("%s() failed: %s", __FUNCTION__, "len is too big"); if (need_size > str->size) { str->size = 1; while (str->size < need_size) str->size <<= 1; new_buf = realloc(str->buf, str->size); if (!new_buf) i_panic("realloc() failed: %s", strerror(errno)); str->buf = new_buf; } memcpy(str->buf + str->len, data, len); str->len += len; str->buf[str->len] = 0; } static void str_append(string_t *str, const char *cstr) { str_append_data(str, cstr, strlen(cstr)); } static void str_append_c(string_t *str, unsigned char chr) { str_append_data(str, &chr, 1); } static void str_truncate(string_t *str, size_t len) { if (str->size - 1 <= len || str->len <= len) return; str->len = len; str->buf[len] = 0; } /* atext = ALPHA / DIGIT / ; Any character except controls, "!" / "#" / ; SP, and specials. "$" / "%" / ; Used for atoms "&" / "'" / "*" / "+" / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}" / "~" MIME: token := 1* tspecials := "(" / ")" / "<" / ">" / "@" / "," / ";" / ":" / "\" / <"> "/" / "[" / "]" / "?" / "=" So token is same as dot-atom, except stops also at '/', '?' and '='. */ /* atext chars are marked with 1, alpha and digits with 2, atext-but-mime-tspecials with 4 */ unsigned char rfc822_atext_chars[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0-15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */ 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 4, /* 32-47 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 4, 0, 4, /* 48-63 */ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 64-79 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 1, 1, /* 80-95 */ 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 96-111 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0, /* 112-127 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }; #define IS_ATEXT(c) \ (rfc822_atext_chars[(int)(unsigned char)(c)] != 0) #define IS_ATEXT_NON_TSPECIAL(c) \ ((rfc822_atext_chars[(int)(unsigned char)(c)] & 3) != 0) /* qtext = %d33 / ; Printable US-ASCII %d35-91 / ; characters not including %d93-126 / ; "\" or the quote character obs-qtext obs-qtext = obs-NO-WS-CTL obs-NO-WS-CTL = %d1-8 / ; US-ASCII control %d11 / ; characters that do not %d12 / ; include the carriage %d14-31 / ; return, line feed, and %d127 ; white space characters So qtext is everything expects '\0', '\t', '\n', '\r', ' ', '"', '\\'. */ /* non-qtext characters */ #define CHAR_NEEDS_ESCAPE(c) ((c) == '"' || (c) == '\\' || (c) == '\0' || (c) == '\t' || (c) == '\n' || (c) == '\r') /* quote with "" and escape all needed characters */ static void str_append_maybe_escape(string_t *str, const char *data, size_t len, bool quote_dot) { const char *p; const char *end; if (len == 0) { str_append(str, "\"\""); return; } /* leading or trailing dot needs to be always quoted */ if (data[0] == '.' || data[len-1] == '.') quote_dot = true; end = data + len; /* see if we need to quote it */ for (p = data; p != end; p++) { if (!IS_ATEXT(*p) && (quote_dot || *p != '.')) break; } if (p == end) { str_append_data(str, data, len); return; } /* see if we need to escape it */ for (p = data; p != end; p++) { if (CHAR_NEEDS_ESCAPE(*p)) break; } if (p == end) { /* only quote */ str_append_c(str, '"'); str_append_data(str, data, len); str_append_c(str, '"'); return; } /* quote and escape */ str_append_c(str, '"'); str_append_data(str, data, (size_t) (p - data)); for (; p != end; p++) { if (CHAR_NEEDS_ESCAPE(*p)) str_append_c(str, '\\'); str_append_c(str, *p); } str_append_c(str, '"'); } /* Parse given data using RFC 822 token parser. */ static void rfc822_parser_init(struct rfc822_parser_context *ctx, const unsigned char *data, size_t size, string_t *last_comment) { memset(ctx, 0, sizeof(*ctx)); ctx->data = data; ctx->end = data + size; ctx->last_comment = last_comment; } static void rfc822_parser_deinit(struct rfc822_parser_context *ctx) { /* make sure the parsing didn't trigger a bug that caused reading past the end pointer. */ i_assert(ctx->data <= ctx->end); /* make sure the parser is no longer accessed */ ctx->data = ctx->end = NULL; } /* The functions below return 1 = more data available, 0 = no more data available (but a value might have been returned now), -1 = invalid input. LWSP is automatically skipped after value, but not before it. So typically you begin with skipping LWSP and then start using the parse functions. */ /* Parse comment. Assumes parser's data points to '(' */ static int rfc822_skip_comment(struct rfc822_parser_context *ctx) { const unsigned char *start; size_t len; int level = 1; i_assert(*ctx->data == '('); if (ctx->last_comment != NULL) str_truncate(ctx->last_comment, 0); start = ++ctx->data; for (; ctx->data < ctx->end; ctx->data++) { switch (*ctx->data) { case '\0': if (ctx->nul_replacement_str != NULL) { if (ctx->last_comment != NULL) { str_append_data(ctx->last_comment, start, ctx->data - start); str_append(ctx->last_comment, ctx->nul_replacement_str); start = ctx->data + 1; } } else { return -1; } break; case '(': level++; break; case ')': if (--level == 0) { if (ctx->last_comment != NULL) { str_append_data(ctx->last_comment, start, ctx->data - start); } ctx->data++; return ctx->data < ctx->end ? 1 : 0; } break; case '\n': /* folding whitespace, remove the (CR)LF */ if (ctx->last_comment == NULL) break; len = ctx->data - start; if (len > 0 && start[len-1] == '\r') len--; str_append_data(ctx->last_comment, start, len); start = ctx->data + 1; break; case '\\': ctx->data++; if (ctx->data >= ctx->end) return -1; #if 0 if (*ctx->data == '\r' || *ctx->data == '\n' || *ctx->data == '\0') { /* quoted-pair doesn't allow CR/LF/NUL. They are part of the obs-qp though, so don't return them as error. */ ctx->data--; break; } #endif if (ctx->last_comment != NULL) { str_append_data(ctx->last_comment, start, ctx->data - start - 1); } start = ctx->data; break; } } /* missing ')' */ return -1; } /* Skip LWSP if there is any */ static int rfc822_skip_lwsp(struct rfc822_parser_context *ctx) { for (; ctx->data < ctx->end;) { if (*ctx->data == ' ' || *ctx->data == '\t' || *ctx->data == '\r' || *ctx->data == '\n') { ctx->data++; continue; } if (*ctx->data != '(') break; if (rfc822_skip_comment(ctx) < 0) return -1; } return ctx->data < ctx->end ? 1 : 0; } /* Stop at next non-atext char */ int rfc822_parse_atom(struct rfc822_parser_context *ctx, string_t *str) { const unsigned char *start; /* atom = [CFWS] 1*atext [CFWS] atext = ; Any character except controls, SP, and specials. */ if (ctx->data >= ctx->end || !IS_ATEXT(*ctx->data)) return -1; for (start = ctx->data++; ctx->data < ctx->end; ctx->data++) { if (IS_ATEXT(*ctx->data)) continue; str_append_data(str, start, ctx->data - start); return rfc822_skip_lwsp(ctx); } str_append_data(str, start, ctx->data - start); return 0; } /* Like parse_atom() but don't stop at '.' */ static int rfc822_parse_dot_atom(struct rfc822_parser_context *ctx, string_t *str, bool stop_trailing_dot) { const unsigned char *start; const unsigned char *last_dot_ptr; bool last_is_dot; bool dot_problem; int ret; /* dot-atom = [CFWS] dot-atom-text [CFWS] dot-atom-text = 1*atext *("." 1*atext) atext = ; Any character except controls, SP, and specials. For RFC-822 compatibility allow LWSP around '.' */ if (ctx->data >= ctx->end || !IS_ATEXT(*ctx->data)) return -1; last_dot_ptr = ctx->data; last_is_dot = false; dot_problem = false; for (start = ctx->data++; ctx->data < ctx->end; ) { if (IS_ATEXT(*ctx->data)) { ctx->data++; continue; } #if 0 if (start == ctx->data) dot_problem = true; #endif str_append_data(str, start, ctx->data - start); if (ctx->data - start > 0) last_is_dot = false; if ((ret = rfc822_skip_lwsp(ctx)) <= 0) return (dot_problem && ret >= 0) ? -2 : ret; if (*ctx->data != '.') { if (last_is_dot && stop_trailing_dot) { ctx->data = last_dot_ptr; return dot_problem ? -2 : 1; } return (last_is_dot || dot_problem) ? -2 : 1; } if (last_is_dot) dot_problem = true; last_dot_ptr = ctx->data; ctx->data++; str_append_c(str, '.'); last_is_dot = true; if (rfc822_skip_lwsp(ctx) <= 0) return (dot_problem && ret >= 0) ? -2 : ret; start = ctx->data; } #if 0 i_assert(start != ctx->data); #endif str_append_data(str, start, ctx->data - start); return dot_problem ? -2 : 0; } /* "quoted string" */ static int rfc822_parse_quoted_string(struct rfc822_parser_context *ctx, string_t *str) { const unsigned char *start; bool char_problem; int ret; size_t len; i_assert(ctx->data < ctx->end); i_assert(*ctx->data == '"'); ctx->data++; char_problem = false; for (start = ctx->data; ctx->data < ctx->end; ctx->data++) { switch (*ctx->data) { case '\0': if (ctx->nul_replacement_str != NULL) { str_append_data(str, start, ctx->data - start); str_append(str, ctx->nul_replacement_str); start = ctx->data + 1; } else { char_problem = true; } break; case '"': str_append_data(str, start, ctx->data - start); ctx->data++; ret = rfc822_skip_lwsp(ctx); return (char_problem && ret >= 0) ? -2 : ret; case '\r': if (ctx->data+1 < ctx->end && *(ctx->data+1) != '\n') char_problem = true; break; case '\n': #if 0 /* folding whitespace, remove the (CR)LF */ len = ctx->data - start; if (len > 0 && start[len-1] == '\r') len--; str_append_data(str, start, len); start = ctx->data + 1; #endif len = ctx->data - start; if (len <= 0 || start[len-1] != '\r') char_problem = true; break; case '\\': ctx->data++; if (ctx->data >= ctx->end) return -1; #if 0 if (*ctx->data == '\r' || *ctx->data == '\n' || *ctx->data == '\0') { /* quoted-pair doesn't allow CR/LF/NUL. They are part of the obs-qp though, so don't return them as error. */ ctx->data--; break; } #endif str_append_data(str, start, ctx->data - start - 1); str_append_c(str, *ctx->data); start = ctx->data+1; break; } } /* missing '"' */ return -1; } static int rfc822_parse_atom_or_dot(struct rfc822_parser_context *ctx, string_t *str) { const unsigned char *start; /* atom = [CFWS] 1*atext [CFWS] atext = ; Any character except controls, SP, and specials. The difference between this function and rfc822_parse_dot_atom() is that this doesn't just silently skip over all the whitespace. */ for (start = ctx->data; ctx->data < ctx->end; ctx->data++) { if (IS_ATEXT(*ctx->data) || *ctx->data == '.') continue; str_append_data(str, start, ctx->data - start); return rfc822_skip_lwsp(ctx); } str_append_data(str, start, ctx->data - start); return 0; } /* atom or quoted-string */ static int rfc822_parse_phrase(struct rfc822_parser_context *ctx, string_t *str) { int ret; bool char_problem; char_problem = false; /* phrase = 1*word / obs-phrase word = atom / quoted-string obs-phrase = word *(word / "." / CFWS) */ if (ctx->data >= ctx->end) return 0; if (*ctx->data == '.') return -1; for (;;) { if (*ctx->data == '"') ret = rfc822_parse_quoted_string(ctx, str); else ret = rfc822_parse_atom_or_dot(ctx, str); if (ret <= 0 && ret != -2) return (char_problem && ret == 0) ? -2 : ret; if (ret == -2) { char_problem = true; if (ctx->data >= ctx->end) return -2; } if (!IS_ATEXT(*ctx->data) && *ctx->data != '"' && *ctx->data != '.') break; str_append_c(str, ' '); } ret = rfc822_skip_lwsp(ctx); return (char_problem && ret >= 0) ? -2 : ret; } static int rfc822_parse_domain_literal(struct rfc822_parser_context *ctx, string_t *str) { const unsigned char *start; size_t len; /* domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] dcontent = dtext / quoted-pair dtext = NO-WS-CTL / ; Non white space controls %d33-90 / ; The rest of the US-ASCII %d94-126 ; characters not including "[", ; "]", or "\" */ i_assert(ctx->data < ctx->end); i_assert(*ctx->data == '['); for (start = ctx->data++; ctx->data < ctx->end; ctx->data++) { switch (*ctx->data) { case '\0': if (ctx->nul_replacement_str != NULL) { str_append_data(str, start, ctx->data - start); str_append(str, ctx->nul_replacement_str); start = ctx->data + 1; } else { return -1; } break; case '[': /* not allowed */ return -1; case ']': str_append_data(str, start, ctx->data - start + 1); ctx->data++; return rfc822_skip_lwsp(ctx); case '\n': /* folding whitespace, remove the (CR)LF */ len = ctx->data - start; if (len > 0 && start[len-1] == '\r') len--; str_append_data(str, start, len); start = ctx->data + 1; break; case '\\': /* note: the '\' is preserved in the output */ ctx->data++; if (ctx->data >= ctx->end) return -1; #if 0 if (*ctx->data == '\r' || *ctx->data == '\n' || *ctx->data == '\0') { /* quoted-pair doesn't allow CR/LF/NUL. They are part of the obs-qp though, so don't return them as error. */ str_append_data(str, start, ctx->data - start); start = ctx->data; ctx->data--; break; } #endif break; } } /* missing ']' */ return -1; } /* dot-atom / domain-literal */ static int rfc822_parse_domain(struct rfc822_parser_context *ctx, string_t *str) { /* domain = dot-atom / domain-literal / obs-domain domain-literal = [CFWS] "[" *([FWS] dcontent) [FWS] "]" [CFWS] obs-domain = atom *("." atom) */ i_assert(ctx->data < ctx->end); i_assert(*ctx->data == '@'); ctx->data++; if (rfc822_skip_lwsp(ctx) <= 0) return -1; if (*ctx->data == '[') return rfc822_parse_domain_literal(ctx, str); else return rfc822_parse_dot_atom(ctx, str, false); } static void add_address(struct message_address_parser_context *ctx) { struct message_address *addr; addr = malloc(sizeof(struct message_address)); if (!addr) i_panic("malloc() failed: %s", strerror(errno)); memcpy(addr, &ctx->addr, sizeof(ctx->addr)); memset(&ctx->addr, 0, sizeof(ctx->addr)); if (ctx->first_addr == NULL) ctx->first_addr = addr; else ctx->last_addr->next = addr; ctx->last_addr = addr; } static int parse_nonstrict_dot_atom(struct rfc822_parser_context *ctx, string_t *str) { int ret = -1; do { while (*ctx->data == '.') { str_append_c(str, '.'); ctx->data++; if (ctx->data == ctx->end) { /* @domain is missing, but local-part parsing was successful */ return 0; } ret = 1; } if (*ctx->data == '@') break; ret = rfc822_parse_atom(ctx, str); } while (ret > 0 && *ctx->data == '.'); return ret; } static int parse_local_part(struct message_address_parser_context *ctx) { int ret; bool char_problem; /* local-part = dot-atom / quoted-string / obs-local-part obs-local-part = word *("." word) */ i_assert(ctx->parser.data < ctx->parser.end); str_truncate(ctx->str, 0); char_problem = false; while (ctx->parser.data < ctx->parser.end) { if (*ctx->parser.data == '"') ret = rfc822_parse_quoted_string(&ctx->parser, ctx->str); else if (!ctx->non_strict_dots || ctx->non_strict_dots_as_invalid) ret = rfc822_parse_dot_atom(&ctx->parser, ctx->str, true); else ret = parse_nonstrict_dot_atom(&ctx->parser, ctx->str); if (ret < 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) return -1; if (ret == -2) char_problem = true; if (ctx->parser.data >= ctx->parser.end) break; if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0) break; if (*ctx->parser.data != '.') break; ctx->parser.data++; if (ctx->parser.data >= ctx->parser.end) { char_problem = true; break; } if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0) break; if (ctx->parser.data >= ctx->parser.end || *ctx->parser.data == '@') { char_problem = true; break; } } if (char_problem || ret < 0) ctx->addr.invalid_syntax = true; ctx->addr.mailbox = str_ccopy(ctx->str); ctx->addr.mailbox_len = str_len(ctx->str); return ret; } static int parse_domain(struct message_address_parser_context *ctx) { int ret; str_truncate(ctx->str, 0); if ((ret = rfc822_parse_domain(&ctx->parser, ctx->str)) < 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) return -1; ctx->addr.domain = str_ccopy(ctx->str); ctx->addr.domain_len = str_len(ctx->str); return ret; } static int parse_domain_list(struct message_address_parser_context *ctx) { int ret; bool dot_problem; /* obs-domain-list = "@" domain *(*(CFWS / "," ) [CFWS] "@" domain) */ str_truncate(ctx->str, 0); dot_problem = false; for (;;) { if (ctx->parser.data >= ctx->parser.end) return dot_problem ? -2 : 0; if (*ctx->parser.data != '@') break; if (str_len(ctx->str) > 0) str_append_c(ctx->str, ','); str_append_c(ctx->str, '@'); if ((ret = rfc822_parse_domain(&ctx->parser, ctx->str)) <= 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) return ret; if (ret == -2) dot_problem = true; while (rfc822_skip_lwsp(&ctx->parser) > 0 && *ctx->parser.data == ',') ctx->parser.data++; } ctx->addr.route = str_ccopy(ctx->str); ctx->addr.route_len = str_len(ctx->str); return dot_problem ? -2 : 1; } static int parse_angle_addr(struct message_address_parser_context *ctx, bool parsing_path) { int ret; /* "<" [ "@" route ":" ] local-part "@" domain ">" */ i_assert(*ctx->parser.data == '<'); ctx->parser.data++; if (rfc822_skip_lwsp(&ctx->parser) <= 0) return -1; if (*ctx->parser.data == '@') { if ((ret = parse_domain_list(ctx)) > 0 && *ctx->parser.data == ':') { ctx->parser.data++; } else if (parsing_path && (ctx->parser.data >= ctx->parser.end || *ctx->parser.data != ':')) { return -1; } else { if (ctx->fill_missing && ret != -2) ctx->addr.route = strdup("INVALID_ROUTE"); ctx->addr.invalid_syntax = true; if (ctx->parser.data >= ctx->parser.end) return -1; if (ret == -2) ctx->parser.data++; /* try to continue anyway */ } if (rfc822_skip_lwsp(&ctx->parser) <= 0) return -1; } if (*ctx->parser.data == '>') { /* <> address isn't valid */ } else { if ((ret = parse_local_part(ctx)) <= 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) return -1; if (ret == -2) ctx->addr.invalid_syntax = true; if (ctx->parser.data >= ctx->parser.end) return 0; if (*ctx->parser.data == '@') { if ((ret = parse_domain(ctx)) <= 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) return -1; if (ret == -2) ctx->addr.invalid_syntax = true; if (ctx->parser.data >= ctx->parser.end) return 0; } } if (*ctx->parser.data != '>') return -1; ctx->parser.data++; return rfc822_skip_lwsp(&ctx->parser); } static int parse_name_addr(struct message_address_parser_context *ctx) { int ret; /* name-addr = [display-name] angle-addr display-name = phrase */ str_truncate(ctx->str, 0); ret = rfc822_parse_phrase(&ctx->parser, ctx->str); if ((ret <= 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) || *ctx->parser.data != '<') return -1; if (ret == -2) ctx->addr.invalid_syntax = true; if (str_len(ctx->str) == 0) { /* Cope with "
" without display name */ ctx->addr.name = NULL; } else { ctx->addr.name = str_ccopy(ctx->str); ctx->addr.name_len = str_len(ctx->str); } if (ctx->parser.last_comment != NULL) str_truncate(ctx->parser.last_comment, 0); if (parse_angle_addr(ctx, false) < 0) { /* broken */ if (ctx->fill_missing) ctx->addr.domain = strdup("SYNTAX_ERROR"); ctx->addr.invalid_syntax = true; } if (ctx->parser.last_comment != NULL) { if (str_len(ctx->parser.last_comment) > 0) { ctx->addr.comment = str_ccopy(ctx->parser.last_comment); ctx->addr.comment_len = str_len(ctx->parser.last_comment); } } return ctx->parser.data < ctx->parser.end ? 1 : 0; } static int parse_addr_spec(struct message_address_parser_context *ctx) { /* addr-spec = local-part "@" domain */ int ret, ret2 = -3; i_assert(ctx->parser.data < ctx->parser.end); if (ctx->parser.last_comment != NULL) str_truncate(ctx->parser.last_comment, 0); #if 0 bool quoted_string = *ctx->parser.data == '"'; #endif ret = parse_local_part(ctx); if (ret <= 0) { /* end of input or parsing local-part failed */ ctx->addr.invalid_syntax = true; } if (ret != 0 && ctx->parser.data < ctx->parser.end && *ctx->parser.data == '@') { ret2 = parse_domain(ctx); if (ret2 <= 0 && ret != -2) ret = ret2; if (ret2 == -2) { ctx->addr.invalid_syntax = true; if (ctx->parser.data >= ctx->parser.end) ret = 0; } } if (ctx->parser.last_comment != NULL && str_len(ctx->parser.last_comment) > 0) { ctx->addr.comment = str_ccopy(ctx->parser.last_comment); ctx->addr.comment_len = str_len(ctx->parser.last_comment); } else if (ret2 == -3) { #if 0 /* So far we've read user without @domain and without (Display Name). We'll assume that a single "user" (already read into addr.mailbox) is a mailbox, but if it's followed by anything else it's a display-name. */ str_append_c(ctx->str, ' '); size_t orig_str_len = str_len(ctx->str); (void)rfc822_parse_phrase(&ctx->parser, ctx->str); if (str_len(ctx->str) != orig_str_len) { ctx->addr.mailbox = NULL; ctx->addr.name = str_ccopy(ctx->str); ctx->addr.name_len = str_len(ctx->str); } else { if (!quoted_string) ctx->addr.domain = strdup(""); } ctx->addr.invalid_syntax = true; ret = -1; #endif } return ret; } static void add_fixed_address(struct message_address_parser_context *ctx) { if (ctx->addr.mailbox == NULL) { ctx->addr.mailbox = strdup(!ctx->fill_missing ? "" : "MISSING_MAILBOX"); ctx->addr.invalid_syntax = true; } if (ctx->addr.domain == NULL || ctx->addr.domain_len == 0) { free(ctx->addr.domain); ctx->addr.domain = strdup(!ctx->fill_missing ? "" : "MISSING_DOMAIN"); ctx->addr.invalid_syntax = true; } add_address(ctx); } static int parse_mailbox(struct message_address_parser_context *ctx) { const unsigned char *start; size_t len; int ret; /* mailbox = name-addr / addr-spec */ start = ctx->parser.data; if ((ret = parse_name_addr(ctx)) < 0) { /* nope, should be addr-spec */ if (ctx->addr.name != NULL) { free(ctx->addr.name); ctx->addr.name = NULL; } if (ctx->addr.route != NULL) { free(ctx->addr.route); ctx->addr.route = NULL; } if (ctx->addr.mailbox != NULL) { free(ctx->addr.mailbox); ctx->addr.mailbox = NULL; } if (ctx->addr.domain != NULL) { free(ctx->addr.domain); ctx->addr.domain = NULL; } if (ctx->addr.comment != NULL) { free(ctx->addr.comment); ctx->addr.comment = NULL; } if (ctx->addr.original != NULL) { free(ctx->addr.original); ctx->addr.original = NULL; } ctx->parser.data = start; ret = parse_addr_spec(ctx); if (ctx->addr.invalid_syntax && ctx->addr.name == NULL && ctx->addr.mailbox != NULL && ctx->addr.domain == NULL) { ctx->addr.name = ctx->addr.mailbox; ctx->addr.name_len = ctx->addr.mailbox_len; ctx->addr.mailbox = NULL; ctx->addr.mailbox_len = 0; } } if (ret < 0) ctx->addr.invalid_syntax = true; len = ctx->parser.data - start; ctx->addr.original = malloc(len + 1); if (!ctx->addr.original) i_panic("malloc() failed: %s", strerror(errno)); memcpy(ctx->addr.original, start, len); ctx->addr.original[len] = 0; ctx->addr.original_len = len; add_fixed_address(ctx); free(ctx->addr.original); ctx->addr.original = NULL; return ret; } static int parse_group(struct message_address_parser_context *ctx) { int ret; /* group = display-name ":" [mailbox-list / CFWS] ";" [CFWS] display-name = phrase */ str_truncate(ctx->str, 0); ret = rfc822_parse_phrase(&ctx->parser, ctx->str); if ((ret <= 0 && (ret != -2 || (!ctx->non_strict_dots && !ctx->non_strict_dots_as_invalid))) || *ctx->parser.data != ':') return -1; if (ret == -2) ctx->addr.invalid_syntax = true; /* from now on don't return -1 even if there are problems, so that the caller knows this is a group */ ctx->parser.data++; if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0) ctx->addr.invalid_syntax = true; ctx->addr.mailbox = str_ccopy(ctx->str); ctx->addr.mailbox_len = str_len(ctx->str); add_address(ctx); if (ret > 0 && *ctx->parser.data != ';') { for (;;) { /* mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list */ if (parse_mailbox(ctx) <= 0) { /* broken mailbox - try to continue anyway. */ } if (ctx->parser.data >= ctx->parser.end || *ctx->parser.data != ',') break; ctx->parser.data++; if (rfc822_skip_lwsp(&ctx->parser) <= 0) { ret = -1; break; } } } if (ret >= 0) { if (ctx->parser.data >= ctx->parser.end || *ctx->parser.data != ';') ret = -1; else { ctx->parser.data++; ret = rfc822_skip_lwsp(&ctx->parser); } } if (ret < 0) ctx->addr.invalid_syntax = true; add_address(ctx); return ret == 0 ? 0 : 1; } static int parse_address(struct message_address_parser_context *ctx) { const unsigned char *start; int ret; /* address = mailbox / group */ start = ctx->parser.data; if ((ret = parse_group(ctx)) < 0) { /* not a group, try mailbox */ ctx->parser.data = start; ret = parse_mailbox(ctx); } return ret; } static int parse_address_list(struct message_address_parser_context *ctx, unsigned int max_addresses) { const unsigned char *start; size_t len; int ret = 0; /* address-list = (address *("," address)) / obs-addr-list */ while (max_addresses > 0) { max_addresses--; if ((ret = parse_address(ctx)) == 0) break; if (ctx->parser.data >= ctx->parser.end || *ctx->parser.data != ',') { ctx->last_addr->invalid_syntax = true; ret = -1; break; } ctx->parser.data++; start = ctx->parser.data; if ((ret = rfc822_skip_lwsp(&ctx->parser)) <= 0) { if (ret < 0) { /* ends with some garbage */ len = ctx->parser.data - start; ctx->addr.original = malloc(len + 1); if (!ctx->addr.original) i_panic("malloc() failed: %s", strerror(errno)); memcpy(ctx->addr.original, start, len); ctx->addr.original[len] = 0; ctx->addr.original_len = len; add_fixed_address(ctx); free(ctx->addr.original); ctx->addr.original = NULL; } break; } } return ret; } static char *mem_copy(const char *mem, size_t len) { char *copy; copy = malloc(len+1); if (!copy) i_panic("malloc() failed: %s", strerror(errno)); memcpy(copy, mem, len); copy[len] = 0; return copy; } void message_address_add(struct message_address **first, struct message_address **last, const char *name, size_t name_len, const char *route, size_t route_len, const char *mailbox, size_t mailbox_len, const char *domain, size_t domain_len, const char *comment, size_t comment_len) { struct message_address *message; message = malloc(sizeof(struct message_address)); if (!message) i_panic("malloc() failed: %s", strerror(errno)); message->name = name ? mem_copy(name, name_len) : NULL; message->name_len = name_len; message->route = route ? mem_copy(route, route_len) : NULL; message->route_len = route_len; message->mailbox = mailbox ? mem_copy(mailbox, mailbox_len) : NULL; message->mailbox_len = mailbox_len; message->domain = domain ? mem_copy(domain, domain_len) : NULL; message->domain_len = domain_len; message->comment = comment ? mem_copy(comment, comment_len) : NULL; message->comment_len = comment_len; message->original = NULL; message->original_len = 0; message->next = NULL; if (!*first) *first = message; else (*last)->next = message; *last = message; } void message_address_free(struct message_address **addr) { struct message_address *current; struct message_address *next; current = *addr; while (current) { next = current->next; free(current->name); free(current->route); free(current->mailbox); free(current->domain); free(current->comment); free(current->original); free(current); current = next; } *addr = NULL; } struct message_address * message_address_parse(const char *input, size_t input_len, unsigned int max_addresses, enum message_address_parse_flags flags) { string_t *str; struct message_address_parser_context ctx; memset(&ctx, 0, sizeof(ctx)); str = str_new(128); rfc822_parser_init(&ctx.parser, (const unsigned char *)input, input_len, str); if (rfc822_skip_lwsp(&ctx.parser) <= 0) { /* no addresses */ str_free(&str); return NULL; } ctx.str = str_new(128); ctx.fill_missing = (flags & MESSAGE_ADDRESS_PARSE_FLAG_FILL_MISSING) != 0; ctx.non_strict_dots = (flags & MESSAGE_ADDRESS_PARSE_FLAG_STRICT_DOTS) == 0; ctx.non_strict_dots_as_invalid = (flags & MESSAGE_ADDRESS_PARSE_FLAG_NON_STRICT_DOTS_AS_INVALID) != 0; (void)parse_address_list(&ctx, max_addresses); str_free(&ctx.str); str_free(&str); rfc822_parser_deinit(&ctx.parser); return ctx.first_addr; } static bool has_mime_word(const char *str, size_t len) { const char *ptr; const char *end; ptr = str; end = str+len; while ((ptr = memchr(ptr, '=', end - ptr)) != NULL) { ptr++; if (*ptr == '?') return true; } return false; } void message_address_write(char **output, size_t *output_len, const struct message_address *addr) { string_t *str; const char *tmp; bool first = true, in_group = false; str = str_new(128); #if 0 if (addr == NULL) return; /* <> path */ if (addr->mailbox == NULL && addr->domain == NULL) { i_assert(addr->next == NULL); str_append(str, "<>"); return; } #endif /* a) mailbox@domain b) name <@route:mailbox@domain> c) group: .. ; */ while (addr != NULL) { if (first) first = false; else str_append(str, ", "); if (addr->domain == NULL) { if (!in_group) { /* beginning of group. mailbox is the group name, others are NULL. */ if (addr->mailbox != NULL && addr->mailbox_len != 0) { /* check for MIME encoded-word */ if (has_mime_word(addr->mailbox, addr->mailbox_len)) /* MIME encoded-word MUST NOT appear within a 'quoted-string' so escaping and quoting of phrase is not possible, instead use obsolete RFC822 phrase syntax which allow spaces */ str_append_data(str, addr->mailbox, addr->mailbox_len); else str_append_maybe_escape(str, addr->mailbox, addr->mailbox_len, true); } else { /* empty group name needs to be quoted */ str_append(str, "\"\""); } str_append(str, ": "); first = true; } else { /* end of group. all fields should be NULL. */ i_assert(addr->mailbox == NULL); /* cut out the ", " */ tmp = str_c(str)+str_len(str)-2; i_assert((tmp[0] == ',' || tmp[0] == ':') && tmp[1] == ' '); if (tmp[0] == ',' && tmp[1] == ' ') str_truncate(str, str_len(str)-2); else if (tmp[0] == ':' && tmp[1] == ' ') str_truncate(str, str_len(str)-1); str_append_c(str, ';'); } in_group = !in_group; } else if ((addr->name == NULL || addr->name_len == 0) && addr->route == NULL) { /* no name and no route. use only mailbox@domain */ i_assert(addr->mailbox != NULL); str_append_maybe_escape(str, addr->mailbox, addr->mailbox_len, false); str_append_c(str, '@'); str_append_data(str, addr->domain, addr->domain_len); if (addr->comment != NULL) { str_append(str, " ("); str_append_data(str, addr->comment, addr->comment_len); str_append_c(str, ')'); } } else { /* name and/or route. use full Name */ i_assert(addr->mailbox != NULL); if (addr->name != NULL && addr->name_len != 0) { /* check for MIME encoded-word */ if (has_mime_word(addr->name, addr->name_len)) /* MIME encoded-word MUST NOT appear within a 'quoted-string' so escaping and quoting of phrase is not possible, instead use obsolete RFC822 phrase syntax which allow spaces */ str_append_data(str, addr->name, addr->name_len); else str_append_maybe_escape(str, addr->name, addr->name_len, true); } if (addr->route != NULL || addr->mailbox_len != 0 || addr->domain_len != 0) { if (addr->name != NULL && addr->name_len != 0) str_append_c(str, ' '); str_append_c(str, '<'); if (addr->route != NULL) { str_append_data(str, addr->route, addr->route_len); str_append_c(str, ':'); } str_append_maybe_escape(str, addr->mailbox, addr->mailbox_len, false); if (addr->domain_len != 0) { str_append_c(str, '@'); str_append_data(str, addr->domain, addr->domain_len); } str_append_c(str, '>'); } if (addr->comment != NULL) { str_append(str, " ("); str_append_data(str, addr->comment, addr->comment_len); str_append_c(str, ')'); } } addr = addr->next; } *output = str_ccopy(str); *output_len = str_len(str); str_free(&str); } void compose_address(char **output, size_t *output_len, const char *mailbox, size_t mailbox_len, const char *domain, size_t domain_len) { string_t *str; str = str_new(128); str_append_maybe_escape(str, mailbox, mailbox_len, false); str_append_c(str, '@'); str_append_data(str, domain, domain_len); *output = str_ccopy(str); *output_len = str_len(str); str_free(&str); } void split_address(const char *input, size_t input_len, char **mailbox, size_t *mailbox_len, char **domain, size_t *domain_len) { struct message_address_parser_context ctx; int ret; if (!input || !input[0]) { *mailbox = NULL; *mailbox_len = 0; *domain = NULL; *domain_len = 0; return; } memset(&ctx, 0, sizeof(ctx)); rfc822_parser_init(&ctx.parser, (const unsigned char *)input, input_len, NULL); ctx.str = str_new(128); ctx.fill_missing = false; ctx.non_strict_dots = false; ctx.non_strict_dots_as_invalid = false; ret = rfc822_skip_lwsp(&ctx.parser); if (ret > 0) ret = parse_addr_spec(&ctx); else ret = -1; if (ret >= 0) ret = rfc822_skip_lwsp(&ctx.parser); if (ret < 0 || ctx.parser.data != ctx.parser.end || ctx.addr.invalid_syntax) { free(ctx.addr.mailbox); free(ctx.addr.domain); *mailbox = NULL; *mailbox_len = 0; *domain = NULL; *domain_len = 0; } else { *mailbox = ctx.addr.mailbox; *mailbox_len = ctx.addr.mailbox_len; *domain = ctx.addr.domain; *domain_len = ctx.addr.domain_len; } free(ctx.addr.comment); free(ctx.addr.route); free(ctx.addr.name); free(ctx.addr.original); rfc822_parser_deinit(&ctx.parser); str_free(&ctx.str); } void string_free(char *string) { free(string); }