Open4

HTTP header values normalization for DOM API in the real world

arayaaraya

Investigating normalization/validation implementations for new Headers API

arayaaraya

WebKit

Implementation of append: https://github.com/WebKit/WebKit/blob/647f7bf42b8d133280d06a83c52c5c56146888d5/Source/WebCore/Modules/fetch/FetchHeaders.cpp#L76

https://github.com/WebKit/WebKit/blob/647f7bf42b8d133280d06a83c52c5c56146888d5/Source/WTF/wtf/ASCIICType.h#L171
Trimming uses the following predicate, which matches HTTP whitespace: space, \n, \t, and \r

// Returns true when `character` is one of the four whitespace code points
// U+0009 (tab), U+000A (line feed), U+000D (carriage return) or U+0020 (space).
//
// Form feed (\f) is deliberately NOT accepted, which is what distinguishes this
// from isASCIIWhitespace: JSON/HTTP/XML do not treat \f as whitespace.
//
// The same set is used by:
//   - ECMA-404 (JSON): "Whitespace is any sequence of one or more of the
//     following code points: character tabulation (U+0009), line feed (U+000A),
//     carriage return (U+000D), and space (U+0020)."
//   - HTTP whitespace: https://fetch.spec.whatwg.org/#http-whitespace-byte
//   - XML whitespace:  https://www.w3.org/TR/2008/REC-xml-20081126/#NT-S
template<Character CharacterType> constexpr bool isASCIIWhitespaceWithoutFF(CharacterType character)
{
    switch (character) {
    case '\t':
    case '\n':
    case '\r':
    case ' ':
        return true;
    default:
        return false;
    }
}

validation: https://github.com/WebKit/WebKit/blob/285b9f14d91179e262dbf72c7916d116efaabd86/Source/WebCore/platform/network/HTTPParsers.cpp#L126-L140

// Validates a header value that is expected to have been trimmed already.
//
// Returns false when either the first or the last code unit is reported as a
// tab or space by isTabOrSpace, or when any code unit is NUL (0x00),
// LF (0x0A) or CR (0x0D). Returns true otherwise.
bool isValidHTTPHeaderValue(const String& value)
{
    const auto length = value.length();

    // NOTE(review): for an empty value both indices below are out of range
    // (length - 1 wraps if the length type is unsigned); presumably
    // String::operator[] tolerates that -- confirm against the String class.
    const char16_t leading = value[0];
    const char16_t trailing = value[length - 1];
    if (isTabOrSpace(leading) || isTabOrSpace(trailing))
        return false;

    // Scan every code unit for the characters that are never allowed.
    for (unsigned index = 0; index < length; ++index) {
        const char16_t unit = value[index];
        switch (unit) {
        case 0x00: // NUL
        case 0x0A: // LF
        case 0x0D: // CR
            return false;
        default:
            break;
        }
    }
    return true;
}
arayaaraya

Gecko

Implementation of append: https://searchfox.org/firefox-main/rev/e02959386f6f89c1476edba10b3902f4e4f3ed4c/dom/fetch/InternalHeaders.cpp#89

trimming HTTP whitespace (\t, \n, \r, and space): https://searchfox.org/firefox-main/rev/e02959386f6f89c1476edba10b3902f4e4f3ed4c/netwerk/protocol/http/nsHttp.cpp#200-207

void TrimHTTPWhitespace(const nsACString& aSource, nsACString& aDest) {
  nsAutoCString str(aSource);

  // HTTP whitespace 0x09: '\t', 0x0A: '\n', 0x0D: '\r', 0x20: ' '
  static const char kHTTPWhitespace[] = "\t\n\r ";
  str.Trim(kHTTPWhitespace);
  aDest.As

validation: https://searchfox.org/firefox-main/rev/e02959386f6f89c1476edba10b3902f4e4f3ed4c/netwerk/protocol/http/nsHttp.cpp#210-223

bool IsReasonableHeaderValue(const nsACString& s) {
  // Header values MUST NOT contain line-breaks.  RFC 2616 technically
  // permits CTL characters, including CR and LF, in header values provided
  // they are quoted.  However, this can lead to problems if servers do not
  // interpret quoted strings properly.  Disallowing CR and LF here seems
  // reasonable and keeps things simple.  We also disallow a null byte.
  const nsACString::char_type* end = s.EndReading();
  for (const nsACString::char_type* i = s.BeginReading(); i != end; ++i) {
    if (*i == '\r' || *i == '\n' || *i == '\0') {
      return false;
    }
  }
  return true;
}