Skip to content
This repository was archived by the owner on Apr 10, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 164 additions & 3 deletions net/instaweb/rewriter/csp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
// Author: [email protected] (Maksim Orlovich)
//
// This provides basic parsing and evaluation of a (subset of)
// Content-Security-Policy that's relevant for PageSpeed Automatic
// Content-Security-Policy that's relevant for PageSpeed Automatic.

#include "net/instaweb/rewriter/public/csp.h"

Expand Down Expand Up @@ -80,6 +80,7 @@ bool CspSourceExpression::TryParseScheme(StringPiece* input) {
kind_ = kSchemeSource;
input->substr(0, pos).CopyToString(&mutable_url_data()->scheme_part);
input->remove_prefix(pos + 1);
LowerString(&mutable_url_data()->scheme_part);
return true;
}

Expand All @@ -88,6 +89,7 @@ bool CspSourceExpression::TryParseScheme(StringPiece* input) {
&& ((*input)[pos + 1] == '/') && ((*input)[pos + 2] == '/')) {
input->substr(0, pos).CopyToString(&mutable_url_data()->scheme_part);
input->remove_prefix(pos + 3);
LowerString(&mutable_url_data()->scheme_part);
}

// Either way, it's not a valid scheme-source at this point, even if it's
Expand Down Expand Up @@ -136,6 +138,7 @@ CspSourceExpression CspSourceExpression::Parse(StringPiece input) {
result.mutable_url_data()->host_part.push_back(input[0]);
input.remove_prefix(1);
}
LowerString(&result.mutable_url_data()->host_part);

// Verify accumulated host-part is valid.
StringPiece host_part(result.url_data().host_part);
Expand Down Expand Up @@ -172,12 +175,160 @@ CspSourceExpression CspSourceExpression::Parse(StringPiece input) {
return CspSourceExpression();
}

input.CopyToString(&result.mutable_url_data()->path_part);
// Normalize and tokenize the path.
StringPieceVector components;
SplitStringPieceToVector(input, "/", &components, true);

for (StringPiece c : components) {
GoogleString canon = GoogleUrl::CanonicalizePath(c);
if (canon.empty()) {
LOG(DFATAL) << "Path canonicalization returned empty string?" << c;
return CspSourceExpression();
}
result.mutable_url_data()->path_part.push_back(canon.substr(1));
}
result.mutable_url_data()->path_exact_match =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to consider the context in which we are parsing this, to be able to determine this? (specifically, if we are considering a redirect url Y/N?)

!input.empty() && !input.ends_with("/");
}

return result;
}

bool CspSourceExpression::Matches(
const GoogleUrl& origin_url, const GoogleUrl& url) const {
// Implementation of the "Does url match expression in origin with
// redirect count?" algorithm (where redirect count is 0 for our
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe link to the rfc section on this algorithm?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

// purposes, since we check the request).
// https://w3c.github.io/webappsec-csp/#match-url-to-source-list

if (kind_ != kSelf && kind_ != kSchemeSource && kind_ != kHostSource) {
return false;
}

if (!origin_url.IsAnyValid() || !url.IsAnyValid()) {
Copy link
Member

@oschaaf oschaaf Mar 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to specifically handle blob / data / filesystem schemes here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. Actually I want to limit this to http[s], since I dropped support for ws/wss (as we don't use them and they were making for some complicated expressions)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually probably want data:, too, assuming we actually fetch it (checking)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't. Hmm.

return false;
}

// Check for 'self' first, since that doesn't need/have url_data()
if (kind_ == kSelf) {
if (origin_url.Origin() == url.Origin()) {
return true;
}

if (origin_url.Host() != url.Host()) {
return false;
}

if (origin_url.SchemeIs("http") && url.SchemeIs("https")) {
// Using the same port is OK.
if (origin_url.EffectiveIntPort() == url.EffectiveIntPort()) {
return true;
}

// Using default ports for both is OK, too.
if (HasDefaultPortForScheme(origin_url) && HasDefaultPortForScheme(url)) {
return true;
}
}

return false;
}

// Give our state some short names closer to those in the spec
StringPiece expr_scheme = url_data().scheme_part;
StringPiece expr_host = url_data().host_part;
StringPiece expr_port = url_data().port_part;
const std::vector<GoogleString>& expr_path = url_data().path_part;

// Some special handling of *, which for some reason handles some schemes
// a bit differently than other things with * host portion and no scheme
// specified.
if (kind_ == kHostSource &&
expr_scheme.empty() &&
expr_host == "*" &&
expr_port.empty() &&
expr_path.empty()) {
if (url.SchemeIs("http") ||
url.SchemeIs("https") ||
url.SchemeIs("ftp")) {
return true;
}
return (url.Scheme() == origin_url.Scheme());
}

if (!expr_scheme.empty()
&& url.Scheme() != expr_scheme
&& !(expr_scheme == "http" && url.SchemeIs("https"))) {
return false;
}

if (kind_ == kSchemeSource) {
return true;
}

if (url.Host().empty() || expr_host.empty()) {
return false;
}

if (expr_scheme.empty()
&& url.Scheme() != origin_url.Scheme()
&& !(origin_url.SchemeIs("http") && url.SchemeIs("https"))) {
return false;
}

if (expr_host[0] == '*') {
StringPiece remaining = expr_host.substr(1);
if (!url.Host().ends_with(remaining)) {
return false;
}
} else {
if (url.Host() != expr_host) {
return false;
}
}

// TODO(morlovich): Implement IP-address handling here, once appropriate
// spec has been read.

if (expr_port.empty()) {
if (!HasDefaultPortForScheme(url)) {
return false;
}
} else {
// TODO(morlovich): Check whether the :80/:443 case is about effective
// or explicit port.
if (expr_port != "*"
&& expr_port != IntegerToString(url.EffectiveIntPort())
&& !(expr_port == "80" && url.EffectiveIntPort() == 443)) {
return false;
}
}

// TODO(morlovich):Redirect following may require changes here ---
// this would also be skipped for redirects.
if (!expr_path.empty()) {
// TODO(morlovich): Verify that behavior for query here is what we want.
StringPieceVector url_path_list;
SplitStringPieceToVector(url.PathAndLeaf(), "/", &url_path_list, true);
if (expr_path.size() > url_path_list.size()) {
return false;
}

if (url_data().path_exact_match
&& (url_path_list.size() != expr_path.size())) {
return false;
}

for (int i = 0, n = expr_path.size(); i < n; ++i) {
if (expr_path[i] != url_path_list[i]) {
return false;
}
}
}

return true;
}

CspSourceExpression CspSourceExpression::ParseQuoted(StringPiece input) {
CHECK(!input.empty());

Expand All @@ -204,6 +355,15 @@ CspSourceExpression CspSourceExpression::ParseQuoted(StringPiece input) {
return CspSourceExpression(kUnknown);
}

bool CspSourceExpression::HasDefaultPortForScheme(const GoogleUrl& url) {
int url_scheme_port = GoogleUrl::DefaultPortForScheme(url.Scheme());
if (url_scheme_port == url::PORT_UNSPECIFIED) {
return false;
}

return (url_scheme_port == url.EffectiveIntPort());
}

std::unique_ptr<CspSourceList> CspSourceList::Parse(StringPiece input) {
std::unique_ptr<CspSourceList> result(new CspSourceList);

Expand Down Expand Up @@ -264,7 +424,8 @@ std::unique_ptr<CspPolicy> CspPolicy::Parse(StringPiece input) {
if (dir_name != CspDirective::kNumSourceListDirectives &&
policy->policies_[static_cast<int>(dir_name)] == nullptr) {
// Note: repeated directives are ignored per the "Parse a serialized
// CSP as disposition" algorith,
// CSP as disposition" algorithm.
// https://w3c.github.io/webappsec-csp/#parse-serialized-policy
policy->policies_[static_cast<int>(dir_name)]
= CspSourceList::Parse(value);
}
Expand Down
Loading