Sensitive validator branch (#1364)

Validator for missing sensitive trait
This commit is contained in:
rchache 2022-11-14 19:18:07 -05:00 committed by GitHub
parent c226a57ff0
commit 6b7e154588
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 427 additions and 8 deletions

View File

@ -120,6 +120,59 @@ Example:
{name: "CamelCase"}
]
.. _MissingSensitiveTrait:
MissingSensitiveTrait
=====================
This validator scans shape or member names and identifies ones that look like they could contain
sensitive information but are not marked with the ``@sensitive`` trait. This does not apply to
shapes where the ``@sensitive`` trait would be invalid. Users may also configure this validator
with a custom list of terms, and choose to ignore the built-in defaults. The default terms include
types of personal information such as 'birth day', 'billing address', 'zip code', or 'gender',
as well as information that could be maliciously exploited such as 'password', 'secret key', or 'credit card'.
Rationale
Sensitive information is often subject to legal requirements regarding how it is
handled and logged. Failing to mark sensitive data as such carries a large risk, so it is
helpful to have an automated validator catch these cases rather than relying on best effort.
Default severity
``WARNING``
Configuration
.. list-table::
:header-rows: 1
:widths: 20 20 60
* - Property
- Type
- Description
* - terms
- [ ``string`` ]
- A list of search terms that match shape or member names
case-insensitively based on word boundaries (for example, the term
"access key id" matches "AccessKeyId", "access_key_id", and
"accesskeyid"). See :ref:`words-boundaries` for details.
* - excludeDefaults
- ``boolean``
- A flag indicating whether or not to disregard the default set
of terms. This property is not required and defaults to false.
If set to true, ``terms`` must be provided.
Example:
.. code-block:: smithy
$version: "2"
metadata validators = [{
name: "MissingSensitiveTrait"
configuration: {
excludeDefaults: false,
terms: ["home planet"]
}
}]
.. _NoninclusiveTerms:
@ -227,7 +280,7 @@ Configuration
- A list of search terms that match shape or member names
case-insensitively based on word boundaries (for example, the term
"access key id" matches "AccessKeyId", "access_key_id", and
"accesskeyid"). See :ref:`reserved-words-boundaries` for details.
"accesskeyid"). See :ref:`words-boundaries` for details.
* - selector
- ``string``
- Specifies a selector of shapes to validate for this configuration.
@ -345,12 +398,12 @@ be specified.
* - **Codename**
- Match
.. _reserved-words-boundaries:
.. _words-boundaries:
Reserved words boundary matching
--------------------------------
Word boundary matching
-----------------------
Word boundaries can be used to find reserved words. Word boundary search
Word boundaries can be used to find terms of interest. Word boundary search
text consists of one or more alphanumeric words separated by a single
space. When comparing against another string, the contents of the string
are separated into words based on word boundaries. Those words are
@ -379,7 +432,7 @@ demonstrates how comparison text is parsed into words.
* - access_keyID
- access key id
The following table shows matches for a reserved term of ``secret id``,
The following table shows matches for a search term of ``secret id``,
meaning the word "secret" needs to be followed by the word "id". Word
boundary searches also match if the search terms, concatenated together with
no spaces, are considered a word in the search text (for example,

View File

@ -0,0 +1,178 @@
/*
* Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/apache2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package software.amazon.smithy.linters;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import software.amazon.smithy.model.Model;
import software.amazon.smithy.model.node.NodeMapper;
import software.amazon.smithy.model.shapes.MemberShape;
import software.amazon.smithy.model.shapes.Shape;
import software.amazon.smithy.model.traits.SensitiveTrait;
import software.amazon.smithy.model.validation.AbstractValidator;
import software.amazon.smithy.model.validation.ValidationEvent;
import software.amazon.smithy.model.validation.ValidatorService;
import software.amazon.smithy.utils.ListUtils;
import software.amazon.smithy.utils.SetUtils;
/**
 * Validates that shapes and members that possibly contain sensitive data are
 * marked with the sensitive trait.
 *
 * <p>Shape names and member names are compared against a set of search terms
 * (the built-in defaults, custom terms taken from the validator configuration,
 * or both). A warning is emitted for every name that matches a term when no
 * sensitive trait is found on the shape or, for members, on either the
 * containing shape or the target shape. Operation, service, and resource
 * shapes are never checked.
 */
public final class MissingSensitiveTraitValidator extends AbstractValidator {
    static final Set<String> DEFAULT_SENSITIVE_TERMS = SetUtils.of(
        "account number",
        "bank",
        "billing address",
        "birth",
        "birth day",
        "citizenship",
        "credentials",
        "credit card",
        "csc",
        "driver license",
        "drivers license",
        "email",
        "ethnicity",
        "first name",
        "gender",
        "insurance",
        "ip address",
        "last name",
        "maiden name",
        "mailing address",
        "pass phrase",
        "pass word",
        "passport",
        "phone",
        "religion",
        "secret",
        "sexual orientation",
        "social security",
        "ssn",
        "tax payer",
        "telephone",
        "zip code"
    );

    private final WordBoundaryMatcher wordMatcher;

    /**
     * Registers the validator and builds it from its configuration node.
     */
    public static final class Provider extends ValidatorService.Provider {
        public Provider() {
            super(MissingSensitiveTraitValidator.class, node -> {
                NodeMapper mapper = new NodeMapper();
                return new MissingSensitiveTraitValidator(
                        mapper.deserialize(node, MissingSensitiveTraitValidator.Config.class));
            });
        }
    }

    /**
     * MissingSensitiveTrait configuration.
     */
    public static final class Config {
        private List<String> terms = ListUtils.of();
        private boolean excludeDefaults;

        /**
         * @return the custom search terms; an empty list unless terms were set.
         */
        public List<String> getTerms() {
            return terms;
        }

        /**
         * @param terms custom search terms to look for in shape and member names.
         */
        public void setTerms(List<String> terms) {
            this.terms = terms;
        }

        /**
         * @return true if the built-in default terms should be disregarded.
         */
        public boolean getExcludeDefaults() {
            return excludeDefaults;
        }

        /**
         * @param excludeDefaults whether to disregard the built-in default terms.
         */
        public void setExcludeDefaults(boolean excludeDefaults) {
            this.excludeDefaults = excludeDefaults;
        }
    }

    /**
     * Builds the validator from its configuration.
     *
     * @param config the custom terms and whether to exclude the default terms.
     * @throws IllegalArgumentException if the defaults are excluded and no custom terms are given.
     */
    private MissingSensitiveTraitValidator(Config config) {
        // A null term list is treated like an empty one so that a bad configuration
        // surfaces as the descriptive error below rather than a NullPointerException.
        List<String> terms = config.getTerms();
        if (terms == null) {
            terms = ListUtils.of();
        }

        // Validate the configuration before building any state.
        if (config.getExcludeDefaults() && terms.isEmpty()) {
            // This configuration combination makes the validator a no-op.
            throw new IllegalArgumentException("Cannot set 'excludeDefaults' to true and leave "
                    + "'terms' unspecified.");
        }

        wordMatcher = new WordBoundaryMatcher();
        terms.forEach(wordMatcher::addSearch);
        if (!config.getExcludeDefaults()) {
            DEFAULT_SENSITIVE_TERMS.forEach(wordMatcher::addSearch);
        }
    }

    /**
     * Finds shapes without the sensitive trait that possibly contain sensitive data,
     * based on the shape/member name and the list of key words and phrases.
     *
     * @param model Model to validate.
     * @return list of violation events
     */
    @Override
    public List<ValidationEvent> validate(Model model) {
        return scanShapes(model);
    }

    /**
     * Checks the name of every shape in the model against the search terms.
     *
     * @param model Model whose shapes are scanned.
     * @return a mutable list with one event per shape or member whose name matched.
     */
    private List<ValidationEvent> scanShapes(Model model) {
        List<ValidationEvent> events = new ArrayList<>();
        for (Shape shape : model.toSet()) {
            if (shape.isMemberShape()) {
                MemberShape member = (MemberShape) shape;
                Shape container = model.expectShape(member.getContainer());
                Shape target = model.expectShape(member.getTarget());
                // A member is considered covered when either its container or its
                // target carries the sensitive trait.
                if (!container.hasTrait(SensitiveTrait.class) && !target.hasTrait(SensitiveTrait.class)) {
                    detectSensitiveTerms(member.getMemberName(), member).ifPresent(events::add);
                }
            } else if (!shape.isOperationShape()
                    && !shape.isServiceShape()
                    && !shape.isResourceShape()
                    && !shape.hasTrait(SensitiveTrait.class)) {
                detectSensitiveTerms(shape.toShapeId().getName(), shape).ifPresent(events::add);
            }
        }
        return events;
    }

    /**
     * Creates a warning if the given name matches one of the search terms.
     *
     * @param name the shape name or member name to check.
     * @param shape the shape the warning is attached to.
     * @return the warning event, or empty if no term matched.
     */
    private Optional<ValidationEvent> detectSensitiveTerms(String name, Shape shape) {
        return wordMatcher.getFirstMatch(name).map(term -> {
            String message = shape.isMemberShape()
                    ? String.format("This member possibly contains sensitive data but neither the enclosing nor target"
                            + " shape are marked with the sensitive trait (based on the presence of '%s')", term)
                    : String.format("This shape possibly contains sensitive data but is not marked "
                            + "with the sensitive trait (based on the presence of '%s')", term);
            return warning(shape, message);
        });
    }
}

View File

@ -18,6 +18,7 @@ package software.amazon.smithy.linters;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import software.amazon.smithy.utils.StringUtils;
@ -62,9 +63,9 @@ final class WordBoundaryMatcher implements Predicate<String> {
return false;
}
String searchString = searchCache.computeIfAbsent(text, WordBoundaryMatcher::splitWords);
String haystack = searchCache.computeIfAbsent(text, WordBoundaryMatcher::splitWords);
for (String needle : words) {
if (testWordMatch(needle, searchString)) {
if (testWordMatch(needle, haystack)) {
return true;
}
}
@ -72,6 +73,26 @@ final class WordBoundaryMatcher implements Predicate<String> {
return false;
}
/**
 * Looks up the first registered search term that matches the given text.
 *
 * <p>Terms are tried in the iteration order of the registered terms, and the
 * search stops at the first one that matches.
 *
 * @param text the String within which to search for matches
 * @return the first matching term, or empty if the text is null or empty,
 *  no terms are registered, or nothing matched
 */
public Optional<String> getFirstMatch(String text) {
    boolean nothingToSearch = text == null || text.isEmpty() || words.isEmpty();
    if (nothingToSearch) {
        return Optional.empty();
    }

    // The split form of each input text is cached so repeated lookups reuse it.
    String splitText = searchCache.computeIfAbsent(text, WordBoundaryMatcher::splitWords);

    Optional<String> firstMatch = Optional.empty();
    for (String term : words) {
        if (testWordMatch(term, splitText)) {
            firstMatch = Optional.of(term);
            break;
        }
    }
    return firstMatch;
}
private boolean testWordMatch(String needle, String haystack) {
int position = haystack.indexOf(needle);
int haystackLength = haystack.length();

View File

@ -9,3 +9,4 @@ software.amazon.smithy.linters.ReservedWordsValidator$Provider
software.amazon.smithy.linters.ShouldHaveUsedTimestampValidator$Provider
software.amazon.smithy.linters.StandardOperationVerbValidator$Provider
software.amazon.smithy.linters.StutteredShapeNameValidator$Provider
software.amazon.smithy.linters.MissingSensitiveTraitValidator$Provider

View File

@ -0,0 +1,6 @@
[WARNING] smithy.example#FooOperationRequest: This shape possibly contains sensitive data but is not marked with the sensitive trait (based on the presence of 'foo') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#FooOperationRequest$secondMember: This member possibly contains sensitive data but neither the enclosing nor target shape are marked with the sensitive trait (based on the presence of 'second member') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#FooOperationResponse: This shape possibly contains sensitive data but is not marked with the sensitive trait (based on the presence of 'foo') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#MyString: This shape possibly contains sensitive data but is not marked with the sensitive trait (based on the presence of 'string') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#BillingInfo$bank: This member possibly contains sensitive data but neither the enclosing nor target shape are marked with the sensitive trait (based on the presence of 'bank') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#BillingInfo$data: This member possibly contains sensitive data but neither the enclosing nor target shape are marked with the sensitive trait (based on the presence of 'data') | DefaultMissingSensitiveTrait

View File

@ -0,0 +1,77 @@
$version: "2.0"
// Test fixture: MissingSensitiveTrait configured with custom terms only.
// excludeDefaults is true, so only the terms listed below are searched for.
metadata validators = [{
name: "MissingSensitiveTrait",
id: "DefaultMissingSensitiveTrait",
configuration: {
excludeDefaults: true,
terms: [
"bank",
"foo",
"string",
"second member",
// Multi-word terms: in the expected events, "da ta" flags the member `data`
// (reported as 'data'), while "bill inginfo" does not flag BillingInfo.
"bill inginfo",
"da ta"
]
}
}]
namespace smithy.example
// Service and operation shapes are never checked by the validator, so the
// "foo" in their names is not expected to be reported.
service FooService {
version: "2020-09-21",
operations: [FooOperation],
}
operation FooOperation {
input: FooOperationRequest,
output: FooOperationResponse,
errors: [],
}
// should get flagged (shape name contains the term "foo")
structure FooOperationRequest {
firstMember: CabAnkle,
// should get flagged (member name matches the term "second member")
secondMember: BillingInfo,
thirdMember: SafeBillingInfo
}
// should get flagged (shape name contains the term "foo")
structure FooOperationResponse {
}
// should not get flagged
// NOTE(review): presumably a negative case for "bank", whose letters only
// appear across the word boundary between "Cab" and "Ankle" - confirm.
structure CabAnkle {
myMember: MyString
}
// should not get flagged
structure BillingInfo {
// both should get flagged (terms "bank" and "da ta")
bank: MyString,
data: MyString,
// should not get flagged: the target shape is marked @sensitive
safeBank: MySensitiveString,
// should not get flagged: "first name" is not among the configured terms
firstName: FirstName,
// should not get flagged
lastName: LastName
}
// Neither this structure nor its members should get flagged: the structure
// is marked @sensitive, which also covers its members.
@sensitive
structure SafeBillingInfo {
bank: MyString,
data: MyString,
safeBank: MySensitiveString,
firstName: MyString,
lastName: MySensitiveString
}
// should get flagged (shape name matches the term "string")
string MyString
// should not get flagged: already marked @sensitive
@sensitive
string MySensitiveString
string FirstName
@sensitive
string LastName

View File

@ -0,0 +1,5 @@
[WARNING] smithy.example#BillingAddress: This shape possibly contains sensitive data but is not marked with the sensitive trait (based on the presence of 'billing address') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#BillingAddress$bank: This member possibly contains sensitive data but neither the enclosing nor target shape are marked with the sensitive trait (based on the presence of 'bank') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#BillingAddress$firstName: This member possibly contains sensitive data but neither the enclosing nor target shape are marked with the sensitive trait (based on the presence of 'first name') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#FirstName: This shape possibly contains sensitive data but is not marked with the sensitive trait (based on the presence of 'first name') | DefaultMissingSensitiveTrait
[WARNING] smithy.example#CabAnkle$myBirthday: This member possibly contains sensitive data but neither the enclosing nor target shape are marked with the sensitive trait (based on the presence of 'birthday') | DefaultMissingSensitiveTrait

View File

@ -0,0 +1,65 @@
$version: "2.0"
// Test fixture: MissingSensitiveTrait with no configuration, so only the
// validator's built-in default terms are searched for.
metadata validators = [
{name: "MissingSensitiveTrait",
id: "DefaultMissingSensitiveTrait"}
]
namespace smithy.example
service FooService {
version: "2020-09-21",
operations: [FooOperation],
}
operation FooOperation {
input: FooOperationRequest,
output: FooOperationResponse,
errors: [],
}
structure FooOperationRequest {
firstMember: CabAnkle,
secondMember: BillingAddress,
thirdMember: SafeBillingAddress
}
structure FooOperationResponse {
}
// The structure itself should not get flagged.
// NOTE(review): presumably a negative case for "bank", whose letters only
// appear across the word boundary between "Cab" and "Ankle" - confirm.
structure CabAnkle {
myMember: MyString,
// should get flagged (reported as 'birthday' in the expected events)
myBirthday: MyString
}
// should get flagged (default term "billing address")
structure BillingAddress {
// should get flagged (default term "bank")
bank: MyString,
// should not get flagged: "data" is not a default term
data: MyString,
// should not get flagged: the target shape is marked @sensitive
safeBank: MySensitiveString,
// should get flagged (default term "first name"; the target is not @sensitive)
firstName: FirstName,
// should not get flagged: the target shape is marked @sensitive
lastName: LastName
}
// Neither this structure nor its members should get flagged: the structure
// is marked @sensitive, which also covers its members.
@sensitive
structure SafeBillingAddress {
bank: MyString,
data: MyString,
safeBank: MySensitiveString,
firstName: MyString,
lastName: MySensitiveString
}
string MyString
@sensitive
string MySensitiveString
// should get flagged (default term "first name")
string FirstName
// should not get flagged: already marked @sensitive
@sensitive
string LastName

View File

@ -0,0 +1 @@
[ERROR] -: Error creating `MissingSensitiveTrait` validator: Cannot set 'excludeDefaults' to true and leave 'terms' unspecified. | Model

View File

@ -0,0 +1,12 @@
$version: "2.0"
// Test fixture: invalid configuration. Excluding the default terms while
// providing no custom terms would make the validator a no-op, so creating
// the validator is expected to fail with an error.
metadata validators = [{
name: "MissingSensitiveTrait",
id: "DefaultMissingSensitiveTrait",
configuration: {
excludeDefaults: true,
terms: []
}
}]
namespace smithy.example