From 236e223f910248b2f308fa6c9fe9d4e2468829d9 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 9 Oct 2023 21:54:09 -0700 Subject: [PATCH 001/151] Skeleton draft of data_masking docs --- docs/utilities/data_masking.md | 101 +++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 docs/utilities/data_masking.md diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md new file mode 100644 index 00000000000..f5829feb438 --- /dev/null +++ b/docs/utilities/data_masking.md @@ -0,0 +1,101 @@ +--- +title: Data Masking +description: Utility +--- + + + +The data masking utility provides a simple solution to conceal incoming data so that sensitive information is not passed downstream or logged. + + +## Key features + +* Mask data irreversibly without having to install any encryption library. +* Out of the box integration with AWS Encryption SDK to easily encrypt and decrypt data. +* Install any encryption provider and connect it with our new Data Masker class to easily mask, encrypt, and decrypt data. + + +## Terminology + +Mask: This refers to concealing or partially replacing sensitive information with a non-sensitive placeholder or mask. The key characteristic of this operation is that it is irreversible, meaning the original sensitive data cannot be retrieved from the masked data. Masking is commonly applied when displaying data to users or for anonymizing data in non-reversible scenarios. + +Encrypt: This is the process of transforming plaintext data into a ciphertext format using an encryption algorithm and a cryptographic key. Encryption is a reversible process, meaning the original data can be retrieved (decrypted) using the appropriate decryption key. + +Decrypt: This is the process of reversing the encryption process, converting ciphertext back into plaintext using a decryption algorithm and the correct decryption key. Decryption is applied to recover the original data from its encrypted form. 
Decryption requires a decryption key that only authorized users have. + +## Getting started + +### IAM Permissions + +If using the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Encrypt`, `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. + +If using any other encryption provider, make sure to have the permissions for your role that it requires. + +If not using any encryption services and just masking data, your Lambda does not need any additional permissions to use this utility. + + +### Required resources + +If using the AWS Encryption SDK, you must have a KMS key with Encrypt, Decrypt, and GenerateDataKey permissions. + +If using any other encryption provider, you must have the resources required for that provider. + + +### Masking data +You can mask data without having to install any encryption library. + +=== "getting_started_mask_data.py" + ```python hl_lines="3 10" + --8<-- "examples/data_masking/src/getting_started_mask_data.py" + ``` + +### Encrypting and decrypting data +In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. You can still use the masking feature while using any encryption provider. + +=== "getting_started_encrypt_data.py" + ```python hl_lines="3 10" + --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" + ``` + +## Advanced + +### Adjusting configurations for AWS Encryption SDK + +### Create your own encryption provider + +You can create your own custom encryption provider by inheriting the `BaseProvider` class, and implementing both the `encrypt()` and `decrypt()` methods in order to encrypt and decrypt data using your custom encryption provider. You can also either use your own data serializer and deserializer by passing the `BaseProvider` class a `json_serializer` and `json_deserializer` argument, or you can use the default. 
+ +All masking logic is handled by the `mask()` method from the `BaseProvider` class. + +Here is an example of implementing a custom encryption provider using an external encryption library like [ItsDangerous](https://itsdangerous.palletsprojects.com/en/2.1.x/){target="_blank" rel="nofollow"}, a widely popular encryption library. + +=== "working_with_own_provider.py" + ```python hl_lines="5 13 20 24" + --8<-- "examples/data_masking/src/working_with_own_provider.py" + ``` + +=== "custom_provider.py" + ```python hl_lines="6 9 17 24" + --8<-- "examples/data_masking/src/custom_provider.py" + ``` + +## Testing your code + +For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `DataMasking.decrypt` method. + +=== "test_single_mock.py" + ```python hl_lines="4 8" + --8<-- "examples/data_masking/tests/test_single_mock.py" + ``` + +=== "single_mock.py" + ```python + --8<-- "examples/data_masking/tests/src/single_mock.py" + ``` + +If we need to use this pattern across multiple tests, we can avoid repetition by refactoring to use our own pytest fixture: + +=== "test_with_fixture.py" + ```python hl_lines="5 10" + --8<-- "examples/data_masking/tests/test_with_fixture.py" + ``` \ No newline at end of file From 025fc77942df9bf0f8f81740a9df28a1e4558ab9 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 11 Oct 2023 11:25:55 -0700 Subject: [PATCH 002/151] Added example getting started code to data masking docs --- docs/utilities/data_masking.md | 29 +++-- examples/data_masking/src/custom_provider.py | 19 ++++ .../src/getting_started_encrypt_data.py | 104 ++++++++++++++++++ .../src/getting_started_mask_data.py | 45 ++++++++ .../src/working_with_own_provider.py | 102 +++++++++++++++++ .../data_masking/tests/src/single_mock.py | 0 .../data_masking/tests/test_single_mock.py | 0 
.../data_masking/tests/test_with_fixture.py | 0 8 files changed, 289 insertions(+), 10 deletions(-) create mode 100644 examples/data_masking/src/custom_provider.py create mode 100644 examples/data_masking/src/getting_started_encrypt_data.py create mode 100644 examples/data_masking/src/getting_started_mask_data.py create mode 100644 examples/data_masking/src/working_with_own_provider.py create mode 100644 examples/data_masking/tests/src/single_mock.py create mode 100644 examples/data_masking/tests/test_single_mock.py create mode 100644 examples/data_masking/tests/test_with_fixture.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index f5829feb438..fcf2e8c199a 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -7,41 +7,40 @@ description: Utility The data masking utility provides a simple solution to conceal incoming data so that sensitive information is not passed downstream or logged. - ## Key features * Mask data irreversibly without having to install any encryption library. -* Out of the box integration with AWS Encryption SDK to easily encrypt and decrypt data. +* Out of the box integration with the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank" rel="nofollow"} to easily encrypt and decrypt data. * Install any encryption provider and connect it with our new Data Masker class to easily mask, encrypt, and decrypt data. - ## Terminology -Mask: This refers to concealing or partially replacing sensitive information with a non-sensitive placeholder or mask. The key characteristic of this operation is that it is irreversible, meaning the original sensitive data cannot be retrieved from the masked data. Masking is commonly applied when displaying data to users or for anonymizing data in non-reversible scenarios. +Mask: This refers to concealing or partially replacing sensitive information with a non-sensitive placeholder or mask. 
The key characteristic of this operation is that it is irreversible, meaning the original sensitive data cannot be retrieved from the masked data. Masking is commonly applied when displaying data to users or for anonymizing data in non-reversible scenarios. For example, display the last four digits of a credit card number as "**** **** **** 1234". -Encrypt: This is the process of transforming plaintext data into a ciphertext format using an encryption algorithm and a cryptographic key. Encryption is a reversible process, meaning the original data can be retrieved (decrypted) using the appropriate decryption key. +Encrypt: This is the process of transforming plaintext data into a ciphertext format using an encryption algorithm and a cryptographic key. Encryption is a reversible process, meaning the original data can be retrieved (decrypted) using the appropriate decryption key. You can use this, for instance, to encrypt any PII (personally identifiable information) of your customers and make sure only the people with the right permissions are allowed to decrypt and view the plaintext PII data, in accordance with GDPR. -Decrypt: This is the process of reversing the encryption process, converting ciphertext back into plaintext using a decryption algorithm and the correct decryption key. Decryption is applied to recover the original data from its encrypted form. Decryption requires an encryption key that only authorized users have. +Decrypt: This is the process of reversing the encryption process, converting ciphertext back into its original plaintext using a decryption algorithm and the correct decryption key that only authorized personnel should have access to. ## Getting started ### IAM Permissions -If using the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Encrypt`, `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. 
+If using the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Encrypt`, `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. If using any other encryption provider, make sure to have the permissions for your role that it requires. If not using any encryption services and just masking data, your Lambda does not need any additional permissions to use this utility. - ### Required resources -If using the AWS Encryption SDK, you must have a KMS key with Encrypt, Decrypt, and GenerateDataKey permissions. +If using the AWS Encryption SDK, you must have an AWS KMS key with Encrypt, Decrypt, and GenerateDataKey permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. If using any other encryption provider, you must have the resources required for that provider. +## Using the utility ### Masking data + You can mask data without having to install any encryption library. === "getting_started_mask_data.py" @@ -50,6 +49,7 @@ You can mask data without having to install any encryption library. ``` ### Encrypting and decrypting data + In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. You can still use the masking feature while using any encryption provider. === "getting_started_encrypt_data.py" @@ -61,6 +61,15 @@ In order to encrypt data, you must use either our out-of-the-box integration wit ### Adjusting configurations for AWS Encryption SDK +You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify these values at `utilities/data_masking/constants.py`. + +The `CACHE_CAPACITY` value is currently set at `100`. This value represents the maximum number of entries that can be retained in the local cryptographic materials cache. 
Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.caches.local.html){target="_blank" rel="nofollow"} for more information. + +The `MAX_CACHE_AGE_SECONDS` value is currently set at `300`. It represents the maximum time (in seconds) that a cache entry may be kept in the cache. + +The `MAX_MESSAGES_ENCRYPTED` value is currently set at `200`. It represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this and `MAX_CACHE_AGE_SECONDS`. + + ### Create your own encryption provider You can create your own custom encryption provider by inheriting the `BaseProvider` class, and implementing both the `encrypt()` and `decrypt()` methods in order to encrypt and decrypt data using your custom encryption provider. You can also either use your own data serializer and deserializer by passing the `BaseProvider` class a `json_serializer` and `json_deserializer` argument, or you can use the default. 
@@ -98,4 +107,4 @@ If we need to use this pattern across multiple tests, we can avoid repetition by === "test_with_fixture.py" ```python hl_lines="5 10" --8<-- "examples/data_masking/tests/test_with_fixture.py" - ``` \ No newline at end of file + ``` diff --git a/examples/data_masking/src/custom_provider.py b/examples/data_masking/src/custom_provider.py new file mode 100644 index 00000000000..e35fc0cbb50 --- /dev/null +++ b/examples/data_masking/src/custom_provider.py @@ -0,0 +1,19 @@ +from itsdangerous.url_safe import URLSafeSerializer + +from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider + + +class MyCustomEncryption(BaseProvider): + def __init__(self, secret): + super().__init__() + self.secret = URLSafeSerializer(secret) + + def encrypt(self, data: str) -> str: + if data is None: + return data + return self.secret.dumps(data) + + def decrypt(self, data: str) -> str: + if data is None: + return data + return self.secret.loads(data) diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py new file mode 100644 index 00000000000..285f226e5e6 --- /dev/null +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -0,0 +1,104 @@ +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.constants import KMS_KEY_ARN +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider + + +def lambda_handler(event, context: LambdaContext): + + data = { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": { + "street": "123 Main St", + "city": "Anytown", + "state": "CA", + "zip": "12345", + }, + } + + encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) + data_masker = DataMasking(provider=encryption_provider) + + encrypted = data_masker.encrypt(data=data, fields=["email", "address.street"]) + # 
encrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", + # "address": { + # "street": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address.street"]) + # decrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": { + # "street": "123 Main St", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + encrypted = data_masker.encrypt(data=data, fields=["email", "address"]) + # encrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", + # "address": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP" + # } + + decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address"]) + # decrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": { + # "street": "123 Main St", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + encrypted = data_masker.encrypt(data=data) + # encrypted = "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc" + + decrypted = data_masker.decrypt(data=encrypted) + # decrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": { + # "street": "123 Main St", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + masked = data_masker.mask(data=data, fields=["email", "address.street"]) + # masked = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "*****", + # "address": { + # "street": "*****", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py new file mode 
100644 index 00000000000..83ecdfc16c7 --- /dev/null +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -0,0 +1,45 @@ +from aws_lambda_powertools.utilities.data_masking import DataMasking + + +def lambda_handler(event, context: LambdaContext): + + data_masker = DataMasking() + + data = { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": { + "street": "123 Main St", + "city": "Anytown", + "state": "CA", + "zip": "12345", + }, + } + + masked = data_masker.mask(data=data, fields=["email", "address.street"]) + # masked = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "*****", + # "address": { + # "street": "*****", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + masked = data_masker.mask(data=data, fields=["address"]) + # masked = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": "*****" + # } + + masked = data_masker.mask(data=data) + # masked = "*****" diff --git a/examples/data_masking/src/working_with_own_provider.py b/examples/data_masking/src/working_with_own_provider.py new file mode 100644 index 00000000000..10e62fb13f0 --- /dev/null +++ b/examples/data_masking/src/working_with_own_provider.py @@ -0,0 +1,102 @@ +from aws_lambda_powertools.utilities.data_masking.base import DataMasking +from examples.data_masking.src.custom_provider import MyCustomEncryption + + +def lambda_handler(): + data = { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": { + "street": "123 Main St", + "city": "Anytown", + "state": "CA", + "zip": "12345", + }, + } + + encryption_provider = MyCustomEncryption(secret="secret-key") + data_masker = DataMasking(provider=encryption_provider) + + encrypted = data_masker.encrypt(data, fields=["email", "address.street"]) + # encrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": 
"InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", + # "address": { + # "street": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address.street"]) + # decrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": { + # "street": "123 Main St", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + encrypted = data_masker.encrypt(data=data, fields=["email", "address"]) + # encrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", + # "address": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP" + # } + + decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address"]) + # decrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": { + # "street": "123 Main St", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + encrypted = data_masker.encrypt(data=data) + # encrypted = "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc" + + decrypted = data_masker.decrypt(data=encrypted) + # decrypted = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "johndoe@example.com", + # "address": { + # "street": "123 Main St", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } + + masked = data_masker.mask(data=data, fields=["email", "address.street"]) + # masked = { + # "id": 1, + # "name": "John Doe", + # "age": 30, + # "email": "*****", + # "address": { + # "street": "*****", + # "city": "Anytown", + # "state": "CA", + # "zip": "12345" + # }, + # } diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/data_masking/tests/test_single_mock.py 
b/examples/data_masking/tests/test_single_mock.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/examples/data_masking/tests/test_with_fixture.py b/examples/data_masking/tests/test_with_fixture.py new file mode 100644 index 00000000000..e69de29bb2d From 0c0c4acd813942de28eae39451ec951ee67e257b Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 11 Oct 2023 14:23:27 -0700 Subject: [PATCH 003/151] Added SAM template example and fixed KMS permission info --- docs/utilities/data_masking.md | 4 +- examples/data_masking/sam/template.yaml | 57 +++++++++++++++++++ examples/data_masking/src/app.py | 50 ++++++++++++++++ .../src/getting_started_encrypt_data.py | 6 +- .../pt-load-test-stack/template.yaml | 12 +++- 5 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 examples/data_masking/sam/template.yaml create mode 100644 examples/data_masking/src/app.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index fcf2e8c199a..5006e0691a2 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -25,7 +25,7 @@ Decrypt: This is the process of reversing the encryption process, converting cip ### IAM Permissions -If using the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Encrypt`, `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. +If using the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. If using any other encryption provider, make sure to have the permissions for your role that it requires. @@ -33,7 +33,7 @@ If not using any encryption services and just masking data, your Lambda does not ### Required resources -If using the AWS Encryption SDK, you must have an AWS KMS key with Encrypt, Decrypt, and GenerateDataKey permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. 
+If using the AWS Encryption SDK, you must have an AWS KMS key with full read/write permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. If using any other encryption provider, you must have the resources required for that provider. diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml new file mode 100644 index 00000000000..716c510486a --- /dev/null +++ b/examples/data_masking/sam/template.yaml @@ -0,0 +1,57 @@ +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: > + Powertools for AWS Lambda (Python) data masking example + +Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-template-anatomy-globals.html + Function: + Timeout: 5 + Runtime: python3.10 + Tracing: Active +Resources: + MyKMSKey: + Type: AWS::KMS::Key + Properties: + Enabled: true + KeyPolicy: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: kms:* + Resource: "*" + Principal: + AWS: !Join [ "", [ "arn:aws:iam::", !Ref "AWS::AccountId", ":root" ] ] + DataMaskingFunctionExample: + Type: AWS::Serverless::Function # More info about Function Resource: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-function.html + Properties: + Handler: app.lambda_handler + CodeUri: ../src + Description: Data Masking Function Example + MemorySize: 1024 # TODO: Recommended to use 1024 MB due to + Architectures: + - x86_64 + Policies: + Statement: + - Effect: Allow + Action: + - kms:Decrypt + - kms:GenerateDataKey + Resource: !GetAtt MyKMSKey.Arn + Tracing: Active + Environment: + Variables: + POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld + POWERTOOLS_METRICS_NAMESPACE: Powertools + LOG_LEVEL: INFO + KMS_KEY_ARN: !GetAtt MyKMSKey.Arn + Tags: + LambdaPowertools: python + +Outputs: + KMSKeyArn: + 
Description: ARN of the KMS Key + Value: !GetAtt MyKMSKey.Arn + + DataMaskingFunctionExample: + Description: Data Masking Function Example + Value: !GetAtt DataMaskingFunctionExample.Arn diff --git a/examples/data_masking/src/app.py b/examples/data_masking/src/app.py new file mode 100644 index 00000000000..83fb2637130 --- /dev/null +++ b/examples/data_masking/src/app.py @@ -0,0 +1,50 @@ +import os + +from aws_lambda_powertools import Logger, Tracer +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] + +json_blob = { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": {"street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345"}, + "phone_numbers": ["+1-555-555-1234", "+1-555-555-5678"], + "interests": ["Hiking", "Traveling", "Photography", "Reading"], + "job_history": { + "company": { + "company_name": "Acme Inc.", + "company_address": "5678 Interview Dr.", + }, + "position": "Software Engineer", + "start_date": "2015-01-01", + "end_date": "2017-12-31", + }, + "about_me": """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla tincidunt velit quis + sapien mollis, at egestas massa tincidunt. Suspendisse ultrices arcu a dolor dapibus, + ut pretium turpis volutpat. Vestibulum at sapien quis sapien dignissim volutpat ut a enim. + Praesent fringilla sem eu dui convallis luctus. Donec ullamcorper, sapien ut convallis congue, + risus mauris pretium tortor, nec dignissim arcu urna a nisl. Vivamus non fermentum ex. Proin + interdum nisi id sagittis egestas. Nam sit amet nisi nec quam pharetra sagittis. Aliquam erat + volutpat. Donec nec luctus sem, nec ornare lorem. Vivamus vitae orci quis enim faucibus placerat. + Nulla facilisi. Proin in turpis orci. 
Donec imperdiet velit ac tellus gravida, eget laoreet tellus + malesuada. Praesent venenatis tellus ac urna blandit, at varius felis posuere. Integer a commodo nunc. + """, +} + +tracer = Tracer() +logger = Logger() + + +@tracer.capture_lambda_handler +def lambda_handler(event: dict, context: LambdaContext) -> dict: + logger.info("Hello world function - HTTP 200") + data_masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN])) + encrypted = data_masker.encrypt(json_blob, fields=["address.street", "job_history.company.company_name"]) + decrypted = data_masker.decrypt(encrypted, fields=["address.street", "job_history.company.company_name"]) + return {"Decrypted_json": decrypted} diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 285f226e5e6..981269d9b63 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -1,9 +1,11 @@ +import os + from aws_lambda_powertools.utilities.data_masking import DataMasking -from aws_lambda_powertools.utilities.data_masking.constants import KMS_KEY_ARN from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] -def lambda_handler(event, context: LambdaContext): +def lambda_handler(event, context): data = { "id": 1, diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/template.yaml b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/template.yaml index f2a6540c267..184192c7a5d 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/template.yaml +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/template.yaml @@ -38,7 +38,9 @@ Resources: Policies: Statement: - Effect: Allow - Action: kms:* + Action: + - kms:Decrypt + - kms:GenerateDataKey Resource: 
!GetAtt MyKMSKey.Arn Tracing: Active Events: @@ -68,7 +70,9 @@ Resources: Policies: Statement: - Effect: Allow - Action: kms:* + Action: + - kms:Decrypt + - kms:GenerateDataKey Resource: !GetAtt MyKMSKey.Arn Tracing: Active Events: @@ -98,7 +102,9 @@ Resources: Policies: Statement: - Effect: Allow - Action: kms:* + Action: + - kms:Decrypt + - kms:GenerateDataKey Resource: !GetAtt MyKMSKey.Arn Tracing: Active Events: From 0112f69e80764df8660142976509f3280a7f00fb Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 11 Oct 2023 14:29:23 -0700 Subject: [PATCH 004/151] Added clearer file names --- examples/data_masking/sam/template.yaml | 2 +- ...pp.py => data_masking_function_example.py} | 0 .../data_masking_function_example_output.json | 34 +++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) rename examples/data_masking/src/{app.py => data_masking_function_example.py} (100%) create mode 100644 examples/data_masking/src/data_masking_function_example_output.json diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 716c510486a..e8d5f5718c1 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -24,7 +24,7 @@ Resources: DataMaskingFunctionExample: Type: AWS::Serverless::Function # More info about Function Resource: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-function.html Properties: - Handler: app.lambda_handler + Handler: data_masking_function_example.lambda_handler CodeUri: ../src Description: Data Masking Function Example MemorySize: 1024 # TODO: Recommended to use 1024 MB due to diff --git a/examples/data_masking/src/app.py b/examples/data_masking/src/data_masking_function_example.py similarity index 100% rename from examples/data_masking/src/app.py rename to examples/data_masking/src/data_masking_function_example.py diff --git a/examples/data_masking/src/data_masking_function_example_output.json 
b/examples/data_masking/src/data_masking_function_example_output.json new file mode 100644 index 00000000000..87601e79ee4 --- /dev/null +++ b/examples/data_masking/src/data_masking_function_example_output.json @@ -0,0 +1,34 @@ +{ + "Decrypted_json": { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": { + "street": "123 Main St", + "city": "Anytown", + "state": "CA", + "zip": "12345" + }, + "phone_numbers": [ + "+1-555-555-1234", + "+1-555-555-5678" + ], + "interests": [ + "Hiking", + "Traveling", + "Photography", + "Reading" + ], + "job_history": { + "company": { + "company_name": "Acme Inc.", + "company_address": "5678 Interview Dr." + }, + "position": "Software Engineer", + "start_date": "2015-01-01", + "end_date": "2017-12-31" + }, + "about_me": "\n Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla tincidunt velit quis\n sapien mollis, at egestas massa tincidunt. Suspendisse ultrices arcu a dolor dapibus,\n ut pretium turpis volutpat. Vestibulum at sapien quis sapien dignissim volutpat ut a enim.\n Praesent fringilla sem eu dui convallis luctus. Donec ullamcorper, sapien ut convallis congue,\n risus mauris pretium tortor, nec dignissim arcu urna a nisl. Vivamus non fermentum ex. Proin\n interdum nisi id sagittis egestas. Nam sit amet nisi nec quam pharetra sagittis. Aliquam erat\n volutpat. Donec nec luctus sem, nec ornare lorem. Vivamus vitae orci quis enim faucibus placerat.\n Nulla facilisi. Proin in turpis orci. Donec imperdiet velit ac tellus gravida, eget laoreet tellus\n malesuada. Praesent venenatis tellus ac urna blandit, at varius felis posuere. 
Integer a commodo nunc.\n " + } + } \ No newline at end of file From a45a4e39366848b3a97678a4f6ac20bd7c5b67f3 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 11 Oct 2023 22:46:45 -0700 Subject: [PATCH 005/151] Add testing your code example --- .../data_masking/tests/src/single_mock.py | 42 +++++++++++++++++++ .../data_masking/tests/test_single_mock.py | 10 +++++ .../data_masking/tests/test_with_fixture.py | 16 +++++++ 3 files changed, 68 insertions(+) diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py index e69de29bb2d..6c31f3dbbd2 100644 --- a/examples/data_masking/tests/src/single_mock.py +++ b/examples/data_masking/tests/src/single_mock.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import base64 +from typing import Any, Callable, Dict, Union + +from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider +from aws_lambda_powertools.utilities.data_masking.base import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider + +class FakeEncryptionKeyProvider(BaseProvider): + def __init__( + self, + json_serializer: Callable[[Dict], str] | None = None, + json_deserializer: Callable[[Union[Dict, str, bool, int, float]], str] | None = None, + ): + super().__init__(json_serializer=json_serializer, json_deserializer=json_deserializer) + + def encrypt(self, data: bytes | str, **kwargs) -> str: + data = self.json_serializer(data) + ciphertext = base64.b64encode(data).decode() + return ciphertext + + def decrypt(self, data: bytes, **kwargs) -> Any: + ciphertext_decoded = base64.b64decode(data) + ciphertext = self.json_deserializer(ciphertext_decoded) + return ciphertext + + +def handler(event, context): + data = "mock_value" + + fake_key_provider = FakeEncryptionKeyProvider() + provider = AwsEncryptionSdkProvider( + keys=["dummy"], + key_provider=fake_key_provider, + ) + data_masker = 
DataMasking(provider=provider) + + encrypted = data_masker.encrypt(data=data) + decrypted = data_masker.decrypt(data=encrypted) + + return {"message": "mock_value"} diff --git a/examples/data_masking/tests/test_single_mock.py b/examples/data_masking/tests/test_single_mock.py index e69de29bb2d..46265309c25 100644 --- a/examples/data_masking/tests/test_single_mock.py +++ b/examples/data_masking/tests/test_single_mock.py @@ -0,0 +1,10 @@ +from src import single_mock + + +def test_handler(monkeypatch): + def mockreturn(name): + return "mock_value" + + monkeypatch.setattr(single_mock.DataMasking, "decrypt", mockreturn) + return_val = single_mock.handler({}, {}) + assert return_val.get("message") == "mock_value" diff --git a/examples/data_masking/tests/test_with_fixture.py b/examples/data_masking/tests/test_with_fixture.py index e69de29bb2d..57c39b0af39 100644 --- a/examples/data_masking/tests/test_with_fixture.py +++ b/examples/data_masking/tests/test_with_fixture.py @@ -0,0 +1,16 @@ +import pytest +from src import single_mock + + +@pytest.fixture +def mock_data_masking_response(monkeypatch): + def mockreturn(name): + return "mock_value" + + monkeypatch.setattr(single_mock.DataMasking, "decrypt", mockreturn) + + +# Pass our fixture as an argument to all tests where we want to mock the decrypt response +def test_handler(mock_data_masking_response): + return_val = single_mock.handler({}, {}) + assert return_val.get("message") == "mock_value" From 7b1645dec862621a0505b10ac6a985da050375f9 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Thu, 12 Oct 2023 13:44:47 -0700 Subject: [PATCH 006/151] Added diagram and fixed highlighting in code examples --- docs/utilities/data_masking.md | 43 +++++++++++++++---- .../data_masking/tests/src/single_mock.py | 3 +- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 5006e0691a2..dd20c663765 100644 --- a/docs/utilities/data_masking.md +++ 
b/docs/utilities/data_masking.md @@ -44,7 +44,7 @@ If using any other encryption provider, you must have the resources required for You can mask data without having to install any encryption library. === "getting_started_mask_data.py" - ```python hl_lines="3 10" + ```python hl_lines="1 6 21 35 44" --8<-- "examples/data_masking/src/getting_started_mask_data.py" ``` @@ -53,7 +53,7 @@ You can mask data without having to install any encryption library. In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. You can still use the masking feature while using any encryption provider. === "getting_started_encrypt_data.py" - ```python hl_lines="3 10" + ```python hl_lines="3-4 6 23-26 40 54 63 77 80 94" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` @@ -72,25 +72,52 @@ The `MAX_MESSAGES_ENCRYPTED` value is currently set at `200`. It represents the ### Create your own encryption provider -You can create your own custom encryption provider by inheriting the `BaseProvider` class, and implementing both the `encrypt()` and `decrypt()` methods in order to encrypt and decrypt data using your custom encryption provider. You can also either use your own data serializer and deserializer by passing the `BaseProvider` class a `json_serializer` and `json_deserializer` argument, or you can use the default. +You can create your own custom encryption provider by inheriting the `BaseProvider` class, and implementing the `encrypt()` and `decrypt()` methods, and optionally the `mask()` method. You can also either use your own data serializer and deserializer by passing the `BaseProvider` class a `json_serializer` and `json_deserializer` argument, or you can use the default. -All masking logic is handled by the `mask()` and methods from the `BaseProvider` class. + +
+```mermaid +classDiagram + direction LR + class BaseProvider { + <<interface>> + +encrypt(data: Any) + +decrypt(data: str) + +mask(data: Any) + } -Here is an examples of implementing a custom encryption using an external encryption library like [ItsDangerous](https://itsdangerous.palletsprojects.com/en/2.1.x/){target="_blank" rel="nofollow"}, a widely popular encryption library. + class YourCustomEncryptionProvider { + +encrypt(data: Any) + +decrypt(data: str) + +mask(data: Any) + } + + BaseProvider <|-- YourCustomEncryptionProvider : implement +``` +Visual representation to bring your own encryption provider +
+ +* **`encrypt()`** – handles all logic for how to encrypt any data +* **`decrypt()`** – handles all logic for how to decrypt encrypted data +* **`mask()`** – handles all logic for how to irreversibly mask data (optional) + +You can then use this custom encryption provider class as the `provider` argument when creating a new `DataMasking` instance to use the encryption and decryption algorithms of the encryption library you have chosen. + +Here is an example of implementing a custom encryption using an external encryption library like [ItsDangerous](https://itsdangerous.palletsprojects.com/en/2.1.x/){target="_blank" rel="nofollow"}, a widely popular encryption library. === "working_with_own_provider.py" - ```python hl_lines="5 13 20 24" + ```python hl_lines="1-2 19-22 36 50 59 73 76 90" --8<-- "examples/data_masking/src/working_with_own_provider.py" ``` === "custom_provider.py" - ```python hl_lines="6 9 17 24" + ```python hl_lines="1 3 6 8 11 16" --8<-- "examples/data_masking/src/custom_provider.py" ``` ## Testing your code -For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.encrypt` method. +For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. 
=== "test_single_mock.py" ```python hl_lines="4 8" diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py index 6c31f3dbbd2..a6dcf2adb65 100644 --- a/examples/data_masking/tests/src/single_mock.py +++ b/examples/data_masking/tests/src/single_mock.py @@ -3,10 +3,11 @@ import base64 from typing import Any, Callable, Dict, Union -from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider from aws_lambda_powertools.utilities.data_masking.base import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider + class FakeEncryptionKeyProvider(BaseProvider): def __init__( self, From 3c83aa0c406917bf5ba20a939096a6211e7ae9fb Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Thu, 12 Oct 2023 14:05:20 -0700 Subject: [PATCH 007/151] Added SAM template section to md file --- docs/utilities/data_masking.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index dd20c663765..9b07ec915ec 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -57,6 +57,22 @@ In order to encrypt data, you must use either our out-of-the-box integration wit --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` +### SAM template example +=== "template.yaml" + ```yaml hl_lines="11-23 30 33-39 46" + --8<-- "examples/data_masking/sam/template.yaml" + ``` + +=== "data_masking_function_example.py" + ```python hl_lines="8 47-50" + --8<-- "examples/data_masking/src/data_masking_function_example.py" + ``` + +=== "output.json" + ```json + --8<-- "examples/data_masking/src/data_masking_function_example_output.json" + ``` + ## Advanced ### Adjusting configurations for AWS Encryption SDK From 3988f10f14b1dc1e2586f9cb9750c2016f6efe5e Mon Sep 17 00:00:00 2001 From: Seshu 
Brahma Date: Wed, 8 Nov 2023 14:32:42 -0800 Subject: [PATCH 008/151] Separated examples into more tabs, fixed comments --- docs/utilities/data_masking.md | 51 ++++++++--- examples/data_masking/sam/template.yaml | 2 +- .../data_masking/src/decrypt_data_output.json | 18 ++++ .../data_masking/src/encrypt_data_output.json | 13 +++ .../src/getting_started_encrypt_data.py | 88 ++----------------- .../src/getting_started_mask_data.py | 34 ++----- .../data_masking/src/mask_data_output.json | 13 +++ .../src/working_with_own_provider.py | 88 ++----------------- .../data_masking/tests/src/single_mock.py | 2 +- mkdocs.yml | 1 + 10 files changed, 111 insertions(+), 199 deletions(-) create mode 100644 examples/data_masking/src/decrypt_data_output.json create mode 100644 examples/data_masking/src/encrypt_data_output.json create mode 100644 examples/data_masking/src/mask_data_output.json diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 9b07ec915ec..82e50b4a370 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -15,48 +15,67 @@ The data masking utility provides a simple solution to conceal incoming data so ## Terminology -Mask: This refers to concealing or partially replacing sensitive information with a non-sensitive placeholder or mask. The key characteristic of this operation is that it is irreversible, meaning the original sensitive data cannot be retrieved from the masked data. Masking is commonly applied when displaying data to users or for anonymizing data in non-reversible scenarios. For example, display the last four digits of a credit card number as "**** **** **** 1234". +**Mask**: This refers to concealing or partially replacing sensitive information with a non-sensitive placeholder or mask. The key characteristic of this operation is that it is irreversible, meaning the original sensitive data cannot be retrieved from the masked data. 
Masking is commonly applied when displaying data to users or for anonymizing data in non-reversible scenarios. For example, display the last four digits of a credit card number as "**** **** **** 1234". -Encrypt: This is the process of transforming plaintext data into a ciphertext format using an encryption algorithm and a cryptographic key. Encryption is a reversible process, meaning the original data can be retrieved (decrypted) using the appropriate decryption key. You can use this, for instance, to encrypt any PII (personally identifiable information) of your customers and make sure only the people with the right permissions are allowed to decrypt and view the plaintext PII data, in accordance with GDPR. +**Encrypt**: This is the process of transforming plaintext data into a ciphertext format using an encryption algorithm and a cryptographic key. Encryption is a reversible process, meaning the original data can be retrieved (decrypted) using the appropriate decryption key. You can use this, for instance, to encrypt any PII (personally identifiable information) of your customers and make sure only the people with the right permissions are allowed to decrypt and view the plaintext PII data, in accordance with GDPR. -Decrypt: This is the process of reversing the encryption process, converting ciphertext back into its original plaintext using a decryption algorithm and the correct decryption key that only authorized personnel should have access to. +**Decrypt**: This is the process of reversing the encryption process, converting ciphertext back into its original plaintext using a decryption algorithm and the correct decryption key that only authorized personnel should have access to. ## Getting started ### IAM Permissions -If using the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. 
+To use the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. -If using any other encryption provider, make sure to have the permissions for your role that it requires. +For any other encryption provider, make sure to have the permissions for your role that it requires. -If not using any encryption services and just masking data, your Lambda does not need any additional permissions to use this utility. +If not using any encryption services and only masking data, your Lambda does not need any additional permissions to use this utility. ### Required resources -If using the AWS Encryption SDK, you must have an AWS KMS key with full read/write permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. +To use the AWS Encryption SDK, you must have an AWS KMS key with full read/write permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. -If using any other encryption provider, you must have the resources required for that provider. +For any other encryption provider, you must have the resources required for that provider. ## Using the utility +#### Working with JSON +When using the data masking utility with dictionaries or JSON objects, you can provide a list of keys to conceal the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can conceal values of nested keys by using dot notation. + ### Masking data You can mask data without having to install any encryption library. 
=== "getting_started_mask_data.py" - ```python hl_lines="1 6 21 35 44" + ```python hl_lines="1 6 27" --8<-- "examples/data_masking/src/getting_started_mask_data.py" ``` +=== "output.json" + ```json hl_lines="5 7 12" + --8<-- "examples/data_masking/src/mask_data_output.json" + ``` + ### Encryting and decrypting data In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. You can still use the masking feature while using any encryption provider. === "getting_started_encrypt_data.py" - ```python hl_lines="3-4 6 23-26 40 54 63 77 80 94" + ```python hl_lines="3-4 6 29 32 34" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` +=== "encrypted_output.json" + ```json hl_lines="5-7 12" + --8<-- "examples/data_masking/src/encrypt_data_output.json" + ``` + +=== "decrypted_output.json" + ```json hl_lines="5-7 12-17" + --8<-- "examples/data_masking/src/decrypt_data_output.json" + ``` + + ### SAM template example === "template.yaml" ```yaml hl_lines="11-23 30 33-39 46" @@ -122,7 +141,7 @@ You can then use this custom encryption provider class as the `provider` argumen Here is an example of implementing a custom encryption using an external encryption library like [ItsDangerous](https://itsdangerous.palletsprojects.com/en/2.1.x/){target="_blank" rel="nofollow"}, a widely popular encryption library. 
=== "working_with_own_provider.py" - ```python hl_lines="1-2 19-22 36 50 59 73 76 90" + ```python hl_lines="1-2 25 28 30" --8<-- "examples/data_masking/src/working_with_own_provider.py" ``` @@ -131,6 +150,16 @@ Here is an example of implementing a custom encryption using an external encrypt --8<-- "examples/data_masking/src/custom_provider.py" ``` +=== "encrypted_output.json" + ```json hl_lines="5-7 12" + --8<-- "examples/data_masking/src/encrypt_data_output.json" + ``` + +=== "decrypted_output.json" + ```json hl_lines="5-7 12-17" + --8<-- "examples/data_masking/src/decrypt_data_output.json" + ``` + ## Testing your code For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index e8d5f5718c1..d849a0760cd 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -27,7 +27,7 @@ Resources: Handler: data_masking_function_example.lambda_handler CodeUri: ../src Description: Data Masking Function Example - MemorySize: 1024 # TODO: Recommended to use 1024 MB due to + MemorySize: 128 Architectures: - x86_64 Policies: diff --git a/examples/data_masking/src/decrypt_data_output.json b/examples/data_masking/src/decrypt_data_output.json new file mode 100644 index 00000000000..7871a0416e7 --- /dev/null +++ b/examples/data_masking/src/decrypt_data_output.json @@ -0,0 +1,18 @@ +{ + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": { + "street": "123 Main St", + "city": "Anytown", + "state": "CA", + "zip": "12345" + }, + "company_address": { + "street": "456 ACME Ave", + "city": "Anytown", + "state": "CA", + "zip": "12345" + } +} \ No newline at end of file diff --git 
a/examples/data_masking/src/encrypt_data_output.json b/examples/data_masking/src/encrypt_data_output.json new file mode 100644 index 00000000000..cdd85e08701 --- /dev/null +++ b/examples/data_masking/src/encrypt_data_output.json @@ -0,0 +1,13 @@ +{ + "id": 1, + "name": "John Doe", + "age": 30, + "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", + "address": { + "street": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP", + "city": "Anytown", + "state": "CA", + "zip": "12345" + }, + "company_address": "B_KDddaDJYMb-93daSFGmnrtepytrejPNVXX98" +} \ No newline at end of file diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 981269d9b63..c8a7c156b1e 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -18,89 +18,17 @@ def lambda_handler(event, context): "state": "CA", "zip": "12345", }, + "company_address": { + "street": "456 ACME Ave", + "city": "Anytown", + "state": "CA", + "zip": "12345", + }, } encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) - encrypted = data_masker.encrypt(data=data, fields=["email", "address.street"]) - # encrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", - # "address": { - # "street": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address.street"]) - # decrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": { - # "street": "123 Main St", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - encrypted = data_masker.encrypt(data=data, fields=["email", "address"]) - # encrypted = { - # "id": 1, - # 
"name": "John Doe", - # "age": 30, - # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", - # "address": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP" - # } - - decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address"]) - # decrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": { - # "street": "123 Main St", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - encrypted = data_masker.encrypt(data=data) - # encrypted = "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc" - - decrypted = data_masker.decrypt(data=encrypted) - # decrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": { - # "street": "123 Main St", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } + encrypted = data_masker.encrypt(data=data, fields=["email", "address.street", "company_address"]) - masked = data_masker.mask(data=data, fields=["email", "address.street"]) - # masked = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "*****", - # "address": { - # "street": "*****", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } + data_masker.decrypt(data=encrypted, fields=["email", "address.street", "company_address"]) diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 83ecdfc16c7..f8fcafe5509 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,7 +1,7 @@ from aws_lambda_powertools.utilities.data_masking import DataMasking -def lambda_handler(event, context: LambdaContext): +def lambda_handler(event, context): data_masker = DataMasking() @@ -16,30 +16,12 @@ def lambda_handler(event, context: LambdaContext): "state": "CA", "zip": "12345", }, + "company_address": { + "street": "456 ACME Ave", + "city": 
"Anytown", + "state": "CA", + "zip": "12345", + }, } - masked = data_masker.mask(data=data, fields=["email", "address.street"]) - # masked = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "*****", - # "address": { - # "street": "*****", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - masked = data_masker.mask(data=data, fields=["address"]) - # masked = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": "*****" - # } - - masked = data_masker.mask(data=data) - # masked = "*****" + data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) diff --git a/examples/data_masking/src/mask_data_output.json b/examples/data_masking/src/mask_data_output.json new file mode 100644 index 00000000000..76a43cc81e7 --- /dev/null +++ b/examples/data_masking/src/mask_data_output.json @@ -0,0 +1,13 @@ +{ + "id": 1, + "name": "John Doe", + "age": 30, + "email": "*****", + "address": { + "street": "*****", + "city": "Anytown", + "state": "CA", + "zip": "12345" + }, + "company_address": "*****" +} \ No newline at end of file diff --git a/examples/data_masking/src/working_with_own_provider.py b/examples/data_masking/src/working_with_own_provider.py index 10e62fb13f0..7e4cb01fcdb 100644 --- a/examples/data_masking/src/working_with_own_provider.py +++ b/examples/data_masking/src/working_with_own_provider.py @@ -14,89 +14,17 @@ def lambda_handler(): "state": "CA", "zip": "12345", }, + "company_address": { + "street": "456 ACME Ave", + "city": "Anytown", + "state": "CA", + "zip": "12345", + }, } encryption_provider = MyCustomEncryption(secret="secret-key") data_masker = DataMasking(provider=encryption_provider) - encrypted = data_masker.encrypt(data, fields=["email", "address.street"]) - # encrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", - # "address": { - # "street": 
"XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address.street"]) - # decrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": { - # "street": "123 Main St", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - encrypted = data_masker.encrypt(data=data, fields=["email", "address"]) - # encrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", - # "address": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP" - # } - - decrypted = data_masker.decrypt(data=encrypted, fields=["email", "address"]) - # decrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": { - # "street": "123 Main St", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } - - encrypted = data_masker.encrypt(data=data) - # encrypted = "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc" - - decrypted = data_masker.decrypt(data=encrypted) - # decrypted = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "johndoe@example.com", - # "address": { - # "street": "123 Main St", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } + encrypted = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) - masked = data_masker.mask(data=data, fields=["email", "address.street"]) - # masked = { - # "id": 1, - # "name": "John Doe", - # "age": 30, - # "email": "*****", - # "address": { - # "street": "*****", - # "city": "Anytown", - # "state": "CA", - # "zip": "12345" - # }, - # } + data_masker.decrypt(data=encrypted, fields=["email", "address.street", "company_address"]) diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py index 
a6dcf2adb65..93fbf3cba45 100644 --- a/examples/data_masking/tests/src/single_mock.py +++ b/examples/data_masking/tests/src/single_mock.py @@ -38,6 +38,6 @@ def handler(event, context): data_masker = DataMasking(provider=provider) encrypted = data_masker.encrypt(data=data) - decrypted = data_masker.decrypt(data=encrypted) + data_masker.decrypt(data=encrypted) return {"message": "mock_value"} diff --git a/mkdocs.yml b/mkdocs.yml index 0a844fd392f..8b85bc89f56 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -29,6 +29,7 @@ nav: - utilities/data_classes.md - utilities/parser.md - utilities/idempotency.md + - utilities/data_masking.md - utilities/feature_flags.md - utilities/streaming.md - utilities/middleware_factory.md From 1609105b233a92a241fa33a15c8b200da313499c Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 8 Nov 2023 14:45:52 -0800 Subject: [PATCH 009/151] Fix mypy errors --- docs/utilities/data_masking.md | 4 ++-- .../{test_single_mock.py => test_data_masking_single_mock.py} | 0 ...test_with_fixture.py => test_data_masking_with_fixture.py} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename examples/data_masking/tests/{test_single_mock.py => test_data_masking_single_mock.py} (100%) rename examples/data_masking/tests/{test_with_fixture.py => test_data_masking_with_fixture.py} (100%) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 82e50b4a370..6c750d26089 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -166,7 +166,7 @@ For unit testing your applications, you can mock the calls to the data masking u === "test_single_mock.py" ```python hl_lines="4 8" - --8<-- "examples/data_masking/tests/test_single_mock.py" + --8<-- "examples/data_masking/tests/test_data_masking_single_mock.py" ``` === "single_mock.py" @@ -178,5 +178,5 @@ If we need to use this pattern across multiple tests, we can avoid repetition by === "test_with_fixture.py" ```python hl_lines="5 10" - --8<-- 
"examples/data_masking/tests/test_with_fixture.py" + --8<-- "examples/data_masking/tests/test_data_masking_with_fixture.py" ``` diff --git a/examples/data_masking/tests/test_single_mock.py b/examples/data_masking/tests/test_data_masking_single_mock.py similarity index 100% rename from examples/data_masking/tests/test_single_mock.py rename to examples/data_masking/tests/test_data_masking_single_mock.py diff --git a/examples/data_masking/tests/test_with_fixture.py b/examples/data_masking/tests/test_data_masking_with_fixture.py similarity index 100% rename from examples/data_masking/tests/test_with_fixture.py rename to examples/data_masking/tests/test_data_masking_with_fixture.py From 65a9e9f703652144ef5bcc4ba59a99d034d0c973 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 14 Nov 2023 11:09:23 -0800 Subject: [PATCH 010/151] Fix mypy errors --- examples/data_masking/src/custom_provider.py | 2 +- examples/data_masking/src/data_masking_function_example.py | 4 ++-- examples/data_masking/src/getting_started_encrypt_data.py | 4 ++-- examples/data_masking/src/getting_started_mask_data.py | 2 +- examples/data_masking/src/working_with_own_provider.py | 2 +- examples/data_masking/tests/src/single_mock.py | 6 +++--- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/data_masking/src/custom_provider.py b/examples/data_masking/src/custom_provider.py index e35fc0cbb50..b42bb47d10b 100644 --- a/examples/data_masking/src/custom_provider.py +++ b/examples/data_masking/src/custom_provider.py @@ -1,6 +1,6 @@ from itsdangerous.url_safe import URLSafeSerializer -from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider +from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider class MyCustomEncryption(BaseProvider): diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index 83fb2637130..867b22ff9eb 100644 --- 
a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -1,8 +1,8 @@ import os from aws_lambda_powertools import Logger, Tracer -from aws_lambda_powertools.utilities.data_masking import DataMasking -from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index c8a7c156b1e..dea59a686dd 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -1,7 +1,7 @@ import os -from aws_lambda_powertools.utilities.data_masking import DataMasking -from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index f8fcafe5509..a876a256ed4 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,4 +1,4 @@ -from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking import DataMasking def lambda_handler(event, context): diff --git a/examples/data_masking/src/working_with_own_provider.py 
b/examples/data_masking/src/working_with_own_provider.py index 7e4cb01fcdb..3bc69ee1a2e 100644 --- a/examples/data_masking/src/working_with_own_provider.py +++ b/examples/data_masking/src/working_with_own_provider.py @@ -1,4 +1,4 @@ -from aws_lambda_powertools.utilities.data_masking.base import DataMasking +from aws_lambda_powertools.utilities._data_masking.base import DataMasking from examples.data_masking.src.custom_provider import MyCustomEncryption diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py index 93fbf3cba45..ea0135b9401 100644 --- a/examples/data_masking/tests/src/single_mock.py +++ b/examples/data_masking/tests/src/single_mock.py @@ -3,9 +3,9 @@ import base64 from typing import Any, Callable, Dict, Union -from aws_lambda_powertools.utilities.data_masking.base import DataMasking -from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider -from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.base import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider class FakeEncryptionKeyProvider(BaseProvider): From 9fc33b405d59ee0d4a1269e6ca1ab43cfe089d7f Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 14 Nov 2023 11:26:30 -0800 Subject: [PATCH 011/151] Fix mypy --- .../src/{custom_provider.py => custom_data_masking_provider.py} | 0 examples/data_masking/src/working_with_own_provider.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/data_masking/src/{custom_provider.py => custom_data_masking_provider.py} (100%) diff --git a/examples/data_masking/src/custom_provider.py b/examples/data_masking/src/custom_data_masking_provider.py similarity index 100% rename from 
examples/data_masking/src/custom_provider.py rename to examples/data_masking/src/custom_data_masking_provider.py diff --git a/examples/data_masking/src/working_with_own_provider.py b/examples/data_masking/src/working_with_own_provider.py index 3bc69ee1a2e..8aed6227647 100644 --- a/examples/data_masking/src/working_with_own_provider.py +++ b/examples/data_masking/src/working_with_own_provider.py @@ -1,5 +1,5 @@ from aws_lambda_powertools.utilities._data_masking.base import DataMasking -from examples.data_masking.src.custom_provider import MyCustomEncryption +from examples.data_masking.src.custom_data_masking_provider import MyCustomEncryption def lambda_handler(): From e6315815b1878819dee2641ba4c8df9c7fada4b5 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 14 Nov 2023 11:35:19 -0800 Subject: [PATCH 012/151] Remove itsdangerous --- .../data_masking/src/custom_data_masking_provider.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/data_masking/src/custom_data_masking_provider.py b/examples/data_masking/src/custom_data_masking_provider.py index b42bb47d10b..2e945bba282 100644 --- a/examples/data_masking/src/custom_data_masking_provider.py +++ b/examples/data_masking/src/custom_data_masking_provider.py @@ -1,19 +1,18 @@ -from itsdangerous.url_safe import URLSafeSerializer - +import json from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider class MyCustomEncryption(BaseProvider): def __init__(self, secret): super().__init__() - self.secret = URLSafeSerializer(secret) + self.secret = secret def encrypt(self, data: str) -> str: if data is None: return data - return self.secret.dumps(data) + return json.dumps(data) def decrypt(self, data: str) -> str: if data is None: return data - return self.secret.loads(data) + return json.loads(data) From 2a959fbdf3a94742149f27bf2f54b29142bd92fe Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 14 Nov 2023 11:43:36 -0800 Subject: [PATCH 013/151] fix mypy --- 
examples/data_masking/src/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/data_masking/src/__init__.py diff --git a/examples/data_masking/src/__init__.py b/examples/data_masking/src/__init__.py new file mode 100644 index 00000000000..e69de29bb2d From b206873a43bc00482cb3e5e4909ebc4447f55041 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 14 Nov 2023 11:49:17 -0800 Subject: [PATCH 014/151] delete superflous init file --- examples/data_masking/src/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 examples/data_masking/src/__init__.py diff --git a/examples/data_masking/src/__init__.py b/examples/data_masking/src/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 From bb1c2a9d658af95a7508e9fbfbf7de67ac395353 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 14 Nov 2023 14:53:26 -0800 Subject: [PATCH 015/151] Fix mypy --- examples/data_masking/tests/src/single_mock.py | 4 ++-- mypy.ini | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py index ea0135b9401..45249a02d58 100644 --- a/examples/data_masking/tests/src/single_mock.py +++ b/examples/data_masking/tests/src/single_mock.py @@ -16,12 +16,12 @@ def __init__( ): super().__init__(json_serializer=json_serializer, json_deserializer=json_deserializer) - def encrypt(self, data: bytes | str, **kwargs) -> str: + def encrypt(self, data, **kwargs) -> str: data = self.json_serializer(data) ciphertext = base64.b64encode(data).decode() return ciphertext - def decrypt(self, data: bytes, **kwargs) -> Any: + def decrypt(self, data, **kwargs) -> Any: ciphertext_decoded = base64.b64decode(data) ciphertext = self.json_deserializer(ciphertext_decoded) return ciphertext diff --git a/mypy.ini b/mypy.ini index cb2d3ce2443..36df3cebfdc 100644 --- a/mypy.ini +++ b/mypy.ini @@ -8,6 +8,7 @@ show_column_numbers = True 
show_error_codes = True show_error_context = True disable_error_code = annotation-unchecked +exclude = examples/data_masking/src [mypy-jmespath] ignore_missing_imports=True From 9f0acb203138bc4518362ec9a7dd6d1cd04b353e Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 21 Nov 2023 17:19:33 -0800 Subject: [PATCH 016/151] Reorganized data masking docs --- docs/utilities/data_masking.md | 124 ++++++++++++------ examples/data_masking/sam/template.yaml | 4 +- .../src/custom_data_masking_provider.py | 8 +- .../src/data_masking_function_example.py | 37 +----- .../data_masking/src/generic_data_input.json | 21 +++ .../src/getting_started_encrypt_data.py | 21 +-- .../src/getting_started_mask_data.py | 19 +-- .../data_masking/src/large_data_input.json | 32 +++++ .../src/working_with_own_provider.py | 22 +--- 9 files changed, 157 insertions(+), 131 deletions(-) create mode 100644 examples/data_masking/src/generic_data_input.json create mode 100644 examples/data_masking/src/large_data_input.json diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 6c750d26089..df3d1ce29f3 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -5,7 +5,31 @@ description: Utility -The data masking utility provides a simple solution to conceal incoming data so that sensitive information is not passed downstream or logged. +The data masking utility provides a simple solution to obfuscate (mask or encrypt) incoming data so that sensitive information is not passed downstream or logged. + +```mermaid +stateDiagram-v2 + direction LR + Source: Customer information

Sensitive data

PII

+ LambdaInit: Lambda invocation + Processor: Data Masker + Handler: Your function + YourLogic: Your logic to mask or encrypt data + LambdaResponse: Logs + + Source --> LambdaInit + + LambdaInit --> Processor + Processor --> Handler + + state Processor { + [*] --> Handler + Handler --> YourLogic + } + + Handler --> Processor: Collect results + Processor --> LambdaResponse: Obfuscated data +``` ## Key features @@ -15,39 +39,67 @@ The data masking utility provides a simple solution to conceal incoming data so ## Terminology -**Mask**: This refers to concealing or partially replacing sensitive information with a non-sensitive placeholder or mask. The key characteristic of this operation is that it is irreversible, meaning the original sensitive data cannot be retrieved from the masked data. Masking is commonly applied when displaying data to users or for anonymizing data in non-reversible scenarios. For example, display the last four digits of a credit card number as "**** **** **** 1234". +**Masking** irreversibly replaces sensitive information with a non-sensitive placeholder or mask. For example, display the last four digits of a credit card number as `"**** **** **** 1234"`. -**Encrypt**: This is the process of transforming plaintext data into a ciphertext format using an encryption algorithm and a cryptographic key. Encryption is a reversible process, meaning the original data can be retrieved (decrypted) using the appropriate decryption key. You can use this, for instance, to encrypt any PII (personally identifiable information) of your customers and make sure only the people with the right permissions are allowed to decrypt and view the plaintext PII data, in accordance with GDPR. +**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. Encryption can be reversed with the correct decryption key. 
This allows you to encrypt any PII (personally identifiable information) and make sure only the users with appropriate permissions can decrypt it to view the plaintext. -**Decrypt**: This is the process of reversing the encryption process, converting ciphertext back into its original plaintext using a decryption algorithm and the correct decryption key that only authorized personnel should have access to. +**Decrypting** reverses the encryption process, converting ciphertext back into its original plaintext using a decryption algorithm and the correct decryption key. ## Getting started -### IAM Permissions +### Install + +If not using any encryption services and only masking data, your Lambda function does not need any additional permissions or resources to use this utility. + +#### Using AWS Encryption SDK To use the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. +You must also have an AWS KMS key with full read/write permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. +#### Using a custom encryption provider + For any other encryption provider, make sure to have the permissions for your role that it requires. -If not using any encryption services and only masking data, your Lambda does not need any additional permissions to use this utility. +### Working with nested data -### Required resources +#### JSON +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can obfuscate values of nested keys by using dot notation. -To use the AWS Encryption SDK, you must have an AWS KMS key with full read/write permissions. 
You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. +???+ note + If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performance of Lambda functions with several different memory sizes and concluded that 1024 MB was the optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). -For any other encryption provider, you must have the resources required for that provider. +=== "AWS Serverless Application Model (SAM) example" + ```yaml hl_lines="11-23 30 33-39 46" + --8<-- "examples/data_masking/sam/template.yaml" + ``` + +=== "input.json" + ```json + --8<-- "examples/data_masking/src/large_data_input.json" + ``` -## Using the utility +=== "data_masking_function_example.py" + ```python hl_lines="8 20-22" + --8<-- "examples/data_masking/src/data_masking_function_example.py" + ``` -#### Working with JSON -When using the data masking utility with dictionaries or JSON objects, you can provide a list of keys to conceal the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can conceal values of nested keys by using dot notation. +=== "output.json" + ```json + --8<-- "examples/data_masking/src/data_masking_function_example_output.json" + ``` ### Masking data You can mask data without having to install any encryption library. 
+=== "input.json" + ```json + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` + === "getting_started_mask_data.py" - ```python hl_lines="1 6 27" + ```python hl_lines="1 6 10" --8<-- "examples/data_masking/src/getting_started_mask_data.py" ``` @@ -60,8 +112,13 @@ You can mask data without having to install any encryption library. In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. You can still use the masking feature while using any encryption provider. +=== "input.json" + ```json + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` + === "getting_started_encrypt_data.py" - ```python hl_lines="3-4 6 29 32 34" + ```python hl_lines="3-4 12-13" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` @@ -75,35 +132,21 @@ In order to encrypt data, you must use either our out-of-the-box integration wit --8<-- "examples/data_masking/src/decrypt_data_output.json" ``` - -### SAM template example -=== "template.yaml" - ```yaml hl_lines="11-23 30 33-39 46" - --8<-- "examples/data_masking/sam/template.yaml" - ``` - -=== "data_masking_function_example.py" - ```python hl_lines="8 47-50" - --8<-- "examples/data_masking/src/data_masking_function_example.py" - ``` - -=== "output.json" - ```json - --8<-- "examples/data_masking/src/data_masking_function_example_output.json" - ``` - ## Advanced ### Adjusting configurations for AWS Encryption SDK -You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify these values at `utilities/data_masking/constants.py`. +You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values in `utilities/data_masking/provider/kms/aws_encryption_sdk.py`. 
+ +#### Caching -The `CACHE_CAPACITY` value is currently set at `100`. This value represents the maximum number of entries that can be retained in the local cryptographic materials cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.caches.local.html){target="_blank" rel="nofollow"} for more information. +The `CACHE_CAPACITY` value is currently set to `100`. This value represents the maximum number of entries that can be retained in the local cryptographic materials cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.caches.local.html){target="_blank" rel="nofollow"} for more information. -The `MAX_CACHE_AGE_SECONDS` value is currently set at `300`. It represents the maximum time (in seconds) that a cache entry may be kept in the cache. +The `MAX_CACHE_AGE_SECONDS` value is currently set to `300`. It represents the maximum time (in seconds) that a cache entry may be kept in the cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. -The `MAX_MESSAGES_ENCRYPTED` value is currently set at `200`. It represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this and `MAX_CACHE_AGE_SECONDS`. +#### Limit messages +The `MAX_MESSAGES_ENCRYPTED` value is currently set to `200`. 
It represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. ### Create your own encryption provider @@ -140,14 +183,19 @@ You can then use this custom encryption provider class as the `provider` argumen Here is an example of implementing a custom encryption using an external encryption library like [ItsDangerous](https://itsdangerous.palletsprojects.com/en/2.1.x/){target="_blank" rel="nofollow"}, a widely popular encryption library. +=== "input.json" + ```json + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` + === "working_with_own_provider.py" - ```python hl_lines="1-2 25 28 30" + ```python hl_lines="1-2 9-10" --8<-- "examples/data_masking/src/working_with_own_provider.py" ``` === "custom_provider.py" - ```python hl_lines="1 3 6 8 11 16" - --8<-- "examples/data_masking/src/custom_provider.py" + ```python hl_lines="1 3 8" + --8<-- "examples/data_masking/src/custom_data_masking_provider.py" ``` === "encrypted_output.json" diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index d849a0760cd..4b4c8f9a34a 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -6,7 +6,7 @@ Description: > Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-template-anatomy-globals.html Function: Timeout: 5 - Runtime: python3.10 + Runtime: python3.11 Tracing: Active Resources: MyKMSKey: @@ -27,7 +27,7 @@ Resources: Handler: data_masking_function_example.lambda_handler CodeUri: ../src Description: Data Masking Function Example - MemorySize: 128 + MemorySize: 1024 Architectures: - x86_64 Policies: diff --git 
a/examples/data_masking/src/custom_data_masking_provider.py b/examples/data_masking/src/custom_data_masking_provider.py index 2e945bba282..e01c47bb683 100644 --- a/examples/data_masking/src/custom_data_masking_provider.py +++ b/examples/data_masking/src/custom_data_masking_provider.py @@ -1,4 +1,5 @@ -import json +from itsdangerous.url_safe import URLSafeSerializer + from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -6,13 +7,14 @@ class MyCustomEncryption(BaseProvider): def __init__(self, secret): super().__init__() self.secret = secret + self.serializer = URLSafeSerializer(self.secret) def encrypt(self, data: str) -> str: if data is None: return data - return json.dumps(data) + return self.serializer.dumps(data) def decrypt(self, data: str) -> str: if data is None: return data - return json.loads(data) + return self.serializer.loads(data) diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index 867b22ff9eb..98ec2d91bb2 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -5,37 +5,7 @@ from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider from aws_lambda_powertools.utilities.typing import LambdaContext -KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] - -json_blob = { - "id": 1, - "name": "John Doe", - "age": 30, - "email": "johndoe@example.com", - "address": {"street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345"}, - "phone_numbers": ["+1-555-555-1234", "+1-555-555-5678"], - "interests": ["Hiking", "Traveling", "Photography", "Reading"], - "job_history": { - "company": { - "company_name": "Acme Inc.", - "company_address": "5678 Interview Dr.", - }, - "position": "Software Engineer", - "start_date": "2015-01-01", - "end_date": "2017-12-31", - }, - "about_me": """ - Lorem ipsum dolor sit amet, 
consectetur adipiscing elit. Nulla tincidunt velit quis - sapien mollis, at egestas massa tincidunt. Suspendisse ultrices arcu a dolor dapibus, - ut pretium turpis volutpat. Vestibulum at sapien quis sapien dignissim volutpat ut a enim. - Praesent fringilla sem eu dui convallis luctus. Donec ullamcorper, sapien ut convallis congue, - risus mauris pretium tortor, nec dignissim arcu urna a nisl. Vivamus non fermentum ex. Proin - interdum nisi id sagittis egestas. Nam sit amet nisi nec quam pharetra sagittis. Aliquam erat - volutpat. Donec nec luctus sem, nec ornare lorem. Vivamus vitae orci quis enim faucibus placerat. - Nulla facilisi. Proin in turpis orci. Donec imperdiet velit ac tellus gravida, eget laoreet tellus - malesuada. Praesent venenatis tellus ac urna blandit, at varius felis posuere. Integer a commodo nunc. - """, -} +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") tracer = Tracer() logger = Logger() @@ -44,7 +14,10 @@ @tracer.capture_lambda_handler def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Hello world function - HTTP 200") + + data = event["body"] + data_masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN])) - encrypted = data_masker.encrypt(json_blob, fields=["address.street", "job_history.company.company_name"]) + encrypted = data_masker.encrypt(data, fields=["address.street", "job_history.company.company_name"]) decrypted = data_masker.decrypt(encrypted, fields=["address.street", "job_history.company.company_name"]) return {"Decrypted_json": decrypted} diff --git a/examples/data_masking/src/generic_data_input.json b/examples/data_masking/src/generic_data_input.json new file mode 100644 index 00000000000..60ab0aa278e --- /dev/null +++ b/examples/data_masking/src/generic_data_input.json @@ -0,0 +1,21 @@ +{ + "body": + { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": { + "street": "123 Main St", + "city": "Anytown", + "state": "CA", + "zip": "12345" + }, + 
"company_address": { + "street": "456 ACME Ave", + "city": "Anytown", + "state": "CA", + "zip": "12345" + } + } +} \ No newline at end of file diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index dea59a686dd..e774bd8e4a3 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -3,28 +3,11 @@ from aws_lambda_powertools.utilities._data_masking import DataMasking from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider -KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") def lambda_handler(event, context): - data = { - "id": 1, - "name": "John Doe", - "age": 30, - "email": "johndoe@example.com", - "address": { - "street": "123 Main St", - "city": "Anytown", - "state": "CA", - "zip": "12345", - }, - "company_address": { - "street": "456 ACME Ave", - "city": "Anytown", - "state": "CA", - "zip": "12345", - }, - } + data = event["body"] encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index a876a256ed4..1f5e34af03e 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -5,23 +5,6 @@ def lambda_handler(event, context): data_masker = DataMasking() - data = { - "id": 1, - "name": "John Doe", - "age": 30, - "email": "johndoe@example.com", - "address": { - "street": "123 Main St", - "city": "Anytown", - "state": "CA", - "zip": "12345", - }, - "company_address": { - "street": "456 ACME Ave", - "city": "Anytown", - "state": "CA", - "zip": "12345", - }, - } + data = event["body"] data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) 
diff --git a/examples/data_masking/src/large_data_input.json b/examples/data_masking/src/large_data_input.json new file mode 100644 index 00000000000..34275c3fa73 --- /dev/null +++ b/examples/data_masking/src/large_data_input.json @@ -0,0 +1,32 @@ +{ + "body": + { + "id": 1, + "name": "John Doe", + "age": 30, + "email": "johndoe@example.com", + "address": {"street": "123 Main St", "city": "Anytown", "state": "CA", "zip": "12345"}, + "phone_numbers": ["+1-555-555-1234", "+1-555-555-5678"], + "interests": ["Hiking", "Traveling", "Photography", "Reading"], + "job_history": { + "company": { + "company_name": "Acme Inc.", + "company_address": "5678 Interview Dr." + }, + "position": "Software Engineer", + "start_date": "2015-01-01", + "end_date": "2017-12-31" + }, + "about_me": """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla tincidunt velit quis + sapien mollis, at egestas massa tincidunt. Suspendisse ultrices arcu a dolor dapibus, + ut pretium turpis volutpat. Vestibulum at sapien quis sapien dignissim volutpat ut a enim. + Praesent fringilla sem eu dui convallis luctus. Donec ullamcorper, sapien ut convallis congue, + risus mauris pretium tortor, nec dignissim arcu urna a nisl. Vivamus non fermentum ex. Proin + interdum nisi id sagittis egestas. Nam sit amet nisi nec quam pharetra sagittis. Aliquam erat + volutpat. Donec nec luctus sem, nec ornare lorem. Vivamus vitae orci quis enim faucibus placerat. + Nulla facilisi. Proin in turpis orci. Donec imperdiet velit ac tellus gravida, eget laoreet tellus + malesuada. Praesent venenatis tellus ac urna blandit, at varius felis posuere. Integer a commodo nunc. 
+ """ + } +} diff --git a/examples/data_masking/src/working_with_own_provider.py b/examples/data_masking/src/working_with_own_provider.py index 8aed6227647..fda62eae7e9 100644 --- a/examples/data_masking/src/working_with_own_provider.py +++ b/examples/data_masking/src/working_with_own_provider.py @@ -2,25 +2,9 @@ from examples.data_masking.src.custom_data_masking_provider import MyCustomEncryption -def lambda_handler(): - data = { - "id": 1, - "name": "John Doe", - "age": 30, - "email": "johndoe@example.com", - "address": { - "street": "123 Main St", - "city": "Anytown", - "state": "CA", - "zip": "12345", - }, - "company_address": { - "street": "456 ACME Ave", - "city": "Anytown", - "state": "CA", - "zip": "12345", - }, - } +def lambda_handler(event, context): + + data = event["body"] encryption_provider = MyCustomEncryption(secret="secret-key") data_masker = DataMasking(provider=encryption_provider) From ae7deb60f469139b516984715c1635deebd5403c Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 21 Nov 2023 21:06:35 -0800 Subject: [PATCH 017/151] nit fixes --- docs/utilities/data_masking.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index df3d1ce29f3..7b495d015d6 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -5,7 +5,7 @@ description: Utility -The data masking utility provides a simple solution to obfuscate (mask or encrypt) incoming data so that sensitive information is not passed downstream or logged. +The data masking utility provides a simple solution to mask or encrypt incoming data so that sensitive information is not passed downstream or logged. ```mermaid stateDiagram-v2 @@ -39,7 +39,7 @@ stateDiagram-v2 ## Terminology -**Masking** irreversibly replaces sensitive information with a non-sensitive placeholder or mask. 
For example, display the last four digits of a credit card number as `"**** **** **** 1234"`. **Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. Encryption can be reversed with the correct decryption key. This allows you to encrypt any PII (personally identifiable information) and make sure only the users with appropriate permissions can decrypt it to view the plaintext. @@ -53,18 +53,18 @@ If not using any encryption services and only masking data, your Lambda function #### Using AWS Encryption SDK -To use the AWS Encryption SDK, your Lambda function IAM Role must have `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. +To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. You must also have an AWS KMS key with full read/write permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. #### Using a custom encryption provider -For any other encryption provider, make sure to have the permissions for your role that it requires. +If using your own encryption provider, make sure to have the necessary resources and permissions for your Lambda function's role. ### Working with nested data #### JSON -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can obfuscate values of nested keys by using dot notation. +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. 
If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. ???+ note If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performance of Lambda functions with several different memory sizes and concluded that 1024 MB was the optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). @@ -142,11 +142,11 @@ You have the option to modify some of the configurations we have set as defaults The `CACHE_CAPACITY` value is currently set to `100`. This value represents the maximum number of entries that can be retained in the local cryptographic materials cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.caches.local.html){target="_blank" rel="nofollow"} for more information. -The `MAX_CACHE_AGE_SECONDS` value is currently set to `300`. It represents the maximum time (in seconds) that a cache entry may be kept in the cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. +The `MAX_CACHE_AGE_SECONDS` value is currently set to `300`. This represents the maximum time (in seconds) that a cache entry may be kept in the cache. 
Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. #### Limit messages -The `MAX_MESSAGES_ENCRYPTED` value is currently set to `200`. It represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. +The `MAX_MESSAGES_ENCRYPTED` value is currently set to `200`. This represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. 
### Create your own encryption provider @@ -188,16 +188,16 @@ Here is an example of implementing a custom encryption using an external encrypt --8<-- "examples/data_masking/src/generic_data_input.json" ``` +=== "custom_provider.py" + ```python hl_lines="1 3 6 8" + --8<-- "examples/data_masking/src/custom_data_masking_provider.py" + ``` + === "working_with_own_provider.py" ```python hl_lines="1-2 9-10" --8<-- "examples/data_masking/src/working_with_own_provider.py" ``` -=== "custom_provider.py" - ```python hl_lines="1 3 8" - --8<-- "examples/data_masking/src/custom_data_masking_provider.py" - ``` - === "encrypted_output.json" ```json hl_lines="5-7 12" --8<-- "examples/data_masking/src/encrypt_data_output.json" From b8bd70fa89f3b1c2ddbc3ed0c7153e17f3defcce Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 22 Nov 2023 10:05:24 -0800 Subject: [PATCH 018/151] Added itsdangerous as dev dep --- poetry.lock | 13 ++++++++++++- pyproject.toml | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index eaa0fb7914b..f2d55fa4263 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1343,6 +1343,17 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] +[[package]] +name = "itsdangerous" +version = "2.1.2" +description = "Safely pass data to untrusted environments and back." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, + {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, +] + [[package]] name = "jinja2" version = "3.1.2" @@ -3224,4 +3235,4 @@ validation = ["fastjsonschema"] [metadata] lock-version = "2.0" python-versions = "^3.7.4" -content-hash = "21c7697a42537357d74b97fcec11754de4defa04a296362dabc226078869f454" +content-hash = "7f56e136a8d3f7c907cac30b8cee9dd1c8c5f448b0f66d5fc479523261c7e7b0" diff --git a/pyproject.toml b/pyproject.toml index 9f9162acb43..1688b642d20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,7 @@ sentry-sdk = "^1.22.2" ruff = ">=0.0.272,<0.1.7" retry2 = "^0.9.5" pytest-socket = "^0.6.0" +itsdangerous = "^2.1.2" [tool.coverage.run] source = ["aws_lambda_powertools"] From 2f7c6b7ab8d36e02844996b37541495e2cfe1eea Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 5 Dec 2023 09:50:13 -0800 Subject: [PATCH 019/151] grammar fixes --- docs/utilities/data_masking.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 7b495d015d6..87cca695da0 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -28,7 +28,7 @@ stateDiagram-v2 } Handler --> Processor: Collect results - Processor --> LambdaResponse: Obfuscated data + Processor --> LambdaResponse: Masked/encrypted data ``` ## Key features @@ -67,7 +67,7 @@ If using your own encryption provider, make sure to have the necessary resources When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. 
???+ note - If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluding 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). + If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). 
=== "AWS Serverless Application Model (SAM) example" ```yaml hl_lines="11-23 30 33-39 46" From b9c2c480a9d2e4679d4cd50db80449f94a9e61c7 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 8 Dec 2023 14:36:33 +0100 Subject: [PATCH 020/151] docs: refactor diag to make operations explicit --- docs/utilities/data_masking.md | 39 +++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 87cca695da0..c2e8e4d1420 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -10,25 +10,29 @@ The data masking utility provides a simple solution to mask or encrypt incoming ```mermaid stateDiagram-v2 direction LR - Source: Customer information

Sensitive data

PII

- LambdaInit: Lambda invocation - Processor: Data Masker - Handler: Your function - YourLogic: Your logic to mask or encrypt data - LambdaResponse: Logs - - Source --> LambdaInit - - LambdaInit --> Processor - Processor --> Handler - - state Processor { - [*] --> Handler - Handler --> YourLogic + LambdaFn: Your Lambda function + DataMasking: DataMasking + Operation: Masking operation + Input: Sensitive value + Mask: Mask + Encrypt: Encrypt + Decrypt: Decrypt + Provider: Encryption provider + Result: Data transformed (masked, encrypted, or decrypted) + + LambdaFn --> DataMasking + DataMasking --> Operation + + state Operation { + [*] --> Input + Input --> Mask: Irreversible + Input --> Encrypt + Input --> Decrypt + Encrypt --> Provider + Decrypt --> Provider } - Handler --> Processor: Collect results - Processor --> LambdaResponse: Masked/encrypted data + Operation --> Result ``` ## Key features @@ -64,6 +68,7 @@ If using your own encryption provider, make sure to have the necessary resources ### Working with nested data #### JSON + When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. ???+ note From b35effdafe9c9e7ed0deb767d363b50b9e889ee0 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 8 Dec 2023 14:51:40 +0100 Subject: [PATCH 021/151] docs: line editing for intro line --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index c2e8e4d1420..c2068e79fa8 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -5,7 +5,7 @@ description: Utility -The data masking utility provides a simple solution to mask or encrypt incoming data so that sensitive information is not passed downstream or logged. 
+The data masking utility can encrypt, decrypt, or irreversibly mask sensitive information to protect data confidentiality. ```mermaid stateDiagram-v2 From 202d906ce2c28fc73d6b9bfd1f084db7971b7b39 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 8 Dec 2023 15:05:56 +0100 Subject: [PATCH 022/151] docs: line editing for one key feature --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index c2068e79fa8..8a112857537 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -37,7 +37,7 @@ stateDiagram-v2 ## Key features -* Mask data irreversibly without having to install any encryption library. +* Mask, encrypt, or decrypt data in one or multiple fields * Out of the box integration with the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank" rel="nofollow"} to easily encrypt and decrypt data. * Install any encryption provider and connect it with our new Data Masker class to easily mask, encrypt, and decrypt data. 
From 6687ebc093b94e72b743d942995aa679d562bef2 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 8 Dec 2023 15:08:32 +0100 Subject: [PATCH 023/151] docs: Masking to Possible Operations to remove ambiguity --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 8a112857537..4407fdca8e0 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -12,7 +12,7 @@ stateDiagram-v2 direction LR LambdaFn: Your Lambda function DataMasking: DataMasking - Operation: Masking operation + Operation: Possible operations Input: Sensitive value Mask: Mask Encrypt: Encrypt From 5942a70a4204be0e37b968c24bf22deaf07707ce Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Fri, 8 Dec 2023 14:14:25 +0000 Subject: [PATCH 024/151] Removing custom provider --- docs/utilities/data_masking.md | 70 +++------------------------------- 1 file changed, 6 insertions(+), 64 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 4407fdca8e0..b6f395e2581 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -61,18 +61,16 @@ To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms: You must also have an AWS KMS key with full read/write permissions. You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. -#### Using a custom encryption provider - -If using your own encryption provider, make sure to have the necessary resources and permissions for your Lambda function's role. - ### Working with nested data #### JSON When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. 
You can select values of nested keys by using dot notation. + ???+ note If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). + === "AWS Serverless Application Model (SAM) example" ```yaml hl_lines="11-23 30 33-39 46" @@ -145,73 +143,17 @@ You have the option to modify some of the configurations we have set as defaults #### Caching + The `CACHE_CAPACITY` value is currently set to `100`. This value represents the maximum number of entries that can be retained in the local cryptographic materials cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.caches.local.html){target="_blank" rel="nofollow"} for more information. The `MAX_CACHE_AGE_SECONDS` value is currently set to `300`. This represents the maximum time (in seconds) that a cache entry may be kept in the cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. + #### Limit messages + The `MAX_MESSAGES_ENCRYPTED` value is currently set to `200`. This represents the maximum number of messages that may be encrypted under a cache entry. 
Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. - -### Create your own encryption provider - -You can create your own custom encryption provider by inheriting the `BaseProvider` class, and implementing the `encrypt()` and `decrypt()` methods, and optionally the `mask()` method. You can also either use your own data serializer and deserializer by passing the `BaseProvider` class a `json_serializer` and `json_deserializer` argument, or you can use the default. - - -
-```mermaid -classDiagram - direction LR - class BaseProvider { - <> - +encrypt(data: Any) - +decrypt(data: str) - +mask(data: Any) - } - - class YourCustomEncryptionProvider { - +encrypt(data: Any) - +decrypt(data: str) - +mask(data: Any) - } - - BaseProvider <|-- YourCustomEncryptionProvider : implement -``` -Visual representation to bring your own encryption provider -
- -* **`encrypt()`** – handles all logic for how to encrypt any data -* **`decrypt()`** – handles all logic for how to decrypt encrypted data -* **`mask()`** – handles all logic for how to irreversably mask data (optional) - -You can then use this custom encryption provider class as the `provider` argument when creating a new `DataMasking` instance to use the encryption and decryption algorithms of the encryption library you have chosen. - -Here is an example of implementing a custom encryption using an external encryption library like [ItsDangerous](https://itsdangerous.palletsprojects.com/en/2.1.x/){target="_blank" rel="nofollow"}, a widely popular encryption library. - -=== "input.json" - ```json - --8<-- "examples/data_masking/src/generic_data_input.json" - ``` - -=== "custom_provider.py" - ```python hl_lines="1 3 6 8" - --8<-- "examples/data_masking/src/custom_data_masking_provider.py" - ``` - -=== "working_with_own_provider.py" - ```python hl_lines="1-2 9-10" - --8<-- "examples/data_masking/src/working_with_own_provider.py" - ``` - -=== "encrypted_output.json" - ```json hl_lines="5-7 12" - --8<-- "examples/data_masking/src/encrypt_data_output.json" - ``` - -=== "decrypted_output.json" - ```json hl_lines="5-7 12-17" - --8<-- "examples/data_masking/src/decrypt_data_output.json" - ``` + ## Testing your code From 619292d2297ecac11b32dd213832b2e290ee0ceb Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Fri, 8 Dec 2023 14:19:24 +0000 Subject: [PATCH 025/151] Adding banner --- docs/utilities/data_masking.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index b6f395e2581..016d2bd9bca 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -155,6 +155,10 @@ The `MAX_CACHE_AGE_SECONDS` value is currently set to `300`. This represents the The `MAX_MESSAGES_ENCRYPTED` value is currently set to `200`. 
This represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. +### Creating your own provider + +!!! info "In Q1 2024, we will implement support for bringing your own encryption provider." + ## Testing your code For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. From c33f18e85184113c460c3aa833bc8a6b0d0131a0 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 8 Dec 2023 15:21:37 +0100 Subject: [PATCH 026/151] docs: remove BYO from key features, highlight best practices --- docs/utilities/data_masking.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 016d2bd9bca..e1d2df1a242 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -37,9 +37,9 @@ stateDiagram-v2 ## Key features -* Mask, encrypt, or decrypt data in one or multiple fields -* Out of the box integration with the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank" rel="nofollow"} to easily encrypt and decrypt data. -* Install any encryption provider and connect it with our new Data Masker class to easily mask, encrypt, and decrypt data. 
+* Encrypt, decrypt, or irreversibly mask data with ease +* Remove sensitive information in one or more fields within nested data +* Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank" rel="nofollow"} for industry and AWS security best practices ## Terminology From 9ab5a400ba3538d2e2f774fc8c27704eff0f27ef Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Fri, 8 Dec 2023 14:56:51 +0000 Subject: [PATCH 027/151] Modifying SAM --- docs/utilities/data_masking.md | 46 ++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index e1d2df1a242..dae66f9e391 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -39,7 +39,7 @@ stateDiagram-v2 * Encrypt, decrypt, or irreversibly mask data with ease * Remove sensitive information in one or more fields within nested data -* Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank" rel="nofollow"} for industry and AWS security best practices +* Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"} for industry and AWS security best practices ## Terminology @@ -53,28 +53,24 @@ stateDiagram-v2 ### Install -If not using any encryption services and only masking data, your Lambda function does not need any additional permissions or resources to use this utility. +### Required resources -#### Using AWS Encryption SDK - -To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. - -You must also have an AWS KMS key with full read/write permissions. 
You can create one and learn more on the [AWS KMS console](https://us-east-1.console.aws.amazon.com/kms/home?region=us-east-1#/kms/home){target="_blank" rel="nofollow"}. +=== "AWS Serverless Application Model (SAM) example" + ```yaml hl_lines="11-23 30 33-39 46" + --8<-- "examples/data_masking/sam/template.yaml" + ``` -### Working with nested data +If your Lambda function only masks data without utilizing any encryption services, it requires no additional permissions or library to use this utility. -#### JSON +#### Using AWS Encryption SDK -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. +To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. - -???+ note - If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). - +When using AWS Encryption SDK with AWS KMS keys for data encryption and decryption, it's important to be aware that configuring additional permissions in the KMS Key Policy may be necessary. Learn more about KMS Key Policies [here](https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html){target="_blank"}. 
-=== "AWS Serverless Application Model (SAM) example" - ```yaml hl_lines="11-23 30 33-39 46" - --8<-- "examples/data_masking/sam/template.yaml" +=== "data_masking_function_example.py" + ```python hl_lines="8 20-22" + --8<-- "examples/data_masking/src/data_masking_function_example.py" ``` === "input.json" @@ -82,16 +78,22 @@ When using the data masking utility with dictionaries or JSON strings, you can p --8<-- "examples/data_masking/src/large_data_input.json" ``` -=== "data_masking_function_example.py" - ```python hl_lines="8 20-22" - --8<-- "examples/data_masking/src/data_masking_function_example.py" - ``` - === "output.json" ```json --8<-- "examples/data_masking/src/data_masking_function_example_output.json" ``` +### Working with nested data + +#### JSON + +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. + + +???+ note + If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). + + ### Masking data You can mask data without having to install any encryption library. 
From 896541343eeacd66028a1f34aa806b03fd47426c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Fri, 8 Dec 2023 17:04:29 +0100 Subject: [PATCH 028/151] docs: line editing terminology --- docs/utilities/data_masking.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index dae66f9e391..057e2d8b390 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -43,11 +43,11 @@ stateDiagram-v2 ## Terminology -**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder. For example, display the last four digits of a credit card number as `"**** **** **** 1234"`. +**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. Data is replaced in-memory, which is why the operation is irreversible. -**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. Encryption can be reversed with the correct decryption key. This allows you to encrypt any PII (personally identifiable information) and make sure only the users with appropirate permissions can decrypt it to view the plaintext. +**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. This allows you to encrypt any PII (personally identifiable information) to ensure only authorized personnel can decrypt it. -**Decrypting** reverses the encryption process, converting ciphertext back into its original plaintext using a decryption algorithm and the correct decryption key. +**Decrypting** transforms ciphertext back into plaintext using a decryption algorithm and the correct decryption key. 
## Getting started From 0762ba2eaec29cce077ad7bcc12a56dadad4775a Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Fri, 8 Dec 2023 16:11:24 -0800 Subject: [PATCH 029/151] Revise docs --- docs/utilities/data_masking.md | 52 ++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 057e2d8b390..bca35a4df6f 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -89,6 +89,8 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. +fields dict and string + ???+ note If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). @@ -96,7 +98,7 @@ When using the data masking utility with dictionaries or JSON strings, you can p ### Masking data -You can mask data without having to install any encryption library. +You can mask data without having to install any encryption library. Masking data will result in the loss of its original type, and the masked data will always be represented as a string. 
=== "input.json" ```json @@ -113,9 +115,11 @@ You can mask data without having to install any encryption library. --8<-- "examples/data_masking/src/mask_data_output.json" ``` -### Encryting and decrypting data +### Encrypting data + +In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. Encrypting data will temporarily result in the loss of its original type, and the encrypted data will be represented as a string while it is in ciphertext form. After decryption, the data will regain its original type. -In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. You can still use the masking feature while using any encryption provider. +You can still use the masking feature while using any encryption provider. === "input.json" ```json @@ -123,7 +127,7 @@ In order to encrypt data, you must use either our out-of-the-box integration wit ``` === "getting_started_encrypt_data.py" - ```python hl_lines="3-4 12-13" + ```python hl_lines="3-4 12-13 15" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` @@ -132,6 +136,22 @@ In order to encrypt data, you must use either our out-of-the-box integration wit --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` +### Decrypting data + +To decrypt data, use the appropriate key to transform ciphertext back into plaintext. Upon decryption, the data will return to its original type. + +Decrypting a ciphertext string will transform the data to its original type. 
+ +=== "encrypted_input.json" + ```json hl_lines="5-7 12" + --8<-- "examples/data_masking/src/encrypt_data_output.json" + ``` + +=== "getting_started_encrypt_data.py" + ```python hl_lines="3-4 12-13 17" + --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" + ``` + === "decrypted_output.json" ```json hl_lines="5-7 12-17" --8<-- "examples/data_masking/src/decrypt_data_output.json" @@ -139,25 +159,21 @@ In order to encrypt data, you must use either our out-of-the-box integration wit ## Advanced -### Adjusting configurations for AWS Encryption SDK +### Providers -You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values in `utilities/data_masking/provider/kms/aws_encryption_sdk.py`. +#### AWS Encryption SDK -#### Caching +You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values when initializing the `AwsEncryptionSdkProvider`. - -The `CACHE_CAPACITY` value is currently set to `100`. This value represents the maximum number of entries that can be retained in the local cryptographic materials cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.caches.local.html){target="_blank" rel="nofollow"} for more information. 
+| Parameter | Required | Default | Description | +| --------------------------- | ------------------ | ------------------------------------ | -------------------------------------------------------------------------------------------------------- | +| **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | +| **max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | +| **max_messages_encrypted** | | `200` | The maximum number of messages that may be encrypted under a cache entry -The `MAX_CACHE_AGE_SECONDS` value is currently set to `300`. This represents the maximum time (in seconds) that a cache entry may be kept in the cache. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. - - -#### Limit messages - - -The `MAX_MESSAGES_ENCRYPTED` value is currently set to `200`. This represents the maximum number of messages that may be encrypted under a cache entry. Please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#module-aws_encryption_sdk.materials_managers.caching){target="_blank" rel="nofollow"} for more information about this. - +For more information about the parameters for this provider, please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#aws_encryption_sdk.materials_managers.caching.CachingCryptoMaterialsManager){target="_blank" rel="nofollow"}. -### Creating your own provider +#### Creating your own provider !!! 
info "In Q1 2024, we will implement support for bringing your own encryption provider." From 3b49271a18e4e14e9a668baf603664110d0e1f32 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 11 Dec 2023 21:27:42 +0000 Subject: [PATCH 030/151] Modifying SAM --- docs/utilities/data_masking.md | 4 +- examples/data_masking/sam/template.yaml | 76 +++++++++++++------------ 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index bca35a4df6f..8361983b89a 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -85,6 +85,8 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti ### Working with nested data +!!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." + #### JSON When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. @@ -175,7 +177,7 @@ For more information about the parameters for this provider, please see the [AWS #### Creating your own provider -!!! info "In Q1 2024, we will implement support for bringing your own encryption provider." +!!! info "In Q1 2024, we plan to add support for bringing your own encryption provider." 
## Testing your code diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 4b4c8f9a34a..660df5c10d2 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -8,50 +8,52 @@ Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/devel Timeout: 5 Runtime: python3.11 Tracing: Active + Environment: + Variables: + POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld + POWERTOOLS_METRICS_NAMESPACE: Powertools + LOG_LEVEL: INFO + KMS_KEY_ARN: !GetAtt DataMaskingKMSKey.Arn + + Resources: - MyKMSKey: - Type: AWS::KMS::Key - Properties: - Enabled: true - KeyPolicy: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: kms:* - Resource: "*" - Principal: - AWS: !Join [ "", [ "arn:aws:iam::", !Ref "AWS::AccountId", ":root" ] ] + # Lambda Function DataMaskingFunctionExample: - Type: AWS::Serverless::Function # More info about Function Resource: https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-function.html + Type: AWS::Serverless::Function Properties: Handler: data_masking_function_example.lambda_handler CodeUri: ../src Description: Data Masking Function Example + # Cryptographic operations demand more memory usage. + # It is recommended to allocate a minimum of 1024MB of memory to your Lambda function + # when utilizing the DataMasking Utility. 
MemorySize: 1024 Architectures: - x86_64 - Policies: - Statement: - - Effect: Allow - Action: - - kms:Decrypt - - kms:GenerateDataKey - Resource: !GetAtt MyKMSKey.Arn - Tracing: Active - Environment: - Variables: - POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld - POWERTOOLS_METRICS_NAMESPACE: Powertools - LOG_LEVEL: INFO - KMS_KEY_ARN: !GetAtt MyKMSKey.Arn - Tags: - LambdaPowertools: python - -Outputs: - KMSKeyArn: - Description: ARN of the KMS Key - Value: !GetAtt MyKMSKey.Arn - DataMaskingFunctionExample: - Description: Data Masking Function Example - Value: !GetAtt DataMaskingFunctionExample.Arn + # KMS KEY + DataMaskingKMSKey: + Type: 'AWS::KMS::Key' + Properties: + Description: KMS Key for Lambda - DataMasking + KeyPolicy: + Version: '2012-10-17' + Id: key-default-1 + Statement: + # To ensure key management security, a KMS Key should have at least one administrator. + # In this example, the root account is granted administrator permissions. + # In a production environment, it is recommended to configure specific users or roles for enhanced security. + - Effect: Allow + Principal: + AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root' + Action: 'kms:*' + Resource: '*' + # KMS utilizes resource policies, allowing direct access grant to the Lambda Role on the KMS Key. 
+ # For more details: https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html + - Effect: Allow + Principal: + AWS: !GetAtt DataMaskingFunctionExampleRole.Arn # Permission for the Lambda role + Action: + - kms:Decrypt + - kms:GenerateDataKey + Resource: "*" From 847c3262050c9a359e0c4785da69ffd97b50f8b1 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 11 Dec 2023 21:34:17 +0000 Subject: [PATCH 031/151] Removing itsdangerous dependency - we are not using --- docs/utilities/data_masking.md | 2 +- poetry.lock | 13 +------------ pyproject.toml | 1 - 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 8361983b89a..8d27804872b 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -56,7 +56,7 @@ stateDiagram-v2 ### Required resources === "AWS Serverless Application Model (SAM) example" - ```yaml hl_lines="11-23 30 33-39 46" + ```yaml hl_lines="16 30 54-55" --8<-- "examples/data_masking/sam/template.yaml" ``` diff --git a/poetry.lock b/poetry.lock index 7e25fda50cc..7d8bac68420 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1325,17 +1325,6 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] -[[package]] -name = "itsdangerous" -version = "2.1.2" -description = "Safely pass data to untrusted environments and back." 
-optional = false -python-versions = ">=3.7" -files = [ - {file = "itsdangerous-2.1.2-py3-none-any.whl", hash = "sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44"}, - {file = "itsdangerous-2.1.2.tar.gz", hash = "sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a"}, -] - [[package]] name = "jinja2" version = "3.1.2" @@ -3223,4 +3212,4 @@ validation = ["fastjsonschema"] [metadata] lock-version = "2.0" python-versions = "^3.7.4" -content-hash = "6426cacfd613b4ffc5a4c8bfd6971a6d2f22c37dc834c5f793fcedac50bde5f7" +content-hash = "f2ab5f7bf9528b67c376a73ab98b77c67042c86d189908eabc804454c33c87db" diff --git a/pyproject.toml b/pyproject.toml index e7f4bcc9196..ba64fab3e95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,7 +109,6 @@ sentry-sdk = "^1.22.2" ruff = ">=0.0.272,<0.1.8" retry2 = "^0.9.5" pytest-socket = "^0.6.0" -itsdangerous = "^2.1.2" [tool.coverage.run] source = ["aws_lambda_powertools"] From ed768ca2f944f59ce8cd8919a570be48c4a70f08 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 11 Dec 2023 21:46:05 +0000 Subject: [PATCH 032/151] Fixing mypy errors --- .../src/custom_data_masking_provider.py | 20 ------------------- .../src/data_masking_function_example.py | 2 +- .../src/working_with_own_provider.py | 14 ------------- mypy.ini | 1 - 4 files changed, 1 insertion(+), 36 deletions(-) delete mode 100644 examples/data_masking/src/custom_data_masking_provider.py delete mode 100644 examples/data_masking/src/working_with_own_provider.py diff --git a/examples/data_masking/src/custom_data_masking_provider.py b/examples/data_masking/src/custom_data_masking_provider.py deleted file mode 100644 index e01c47bb683..00000000000 --- a/examples/data_masking/src/custom_data_masking_provider.py +++ /dev/null @@ -1,20 +0,0 @@ -from itsdangerous.url_safe import URLSafeSerializer - -from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider - - -class MyCustomEncryption(BaseProvider): - def 
__init__(self, secret): - super().__init__() - self.secret = secret - self.serializer = URLSafeSerializer(self.secret) - - def encrypt(self, data: str) -> str: - if data is None: - return data - return self.serializer.dumps(data) - - def decrypt(self, data: str) -> str: - if data is None: - return data - return self.serializer.loads(data) diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index 98ec2d91bb2..96ed6d17f71 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -5,7 +5,7 @@ from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider from aws_lambda_powertools.utilities.typing import LambdaContext -KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") tracer = Tracer() logger = Logger() diff --git a/examples/data_masking/src/working_with_own_provider.py b/examples/data_masking/src/working_with_own_provider.py deleted file mode 100644 index fda62eae7e9..00000000000 --- a/examples/data_masking/src/working_with_own_provider.py +++ /dev/null @@ -1,14 +0,0 @@ -from aws_lambda_powertools.utilities._data_masking.base import DataMasking -from examples.data_masking.src.custom_data_masking_provider import MyCustomEncryption - - -def lambda_handler(event, context): - - data = event["body"] - - encryption_provider = MyCustomEncryption(secret="secret-key") - data_masker = DataMasking(provider=encryption_provider) - - encrypted = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) - - data_masker.decrypt(data=encrypted, fields=["email", "address.street", "company_address"]) diff --git a/mypy.ini b/mypy.ini index 36df3cebfdc..cb2d3ce2443 100644 --- a/mypy.ini +++ b/mypy.ini @@ -8,7 +8,6 @@ show_column_numbers = True show_error_codes = True show_error_context = True disable_error_code = 
annotation-unchecked -exclude = examples/data_masking/src [mypy-jmespath] ignore_missing_imports=True From 243e8cb1863085027c462dfde761ddfa93548dca Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 11 Dec 2023 22:01:06 +0000 Subject: [PATCH 033/151] Adding more information --- docs/utilities/data_masking.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 8d27804872b..fb6f7c6d8ef 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -53,6 +53,10 @@ stateDiagram-v2 ### Install +Before you start, you need to create a KMS key to encrypt and decrypt your data - your Lambda function will need read and write access to it. + +!!! note "If your Lambda function only masks data without utilizing any encryption services, it requires no additional permissions or library to use this utility." + ### Required resources === "AWS Serverless Application Model (SAM) example" @@ -60,8 +64,6 @@ stateDiagram-v2 --8<-- "examples/data_masking/sam/template.yaml" ``` -If your Lambda function only masks data without utilizing any encryption services, it requires no additional permissions or library to use this utility. - #### Using AWS Encryption SDK To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. 
From b6d0470ae5e113721ff6077e977cdc0ba4d5d070 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 11 Dec 2023 15:09:28 -0800 Subject: [PATCH 034/151] Added more info about fields param --- docs/utilities/data_masking.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index fb6f7c6d8ef..ac822225521 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -91,9 +91,9 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti #### JSON -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values. If no fields are provided, the entire data object will be masked or encrypted. You can select values of nested keys by using dot notation. +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. If a `fields` parameter is provided, then the rest of the dictionary or JSON string will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). -fields dict and string +If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). ???+ note @@ -121,7 +121,7 @@ You can mask data without having to install any encryption library. Masking data ### Encryting data -In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. 
Encrypting data will temporarily result in the loss of its original type, and the encrypted data will be represented as a string while it is in ciphertext form. After decryption, the data will regain its original type. +In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. Encrypting data will temporarily result in the loss of the data's original type, as the encrypted data will be represented as a string while it is in ciphertext form. After decryption, the data will regain its original type. You can still use the masking feature while using any encryption provider. @@ -135,7 +135,7 @@ You can still use the masking feature while using any encryption provider. --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` -=== "encrypted_output.json" +=== "output.json" ```json hl_lines="5-7 12" --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` @@ -146,7 +146,7 @@ To decrypt data, use the appropriate key to transform ciphertext back into plain Decrypting a ciphertext string will transform the data to its original type. -=== "encrypted_input.json" +=== "input.json" ```json hl_lines="5-7 12" --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` @@ -156,7 +156,7 @@ Decrypting a ciphertext string will transform the data to its original type. 
--8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` -=== "decrypted_output.json" +=== "output.json" ```json hl_lines="5-7 12-17" --8<-- "examples/data_masking/src/decrypt_data_output.json" ``` From 7111fba8c054fb49d4b9cd8948769c50ff8d055a Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 12 Dec 2023 14:24:02 +0000 Subject: [PATCH 035/151] Making error message actionable --- .../utilities/_data_masking/base.py | 5 ++- .../utilities/_data_masking/exceptions.py | 43 +++++++++++++++++++ .../provider/kms/aws_encryption_sdk.py | 19 +++++++- .../data_masking/test_unit_data_masking.py | 3 +- 4 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 aws_lambda_powertools/utilities/_data_masking/exceptions.py diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 211e44c3759..7281a34d19b 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,6 +1,7 @@ import json from typing import Optional, Union +from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -141,8 +142,8 @@ def _apply_action_to_fields( # Turn back into dict so can parse it my_dict_parsed = json.loads(my_dict_parsed) else: - raise TypeError( - f"Unsupported data type for 'data' parameter. Expected a traversable type, but got {type(data)}.", + raise DataMaskingUnsupportedTypeError( + f"Unsupported data type. 
Expected a traversable type (dict or str), but got {type(data)}.", ) # For example: ['a.b.c'] in ['a.b.c', 'a.x.y'] diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py b/aws_lambda_powertools/utilities/_data_masking/exceptions.py new file mode 100644 index 00000000000..0c08cc18d0c --- /dev/null +++ b/aws_lambda_powertools/utilities/_data_masking/exceptions.py @@ -0,0 +1,43 @@ +""" +Idempotency errors +""" + + +from typing import Optional, Union + + +class BaseError(Exception): + """ + Base error class that overwrites the way exception and extra information is printed. + See https://github.com/aws-powertools/powertools-lambda-python/issues/1772 + """ + + def __init__(self, *args: Optional[Union[str, Exception]]): + self.message = str(args[0]) if args else "" + self.details = "".join(str(arg) for arg in args[1:]) if args[1:] else None + + def __str__(self): + """ + Return all arguments formatted or original message + """ + if self.message and self.details: + return f"{self.message} - ({self.details})" + return self.message + + +class DataMaskingUnsupportedTypeError(BaseError): + """ + UnsupportedType Error + """ + + +class DataMaskingDecryptKeyError(BaseError): + """ + Decrypting with an invalid AWS KMS Key ARN. + """ + + +class DataMaskingEncryptKeyError(BaseError): + """ + Encrypting with an invalid AWS KMS Key ARN. 
+ """ diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index a895f8de0ac..ab7e25944c2 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -10,6 +10,7 @@ LocalCryptoMaterialsCache, StrictAwsKmsMasterKeyProvider, ) +from aws_encryption_sdk.exceptions import DecryptKeyError, GenerateKeyError from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session from aws_lambda_powertools.utilities._data_masking.constants import ( @@ -17,6 +18,10 @@ MAX_CACHE_AGE_SECONDS, MAX_MESSAGES_ENCRYPTED, ) +from aws_lambda_powertools.utilities._data_masking.exceptions import ( + DataMaskingDecryptKeyError, + DataMaskingEncryptKeyError, +) from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -80,10 +85,20 @@ def __init__( ) def encrypt(self, data: bytes | str | Dict | int, **provider_options) -> str: - return self._key_provider.encrypt(data=data, **provider_options) + try: + return self._key_provider.encrypt(data=data, **provider_options) + except GenerateKeyError: + raise DataMaskingEncryptKeyError( + "Failed to encrypt data - Please make sure you are using a valid Symmetric AWS MSK Key ARN", + ) def decrypt(self, data: str, **provider_options) -> Any: - return self._key_provider.decrypt(data=data, **provider_options) + try: + return self._key_provider.decrypt(data=data, **provider_options) + except DecryptKeyError: + raise DataMaskingDecryptKeyError( + "Failed to decrypt data - Please make sure you are using a valid Symmetric AWS MSK Key ARN", + ) class KMSKeyProvider: diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 4a92a668d73..6379fbdb192 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py 
+++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -4,6 +4,7 @@ from aws_lambda_powertools.utilities._data_masking.base import DataMasking from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING +from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError @pytest.fixture @@ -153,7 +154,7 @@ def test_parsing_unsupported_data_type(data_masker): # GIVEN an initialization of the DataMasking class # WHEN attempting to pass in a list of fields with input data that is not a dict - with pytest.raises(TypeError): + with pytest.raises(DataMaskingUnsupportedTypeError): # THEN the result is a TypeError data_masker.mask(42, ["this.field"]) From 08fcadca0ec421bb54075d1ef6d178c4f2ef274b Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 12 Dec 2023 14:24:54 +0000 Subject: [PATCH 036/151] Making error message actionable --- .../utilities/_data_masking/exceptions.py | 33 ++----------------- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py b/aws_lambda_powertools/utilities/_data_masking/exceptions.py index 0c08cc18d0c..4143e909c3d 100644 --- a/aws_lambda_powertools/utilities/_data_masking/exceptions.py +++ b/aws_lambda_powertools/utilities/_data_masking/exceptions.py @@ -1,43 +1,16 @@ -""" -Idempotency errors -""" - - -from typing import Optional, Union - - -class BaseError(Exception): - """ - Base error class that overwrites the way exception and extra information is printed. 
- See https://github.com/aws-powertools/powertools-lambda-python/issues/1772 - """ - - def __init__(self, *args: Optional[Union[str, Exception]]): - self.message = str(args[0]) if args else "" - self.details = "".join(str(arg) for arg in args[1:]) if args[1:] else None - - def __str__(self): - """ - Return all arguments formatted or original message - """ - if self.message and self.details: - return f"{self.message} - ({self.details})" - return self.message - - -class DataMaskingUnsupportedTypeError(BaseError): +class DataMaskingUnsupportedTypeError(Exception): """ UnsupportedType Error """ -class DataMaskingDecryptKeyError(BaseError): +class DataMaskingDecryptKeyError(Exception): """ Decrypting with an invalid AWS KMS Key ARN. """ -class DataMaskingEncryptKeyError(BaseError): +class DataMaskingEncryptKeyError(Exception): """ Encrypting with an invalid AWS KMS Key ARN. """ From 3dc9d57d7b18c07e8ac1f8e483c91008979d59c4 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 12 Dec 2023 16:08:04 +0000 Subject: [PATCH 037/151] Making error message actionable --- .../utilities/_data_masking/exceptions.py | 12 +++++ .../provider/kms/aws_encryption_sdk.py | 50 +++++++++++-------- docs/utilities/data_masking.md | 3 +- mypy.ini | 2 +- 4 files changed, 44 insertions(+), 23 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py b/aws_lambda_powertools/utilities/_data_masking/exceptions.py index 4143e909c3d..b43b4b37554 100644 --- a/aws_lambda_powertools/utilities/_data_masking/exceptions.py +++ b/aws_lambda_powertools/utilities/_data_masking/exceptions.py @@ -14,3 +14,15 @@ class DataMaskingEncryptKeyError(Exception): """ Encrypting with an invalid AWS KMS Key ARN. """ + + +class DataMaskingDecryptValueError(Exception): + """ + Decrypting an invalid field. + """ + + +class DataMaskingContextMismatchError(Exception): + """ + Decrypting with an encryption context that does not match the expected values.
+ """ diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index ab7e25944c2..2f541500f59 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -1,6 +1,7 @@ from __future__ import annotations import base64 +from binascii import Error from typing import Any, Callable, Dict, List import botocore @@ -10,7 +11,7 @@ LocalCryptoMaterialsCache, StrictAwsKmsMasterKeyProvider, ) -from aws_encryption_sdk.exceptions import DecryptKeyError, GenerateKeyError +from aws_encryption_sdk.exceptions import DecryptKeyError, GenerateKeyError, NotSupportedError from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session from aws_lambda_powertools.utilities._data_masking.constants import ( @@ -19,18 +20,14 @@ MAX_MESSAGES_ENCRYPTED, ) from aws_lambda_powertools.utilities._data_masking.exceptions import ( + DataMaskingContextMismatchError, DataMaskingDecryptKeyError, + DataMaskingDecryptValueError, DataMaskingEncryptKeyError, ) from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider -class ContextMismatchError(Exception): - def __init__(self, key): - super().__init__(f"Encryption Context does not match expected value for key: {key}") - self.key = key - - class AwsEncryptionSdkProvider(BaseProvider): """ The AwsEncryptionSdkProvider is used as a provider for the DataMasking class. @@ -89,16 +86,11 @@ def encrypt(self, data: bytes | str | Dict | int, **provider_options) -> str: return self._key_provider.encrypt(data=data, **provider_options) except GenerateKeyError: raise DataMaskingEncryptKeyError( - "Failed to encrypt data - Please make sure you are using a valid Symmetric AWS MSK Key ARN", + "Failed to encrypt data. 
Please ensure you are using a valid Symmetric AWS KMS Key ARN, not KMS Key ID or alias.", # noqa E501 ) def decrypt(self, data: str, **provider_options) -> Any: - try: - return self._key_provider.decrypt(data=data, **provider_options) - except DecryptKeyError: - raise DataMaskingDecryptKeyError( - "Failed to decrypt data - Please make sure you are using a valid Symmetric AWS MSK Key ARN", - ) + return self._key_provider.decrypt(data=data, **provider_options) class KMSKeyProvider: @@ -174,19 +166,35 @@ def decrypt(self, data: str, **provider_options) -> Any: ciphertext : bytes The decrypted data in bytes """ - ciphertext_decoded = base64.b64decode(data) + try: + ciphertext_decoded = base64.b64decode(data) + except Error: + raise DataMaskingDecryptValueError( + "Data decryption failed. Please ensure that you are using a field that was previously encrypted.", + ) expected_context = provider_options.pop("encryption_context", {}) - ciphertext, decryptor_header = self.client.decrypt( - source=ciphertext_decoded, - key_provider=self.key_provider, - **provider_options, - ) + try: + ciphertext, decryptor_header = self.client.decrypt( + source=ciphertext_decoded, + key_provider=self.key_provider, + **provider_options, + ) + except DecryptKeyError: + raise DataMaskingDecryptKeyError( + "Failed to decrypt data - Please ensure you are using a valid Symmetric AWS KMS Key ARN, not KMS Key ID or alias.", # noqa E501 + ) + except (TypeError, NotSupportedError): + raise DataMaskingDecryptValueError( + "Data decryption failed. 
Please ensure that you are using a field that was previously encrypted.", + ) for key, value in expected_context.items(): if decryptor_header.encryption_context.get(key) != value: - raise ContextMismatchError(key) + raise DataMaskingContextMismatchError( + f"Encryption Context does not match expected value for key: {key}", + ) ciphertext = self.json_deserializer(ciphertext) return ciphertext diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ac822225521..335bd43ccce 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -90,8 +90,9 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti !!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." #### JSON - + When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. If a `fields` parameter is provided, then the rest of the dictionary or JSON string will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). + If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). 
diff --git a/mypy.ini b/mypy.ini index cb2d3ce2443..b32cfe4aabe 100644 --- a/mypy.ini +++ b/mypy.ini @@ -12,7 +12,7 @@ disable_error_code = annotation-unchecked [mypy-jmespath] ignore_missing_imports=True -[mypy-aws_encryption_sdk] +[mypy-aws_encryption_sdk.*] ignore_missing_imports=True [mypy-sentry_sdk] From 410ed3b8014800a9ecdbe69749f03d027f6d2628 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 12 Dec 2023 16:36:06 +0100 Subject: [PATCH 038/151] docs: add first sequence diagram for operations --- docs/utilities/data_masking.md | 35 +++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 335bd43ccce..330c9ad3ea4 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -170,11 +170,11 @@ Decrypting a ciphertext string will transform the data to its original type. You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values when initializing the `AwsEncryptionSdkProvider`. 
-| Parameter | Required | Default | Description | -| --------------------------- | ------------------ | ------------------------------------ | -------------------------------------------------------------------------------------------------------- | -| **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | -| **max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | -| **max_messages_encrypted** | | `200` | The maximum number of messages that may be encrypted under a cache entry +| Parameter | Required | Default | Description | +| -------------------------- | -------- | ------- | --------------------------------------------------------------------------------------------- | +| **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | +| **max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | +| **max_messages_encrypted** | | `200` | The maximum number of messages that may be encrypted under a cache entry | For more information about the parameters for this provider, please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#aws_encryption_sdk.materials_managers.caching.CachingCryptoMaterialsManager){target="_blank" rel="nofollow"}. @@ -182,6 +182,31 @@ For more information about the parameters for this provider, please see the [AWS !!! info "In Q1 2024, we plan to add support for bringing your own encryption provider." +### Data masking request flow + +The following sequence diagrams explain how `DataMasking` behaves under different scenarios. + +#### Masking operation + +Masking operations occur in-memory and we cannot recover the original value. + +
+```mermaid +sequenceDiagram + autonumber + participant Client + participant Lambda + participant DataMasking as Data Masking (in memory) + Client->>Lambda: Invoke (event) + Lambda->>DataMasking: .mask(data) + DataMasking->>DataMasking: replaces data with ***** + Note over Lambda,DataMasking: No encryption providers involved. + DataMasking->>Lambda: return masked data + Lambda-->>Client: Return response +``` +Simple masking operation +
+ ## Testing your code For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. From 42a682b8e3c10925190fe5d7172bee2468b69984 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 12 Dec 2023 17:34:12 +0100 Subject: [PATCH 039/151] docs: add encrypt operations sequence diagram --- docs/utilities/data_masking.md | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 330c9ad3ea4..77548ff49f0 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -186,7 +186,7 @@ For more information about the parameters for this provider, please see the [AWS The following sequence diagrams explain how `DataMasking` behaves under different scenarios. -#### Masking operation +#### Mask operation Masking operations occur in-memory and we cannot recover the original value. @@ -201,12 +201,37 @@ sequenceDiagram Lambda->>DataMasking: .mask(data) DataMasking->>DataMasking: replaces data with ***** Note over Lambda,DataMasking: No encryption providers involved. - DataMasking->>Lambda: return masked data + DataMasking->>Lambda: data masked Lambda-->>Client: Return response ``` Simple masking operation +#### Encrypt operation with Encryption SDK (KMS) + +We call KMS to generate a unique data key once. It allows us to encrypt this key in-memory, and use it for multiple operations to improve performance and prevent throttling. + +> This is known as [envelope encryption](https://docs.aws.amazon.com/kms/latest/developerguide/concepts.html#enveloping){target="_blank"}. + +
+```mermaid +sequenceDiagram + autonumber + participant Client + participant Lambda + participant DataMasking as Data Masking + participant EncryptionProvider as Encryption Provider + Client->>Lambda: Invoke (event) + Lambda->>DataMasking: encrypt(data) + DataMasking->>EncryptionProvider: Request unique data key (kms:GenerateDataKey) + DataMasking->>DataMasking: Encrypt data key with wrapping key (in-memory) + DataMasking->>DataMasking: Encrypt data with newly encrypted key (in-memory) + DataMasking->>Lambda: ciphertext containing encrypted data + Lambda-->>Client: Return response +``` +Encrypting operation using envelope encryption. +
+ ## Testing your code For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. From abe27f114342759c65c2a0c356d717bc7fff2372 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 12 Dec 2023 17:35:41 +0100 Subject: [PATCH 040/151] docs: remove dot notation from mask operation --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 77548ff49f0..62085daab3e 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -198,7 +198,7 @@ sequenceDiagram participant Lambda participant DataMasking as Data Masking (in memory) Client->>Lambda: Invoke (event) - Lambda->>DataMasking: .mask(data) + Lambda->>DataMasking: mask(data) DataMasking->>DataMasking: replaces data with ***** Note over Lambda,DataMasking: No encryption providers involved. 
DataMasking->>Lambda: data masked From a265b644754478c8b49ec88b793dc73f7ef92521 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 12 Dec 2023 16:39:01 +0000 Subject: [PATCH 041/151] Adding typing --- .../utilities/_data_masking/base.py | 6 ++--- .../provider/kms/aws_encryption_sdk.py | 22 +++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 7281a34d19b..0b395fdbb07 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,5 +1,5 @@ import json -from typing import Optional, Union +from typing import Callable, Optional, Union from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -54,7 +54,7 @@ def decrypt(self, data, fields=None, **provider_options): def mask(self, data, fields=None, **provider_options): return self._apply_action(data, fields, self.provider.mask, **provider_options) - def _apply_action(self, data, fields, action, **provider_options): + def _apply_action(self, data, fields, action: Callable, **provider_options): """ Helper method to determine whether to apply a given action to the entire input data or to specific fields if the 'fields' argument is specified. 
@@ -84,7 +84,7 @@ def _apply_action_to_fields( self, data: Union[dict, str], fields: list, - action, + action: Callable, **provider_options, ) -> Union[dict, str]: """ diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 2f541500f59..ca56e018769 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -82,12 +82,7 @@ def __init__( ) def encrypt(self, data: bytes | str | Dict | int, **provider_options) -> str: - try: - return self._key_provider.encrypt(data=data, **provider_options) - except GenerateKeyError: - raise DataMaskingEncryptKeyError( - "Failed to encrypt data. Please ensure you are using a valid Symmetric AWS KMS Key ARN, not KMS Key ID or alias.", # noqa E501 - ) + return self._key_provider.encrypt(data=data, **provider_options) def decrypt(self, data: str, **provider_options) -> Any: return self._key_provider.decrypt(data=data, **provider_options) @@ -142,11 +137,16 @@ def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: The encrypted data, as a base64-encoded string. """ data_encoded = self.json_serializer(data) - ciphertext, _ = self.client.encrypt( - source=data_encoded, - materials_manager=self.cache_cmm, - **provider_options, - ) + try: + ciphertext, _ = self.client.encrypt( + source=data_encoded, + materials_manager=self.cache_cmm, + **provider_options, + ) + except GenerateKeyError: + raise DataMaskingEncryptKeyError( + "Failed to encrypt data. 
Please ensure you are using a valid Symmetric AWS KMS Key ARN, not KMS Key ID or alias.", # noqa E501 + ) ciphertext = base64.b64encode(ciphertext).decode() return ciphertext From e16833f660eb9b437a930d64731e00b1c9042ed0 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 12 Dec 2023 16:17:55 -0800 Subject: [PATCH 042/151] Fixes for SAM template comments --- examples/data_masking/sam/template.yaml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 660df5c10d2..449e40c324d 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -18,6 +18,9 @@ Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/devel Resources: # Lambda Function + # This function is mainly for documentation purposes. In prod, we recommend you split up the encrypt and decrypt + # calls, so that one function can act as the encryption proxy via HTTP requests, data pipeline, etc. + # while authorized personnel can call decrypt from scripts or a separate function. DataMaskingFunctionExample: Type: AWS::Serverless::Function Properties: @@ -25,7 +28,7 @@ Resources: CodeUri: ../src Description: Data Masking Function Example # Cryptographic operations demand more memory usage. - # It is recommended to allocate a minimum of 1024MB of memory to your Lambda function + # We recommend to allocate a minimum of 1024MB of memory to your Lambda function # when utilizing the DataMasking Utility. MemorySize: 1024 Architectures: @@ -53,6 +56,10 @@ Resources: - Effect: Allow Principal: AWS: !GetAtt DataMaskingFunctionExampleRole.Arn # Permission for the Lambda role + # These IAM permissions are necessary for the envelope encryption that AWS Encryption SDK uses. + # Envelope encryption randomly generates a data key and encrypts that data key along with your data, + # so we encrypt in-memory to prevent too many calls to KMS to reduce latency. 
+ # For more details: https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/concepts.html#envelope-encryption Action: - kms:Decrypt - kms:GenerateDataKey From 0cb967a3563a722a9c220a2a4c20851d9df934d6 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 12 Dec 2023 16:47:35 -0800 Subject: [PATCH 043/151] Add return types for mask() --- aws_lambda_powertools/utilities/_data_masking/base.py | 4 ++-- .../utilities/_data_masking/provider/base.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 0b395fdbb07..fc6f827458d 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,5 +1,5 @@ import json -from typing import Callable, Optional, Union +from typing import Callable, Iterable, Optional, Union from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -51,7 +51,7 @@ def encrypt(self, data, fields=None, **provider_options): def decrypt(self, data, fields=None, **provider_options): return self._apply_action(data, fields, self.provider.decrypt, **provider_options) - def mask(self, data, fields=None, **provider_options): + def mask(self, data, fields=None, **provider_options) -> Union[str, Iterable]: return self._apply_action(data, fields, self.provider.mask, **provider_options) def _apply_action(self, data, fields, action: Callable, **provider_options): diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index a293c6aff9a..f1488fe542d 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -1,5 +1,5 @@ import json -from typing import Any +from 
typing import Any, Iterable, Union from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING @@ -26,7 +26,7 @@ def encrypt(self, data) -> str: def decrypt(self, data) -> Any: raise NotImplementedError("Subclasses must implement decrypt()") - def mask(self, data) -> Any: + def mask(self, data) -> Union[str, Iterable]: if isinstance(data, (str, dict, bytes)): return DATA_MASKING_STRING elif isinstance(data, (list, tuple, set)): From e3c62fdf2702372007dc62247a7769e43497c3e7 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 14 Dec 2023 16:49:58 +0000 Subject: [PATCH 044/151] Addressing Seshu's feedback --- .../_data_masking/provider/kms/aws_encryption_sdk.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index ca56e018769..cd7ccfe7eaf 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -170,7 +170,7 @@ def decrypt(self, data: str, **provider_options) -> Any: ciphertext_decoded = base64.b64decode(data) except Error: raise DataMaskingDecryptValueError( - "Data decryption failed. Please ensure that you are using a field that was previously encrypted.", + "Data decryption failed. Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 ) expected_context = provider_options.pop("encryption_context", {}) @@ -187,7 +187,7 @@ def decrypt(self, data: str, **provider_options) -> Any: ) except (TypeError, NotSupportedError): raise DataMaskingDecryptValueError( - "Data decryption failed. Please ensure that you are using a field that was previously encrypted.", + "Data decryption failed. 
Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 ) for key, value in expected_context.items(): From f41026b9150ba61298e1163d5ae6b4a1ce91ad96 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 14 Dec 2023 19:12:04 +0000 Subject: [PATCH 045/151] Improving examples --- docs/utilities/data_masking.md | 12 ++++++------ examples/data_masking/sam/template.yaml | 17 +++++++++++++---- .../src/getting_started_mask_data.py | 16 ++++++++++++---- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 62085daab3e..ad0c78895f5 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -60,7 +60,7 @@ Before you start, you need to create a KMS key to encrypt and decrypt your data ### Required resources === "AWS Serverless Application Model (SAM) example" - ```yaml hl_lines="16 30 54-55" + ```yaml hl_lines="16 24 35 59-60 66-67" --8<-- "examples/data_masking/sam/template.yaml" ``` @@ -105,16 +105,16 @@ If `fields` is not provided, the entire data object will be masked (or encrypted You can mask data without having to install any encryption library. Masking data will result in the loss of its original type, and the masked data will always be represented as a string. 
-=== "input.json" - ```json - --8<-- "examples/data_masking/src/generic_data_input.json" - ``` - === "getting_started_mask_data.py" ```python hl_lines="1 6 10" --8<-- "examples/data_masking/src/getting_started_mask_data.py" ``` +=== "input.json" + ```json + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` + === "output.json" ```json hl_lines="5 7 12" --8<-- "examples/data_masking/src/mask_data_output.json" diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 449e40c324d..40e920b7064 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -21,7 +21,7 @@ Resources: # This function is mainly for documentation purposes. In prod, we recommend you split up the encrypt and decrypt # calls, so that one function can act as the encryption proxy via HTTP requests, data pipeline, etc. # while authorized personnel can call decrypt from scripts or a separate function. - DataMaskingFunctionExample: + DataMaskingEncryptFunctionExample: Type: AWS::Serverless::Function Properties: Handler: data_masking_function_example.lambda_handler @@ -31,8 +31,17 @@ Resources: # We recommend to allocate a minimum of 1024MB of memory to your Lambda function # when utilizing the DataMasking Utility. MemorySize: 1024 - Architectures: - - x86_64 + + # DataMaskingDecryptFunctionExample: + # Type: AWS::Serverless::Function + # Properties: + # Handler: data_masking_function_decrypt.lambda_handler + # CodeUri: ../src + # Description: Data Masking Function Example + # # Cryptographic operations demand more memory usage. + # # We recommend to allocate a minimum of 1024MB of memory to your Lambda function + # # when utilizing the DataMasking Utility. 
+ # MemorySize: 1024 # KMS KEY DataMaskingKMSKey: @@ -55,7 +64,7 @@ Resources: # For more details: https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html - Effect: Allow Principal: - AWS: !GetAtt DataMaskingFunctionExampleRole.Arn # Permission for the Lambda role + AWS: !GetAtt DataMaskingEncryptFunctionExampleRole.Arn # Permission for the Lambda role # These IAM permissions are necessary for the envelope encryption that AWS Encryption SDK uses. # Envelope encryption randomly generates a data key and encrypts that data key along with your data, # so we encrypt in-memory to prevent too many calls to KMS to reduce latency. diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 1f5e34af03e..18a7e96f11d 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,10 +1,18 @@ +from typing import Dict + +from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities.typing import LambdaContext + +logger = Logger() +data_masker = DataMasking() -def lambda_handler(event, context): +def lambda_handler(event: dict, context: LambdaContext) -> Dict: + data = event.get("body") - data_masker = DataMasking() + logger.info("Masking fields email, address.street, and company_address") - data = event["body"] + fields_masked = data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) - data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) + return {"fields_masked": fields_masked} From 9413a260fb14d347358c44a7dd7d2ec8edfe13af Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 14 Dec 2023 19:56:51 +0000 Subject: [PATCH 046/151] Improving examples --- docs/utilities/data_masking.md | 18 +++++++-------- .../src/getting_started_decrypt_data.py | 23 +++++++++++++++++++ 
.../src/getting_started_encrypt_data.py | 16 +++++++++---- .../src/getting_started_mask_data.py | 4 ++-- 4 files changed, 45 insertions(+), 16 deletions(-) create mode 100644 examples/data_masking/src/getting_started_decrypt_data.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ad0c78895f5..12c6f250c64 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -106,7 +106,7 @@ If `fields` is not provided, the entire data object will be masked (or encrypted You can mask data without having to install any encryption library. Masking data will result in the loss of its original type, and the masked data will always be represented as a string. === "getting_started_mask_data.py" - ```python hl_lines="1 6 10" + ```python hl_lines="4 8 16" --8<-- "examples/data_masking/src/getting_started_mask_data.py" ``` @@ -122,20 +122,20 @@ You can mask data without having to install any encryption library. Masking data ### Encryting data -In order to encrypt data, you must use either our out-of-the-box integration with the AWS Encryption SDK, or install another encryption provider of your own. Encrypting data will temporarily result in the loss of the data's original type, as the encrypted data will be represented as a string while it is in ciphertext form. After decryption, the data will regain its original type. +To encrypt data, utilize our built-in integration with the AWS Encryption SDK. Encrypting data will temporarily result in the loss of the original data type, as it transforms into a [ciphertext](https://en.wikipedia.org/wiki/Ciphertext){target="_blank" rel="nofollow"} string. -You can still use the masking feature while using any encryption provider. - -=== "input.json" - ```json - --8<-- "examples/data_masking/src/generic_data_input.json" - ``` +To encrypt your data, you'll need a valid and symmetric [AWS KMS](https://docs.aws.amazon.com/kms/latest/developerguide/data-protection.html){target="_blank"} key. 
=== "getting_started_encrypt_data.py" - ```python hl_lines="3-4 12-13 15" + ```python hl_lines="5-6 12-13 22" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` +=== "input.json" + ```json hl_lines="7-9 14" + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` + === "output.json" ```json hl_lines="5-7 12" --8<-- "examples/data_masking/src/encrypt_data_output.json" diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py new file mode 100644 index 00000000000..0b83c41ace1 --- /dev/null +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -0,0 +1,23 @@ +import os +from typing import Dict + +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities.typing import LambdaContext + +logger = Logger() + +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") +encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +data_masker = DataMasking(provider=encryption_provider) + + +def lambda_handler(event: Dict, context: LambdaContext) -> Dict: + data = event.get("body") + + logger.info("Encrypting fields email, address.street, and company_address") + + encrypted = data_masker.encrypt(data=data, fields=["email", "address.street", "company_address"]) + + return {"payload_encrypted": encrypted} diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index e774bd8e4a3..0b83c41ace1 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -1,17 +1,23 @@ import os +from typing import Dict +from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking from 
aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities.typing import LambdaContext + +logger = Logger() KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") +encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +data_masker = DataMasking(provider=encryption_provider) -def lambda_handler(event, context): - data = event["body"] +def lambda_handler(event: Dict, context: LambdaContext) -> Dict: + data = event.get("body") - encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) - data_masker = DataMasking(provider=encryption_provider) + logger.info("Encrypting fields email, address.street, and company_address") encrypted = data_masker.encrypt(data=data, fields=["email", "address.street", "company_address"]) - data_masker.decrypt(data=encrypted, fields=["email", "address.street", "company_address"]) + return {"payload_encrypted": encrypted} diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 18a7e96f11d..10b16961ae9 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -13,6 +13,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> Dict: logger.info("Masking fields email, address.street, and company_address") - fields_masked = data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) + masked = data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) - return {"fields_masked": fields_masked} + return {"payload_masked": masked} From cfae26719846d9fdeb446781accc9d179de6915c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 14 Dec 2023 21:04:35 +0100 Subject: [PATCH 047/151] docs: improve encrypt ops sequence diagram Signed-off-by: heitorlessa --- docs/utilities/data_masking.md | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) 
diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 12c6f250c64..90559303d4a 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -209,9 +209,9 @@ sequenceDiagram #### Encrypt operation with Encryption SDK (KMS) -We call KMS to generate an unique data key once. It allows us to encrypt this key in-memory, and use it for multiple operations to improve performance and prevent throttling. +We call KMS to generate a unique data key that can be used for multiple `encrypt` operations in-memory. It improves performance, reduces cost, and prevents throttling. -> This is known as [envelope encryption](https://docs.aws.amazon.com/kms/latest/developerguide/concepts.html#enveloping){target="_blank"}. +To make this operation simpler to visualize, we keep caching details in a [separate sequence diagram](#encrypt-operation-with-caching-in-encryption-sdk). Caching is enabled by default. 
```mermaid @@ -222,16 +222,25 @@ sequenceDiagram participant DataMasking as Data Masking participant EncryptionProvider as Encryption Provider Client->>Lambda: Invoke (event) + Lambda->>DataMasking: Init Encryption Provider with master key + Note over Lambda,DataMasking: AwsEncryptionSdkProvider([KMS_KEY]) Lambda->>DataMasking: encrypt(data) - DataMasking->>EncryptionProvider: Request unique data key (kms:GenerateDataKey) - DataMasking->>DataMasking: Encrypt data key with wrapping key (in-memory) - DataMasking->>DataMasking: Encrypt data with newly encrypted key (in-memory) - DataMasking->>Lambda: ciphertext containing encrypted data + DataMasking->>EncryptionProvider: Create unique data key + Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API + DataMasking->>DataMasking: DATA_KEY.encrypt(data) + DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) + DataMasking->>DataMasking: Create encrypted message + Note over DataMasking: Encrypted message includes encrypted data, data key encrypted, algorithm, and more. + DataMasking->>Lambda: Ciphertext from encrypted message Lambda-->>Client: Return response ``` Encrypting operation using envelope encryption. 
+#### Encrypt operation with caching in Encryption SDK + +TODO + ## Testing your code For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. From 74bbd6011f6757624eedfe23e4ce74d59d2a7618 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 14 Dec 2023 21:47:06 +0100 Subject: [PATCH 048/151] docs: early caching msg before diagram --- docs/utilities/data_masking.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 90559303d4a..41e4709172e 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -211,7 +211,7 @@ sequenceDiagram We call KMS to generate a unique data key that can be used for multiple `encrypt` operations in-memory. It improves performance, reduces cost, and prevents throttling. -To make this operation simpler to visualize, we keep caching details in a [separate sequence diagram](#encrypt-operation-with-caching-in-encryption-sdk). Caching is enabled by default. +To make this operation simpler to visualize, we keep caching details in a [separate sequence diagram](#caching-encrypt-operations-with-encryption-sdk). Caching is enabled by default. 
```mermaid @@ -237,9 +237,22 @@ sequenceDiagram Encrypting operation using envelope encryption.
-#### Encrypt operation with caching in Encryption SDK +#### Caching encrypt operations with Encryption SDK + +Without caching, every `encrypt()` operation would generate a new data key. It significantly increases latency and cost for ephemeral and short-running environments like Lambda. + +With caching, we balance ephemeral Lambda environment performance characteristics with adjustable thresholds to meet your security needs. + +!!! info "Data key recycling" + We request a new data key when a cached data key exceeds any of the following security thresholds: + + 1. **Max age in seconds** + 2. **Max number of encrypted messages** + 3. **Max bytes encrypted** across all operations + + +> Diagram tbd -TODO ## Testing your code From db318cd5c6e0d6d5bc3bbec9ded9d457b6759bc3 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Thu, 14 Dec 2023 22:03:37 +0100 Subject: [PATCH 049/151] docs: add caching in encryption sdk ops --- docs/utilities/data_masking.md | 39 +++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 41e4709172e..bb8139215b6 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -227,6 +227,7 @@ sequenceDiagram Lambda->>DataMasking: encrypt(data) DataMasking->>EncryptionProvider: Create unique data key Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API + DataMasking->>DataMasking: Cache new unique data key DataMasking->>DataMasking: DATA_KEY.encrypt(data) DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) DataMasking->>DataMasking: Create encrypted message @@ -250,9 +251,41 @@ With caching, we balance ephemeral Lambda environment performance characteristic 2. **Max number of encrypted messages** 3. **Max bytes encrypted** across all operations - -> Diagram tbd - +
+```mermaid +sequenceDiagram + autonumber + participant Client + participant Lambda + participant DataMasking as Data Masking + participant EncryptionProvider as Encryption Provider + Client->>Lambda: Invoke (event) + Lambda->>DataMasking: Init Encryption Provider with master key + Note over Lambda,DataMasking: AwsEncryptionSdkProvider([KMS_KEY]) + Lambda->>DataMasking: encrypt(data) + DataMasking->>EncryptionProvider: Create unique data key + Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API + DataMasking->>DataMasking: Cache new unique data key + DataMasking->>DataMasking: DATA_KEY.encrypt(data) + DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) + DataMasking->>DataMasking: Create encrypted message + Note over DataMasking: Encrypted message includes encrypted data, data key encrypted, algorithm, and more. + DataMasking->>Lambda: Ciphertext from encrypted message + Lambda->>DataMasking: encrypt(another_data) + DataMasking->>DataMasking: Searches for data key in cache + alt Is Data key in cache? + DataMasking->>DataMasking: Reuses data key + else Is Data key evicted from cache? + DataMasking->>EncryptionProvider: Create unique data key + DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) + end + DataMasking->>DataMasking: DATA_KEY.encrypt(data) + DataMasking->>DataMasking: Create encrypted message + DataMasking->>Lambda: Ciphertext from encrypted message + Lambda-->>Client: Return response +``` +Caching data keys during encrypt operation. +
## Testing your code From fe184c4024880024a237a3e19240a6a420144968 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Thu, 14 Dec 2023 14:24:44 -0800 Subject: [PATCH 050/151] Added max_bytes_encrypted to CMM --- .../utilities/_data_masking/constants.py | 8 ++++++-- .../_data_masking/provider/kms/aws_encryption_sdk.py | 5 +++++ docs/utilities/data_masking.md | 3 ++- examples/data_masking/src/getting_started_decrypt_data.py | 2 +- examples/data_masking/src/getting_started_encrypt_data.py | 2 +- 5 files changed, 15 insertions(+), 5 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/constants.py b/aws_lambda_powertools/utilities/_data_masking/constants.py index 47e74f472cf..b44c7a60857 100644 --- a/aws_lambda_powertools/utilities/_data_masking/constants.py +++ b/aws_lambda_powertools/utilities/_data_masking/constants.py @@ -1,5 +1,9 @@ DATA_MASKING_STRING: str = "*****" +# The maximum number of entries that can be retained in the local cryptographic materials cache CACHE_CAPACITY: int = 100 +# The maximum time (in seconds) that a cache entry may be kept in the cache MAX_CACHE_AGE_SECONDS: float = 300.0 -MAX_MESSAGES_ENCRYPTED: int = 200 -# NOTE: You can also set max messages/bytes per data key +# Maximum number of messages which are allowed to be encrypted under a single cached data key +MAX_MESSAGES_ENCRYPTED: int = 4294967296 # 2 ** 32 +# Maximum number of bytes which are allowed to be encrypted under a single cached data key +MAX_BYTES_ENCRYPTED: int = 9223372036854775807 # 2 ** 63 - 1 diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index cd7ccfe7eaf..ecd1745f74d 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -16,6 +16,7 @@ from aws_lambda_powertools.shared.user_agent import 
register_feature_to_botocore_session from aws_lambda_powertools.utilities._data_masking.constants import ( CACHE_CAPACITY, + MAX_BYTES_ENCRYPTED, MAX_CACHE_AGE_SECONDS, MAX_MESSAGES_ENCRYPTED, ) @@ -67,6 +68,7 @@ def __init__( local_cache_capacity: int = CACHE_CAPACITY, max_cache_age_seconds: float = MAX_CACHE_AGE_SECONDS, max_messages_encrypted: int = MAX_MESSAGES_ENCRYPTED, + max_bytes_encrypted: int = MAX_BYTES_ENCRYPTED, json_serializer: Callable | None = None, json_deserializer: Callable | None = None, ): @@ -77,6 +79,7 @@ def __init__( local_cache_capacity=local_cache_capacity, max_cache_age_seconds=max_cache_age_seconds, max_messages_encrypted=max_messages_encrypted, + max_bytes_encrypted=max_bytes_encrypted, json_serializer=self.json_serializer, json_deserializer=self.json_deserializer, ) @@ -103,6 +106,7 @@ def __init__( local_cache_capacity: int = CACHE_CAPACITY, max_cache_age_seconds: float = MAX_CACHE_AGE_SECONDS, max_messages_encrypted: int = MAX_MESSAGES_ENCRYPTED, + max_bytes_encrypted: int = MAX_BYTES_ENCRYPTED, ): session = botocore.session.Session() register_feature_to_botocore_session(session, "data-masking") @@ -118,6 +122,7 @@ def __init__( cache=self.cache, max_age=max_cache_age_seconds, max_messages_encrypted=max_messages_encrypted, + max_bytes_encrypted=max_bytes_encrypted, ) def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index bb8139215b6..26107e70f9b 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -174,7 +174,8 @@ You have the option to modify some of the configurations we have set as defaults | -------------------------- | -------- | ------- | --------------------------------------------------------------------------------------------- | | **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | | 
**max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | -| **max_messages_encrypted** | | `200` | The maximum number of messages that may be encrypted under a cache entry | +| **max_messages_encrypted** | | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | +| **max_bytes_encrypted** | | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | For more information about the parameters for this provider, please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#aws_encryption_sdk.materials_managers.caching.CachingCryptoMaterialsManager){target="_blank" rel="nofollow"}. diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 0b83c41ace1..7c407392937 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -8,7 +8,7 @@ logger = Logger() -KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") +KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 0b83c41ace1..7c407392937 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -8,7 +8,7 @@ logger = Logger() -KMS_KEY_ARN = os.getenv("KMS_KEY_ARN") +KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) From 65bf540b58f1d4671d861596fbc72c233d28a739 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: 
Thu, 14 Dec 2023 14:50:04 -0800 Subject: [PATCH 051/151] Fix mypy errors --- examples/data_masking/src/getting_started_decrypt_data.py | 2 +- examples/data_masking/src/getting_started_encrypt_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 7c407392937..d31cdb26a8a 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -8,7 +8,7 @@ logger = Logger() -KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN") +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 7c407392937..d31cdb26a8a 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -8,7 +8,7 @@ logger = Logger() -KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN") +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) From aacf0db81d03e026f701ab44500cd9cf3190494a Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 14 Dec 2023 23:33:31 +0000 Subject: [PATCH 052/151] Adding logging + data type + variable names --- .../utilities/_data_masking/base.py | 57 +++++++++++-------- .../provider/kms/aws_encryption_sdk.py | 3 + .../src/getting_started_decrypt_data.py | 2 +- .../src/getting_started_encrypt_data.py | 2 +- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index fc6f827458d..98dc792e936 100644 --- 
a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,9 +1,14 @@ +from __future__ import annotations + import json -from typing import Callable, Iterable, Optional, Union +import logging +from typing import Any, Callable, Iterable, Optional, Union from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider +logger = logging.getLogger(__name__) + class DataMasking: """ @@ -45,23 +50,23 @@ def lambda_handler(event, context): def __init__(self, provider: Optional[BaseProvider] = None): self.provider = provider or BaseProvider() - def encrypt(self, data, fields=None, **provider_options): + def encrypt(self, data, fields=None, **provider_options) -> str: return self._apply_action(data, fields, self.provider.encrypt, **provider_options) - def decrypt(self, data, fields=None, **provider_options): + def decrypt(self, data, fields=None, **provider_options) -> Any: return self._apply_action(data, fields, self.provider.decrypt, **provider_options) def mask(self, data, fields=None, **provider_options) -> Union[str, Iterable]: return self._apply_action(data, fields, self.provider.mask, **provider_options) - def _apply_action(self, data, fields, action: Callable, **provider_options): + def _apply_action(self, data: str | dict, fields, action: Callable, **provider_options): """ Helper method to determine whether to apply a given action to the entire input data or to specific fields if the 'fields' argument is specified. Parameters ---------- - data : any + data : str | dict The input data to process. fields : Optional[List[any]] = None A list of fields to apply the action to. If 'None', the action is applied to the entire 'data'. 
@@ -76,8 +81,10 @@ def _apply_action(self, data, fields, action: Callable, **provider_options): """ if fields is not None: + logger.debug(f"Running action {action.__name__} with fields {fields}") return self._apply_action_to_fields(data, fields, action, **provider_options) else: + logger.debug(f"Running action {action.__name__} with the entire data") return action(data, **provider_options) def _apply_action_to_fields( @@ -130,46 +137,46 @@ def _apply_action_to_fields( ``` """ + data_parsed = {} + if fields is None: raise ValueError("No fields specified.") if isinstance(data, str): # Parse JSON string as dictionary - my_dict_parsed = json.loads(data) + data_parsed = json.loads(data) elif isinstance(data, dict): - # In case their data has keys that are not strings (i.e. ints), convert it all into a JSON string - my_dict_parsed = json.dumps(data) - # Turn back into dict so can parse it - my_dict_parsed = json.loads(my_dict_parsed) + # Convert the data to a JSON string in case it contains non-string keys (e.g., ints) + # Parse the JSON string back into a dictionary + data_parsed = json.loads(json.dumps(data)) else: raise DataMaskingUnsupportedTypeError( f"Unsupported data type. 
Expected a traversable type (dict or str), but got {type(data)}.", ) - # For example: ['a.b.c'] in ['a.b.c', 'a.x.y'] - for nested_key in fields: + for nested_field in fields: # Prevent overriding loop variable - curr_nested_key = nested_key + current_nested_field = nested_field - # If the nested_key is not a string, convert it to a string representation - if not isinstance(curr_nested_key, str): - curr_nested_key = json.dumps(curr_nested_key) + # Ensure the nested field is represented as a string + if not isinstance(current_nested_field, str): + current_nested_field = json.dumps(current_nested_field) - # Split the nested key string into a list of nested keys + # Split the nested field string into a list of nested keys # ['a.b.c'] -> ['a', 'b', 'c'] - keys = curr_nested_key.split(".") + nested_keys = current_nested_field.split(".") - # Initialize a current dictionary to the root dictionary - curr_dict = my_dict_parsed + # Initialize the current dictionary to the root dictionary + current_dict = data_parsed # Traverse the dictionary hierarchy by iterating through the list of nested keys - for key in keys[:-1]: - curr_dict = curr_dict[key] + for key in nested_keys[:-1]: + current_dict = current_dict[key] # Retrieve the final value of the nested field - valtochange = curr_dict[(keys[-1])] + target_value = current_dict[nested_keys[-1]] # Apply the specified 'action' to the target value - curr_dict[keys[-1]] = action(valtochange, **provider_options) + current_dict[nested_keys[-1]] = action(target_value, **provider_options) - return my_dict_parsed + return data_parsed diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index ecd1745f74d..f7e26a3252b 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -1,6 +1,7 @@ from 
__future__ import annotations import base64 +import logging from binascii import Error from typing import Any, Callable, Dict, List @@ -28,6 +29,8 @@ ) from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider +logger = logging.getLogger(__name__) + class AwsEncryptionSdkProvider(BaseProvider): """ diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index d31cdb26a8a..09f0524cb56 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -8,7 +8,7 @@ logger = Logger() -KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") +KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN", "") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index d31cdb26a8a..09f0524cb56 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -8,7 +8,7 @@ logger = Logger() -KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") +KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN", "") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) From 6970cbfa8758f2dad18aea7f19ff56e88db3bc7c Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Thu, 14 Dec 2023 16:35:20 -0800 Subject: [PATCH 053/151] Added docstrings to baseprovider --- .../utilities/_data_masking/base.py | 2 +- .../utilities/_data_masking/exceptions.py | 2 +- .../utilities/_data_masking/provider/base.py | 69 ++++++++++++++++++- .../provider/kms/aws_encryption_sdk.py | 8 +-- 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 
98dc792e936..4f3430af763 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -37,7 +37,7 @@ def lambda_handler(event, context): data = { "project": "powertools", - "sensitive": "xxxxxxxxxx" + "sensitive": "password" } masked = masker.mask(data,fields=["sensitive"]) diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py b/aws_lambda_powertools/utilities/_data_masking/exceptions.py index b43b4b37554..45f98315df5 100644 --- a/aws_lambda_powertools/utilities/_data_masking/exceptions.py +++ b/aws_lambda_powertools/utilities/_data_masking/exceptions.py @@ -24,5 +24,5 @@ class DataMaskingDecryptValueError(Exception): class DataMaskingContextMismatchError(Exception): """ - Decrypting an invalid field. + Decrypting with the incorrect encryption context. """ diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index f1488fe542d..417bdbb6540 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -6,8 +6,63 @@ class BaseProvider: """ - When you try to create an instance of a subclass that does not implement the encrypt method, - you will get a NotImplementedError with a message that says the method is not implemented: + The BaseProvider class serves as an abstract base class for data masking providers. + + Attributes + ---------- + json_serializer : Callable + A callable function responsible for JSON serialization. + json_deserializer : Callable + A callable function responsible for JSON deserialization. + + Methods + ------- + default_json_serializer(data) + Default method for JSON serialization. + default_json_deserializer(data) + Default method for JSON deserialization. + encrypt(data) + Abstract method for encrypting data. Subclasses must implement this method. 
+ decrypt(data) + Abstract method for decrypting data. Subclasses must implement this method. + mask(data) + Default method for masking data. + + Examples + -------- + ``` + from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider + from aws_lambda_powertools.utilities.data_masking import DataMasking + + class MyCustomProvider(BaseProvider): + def encrypt(self, data) -> str: + # Implementation logic for data encryption + + def decrypt(self, data) -> Any: + # Implementation logic for data decryption + + def mask(self, data) -> Union[str, Iterable]: + # Implementation logic for data masking + pass + + def lambda_handler(event, context): + provider = MyCustomProvider(["secret-key"]) + data_masker = DataMasking(provider=provider) + + data = { + "project": "powertools", + "sensitive": "password" + } + + encrypted = data_masker.encrypt(data, fields=["sensitive"]) + + return encrypted + ``` + + Raises + ------- + NotImplementedError + If `encrypt()` or `decrypt()` methods are not implemented. """ def __init__(self, json_serializer=None, json_deserializer=None) -> None: @@ -27,6 +82,16 @@ def decrypt(self, data) -> Any: raise NotImplementedError("Subclasses must implement decrypt()") def mask(self, data) -> Union[str, Iterable]: + """ + This method irreversibly masks data. + + If the data to be masked is of type `str`, `dict`, or `bytes`, + this method will return a masked string, i.e. "*****". + + If the data to be masked is of an iterable type like `list`, `tuple`, + or `set`, this method will return a new object of the same type as the + input data but with each element replaced by the string "*****". 
+ """ if isinstance(data, (str, dict, bytes)): return DATA_MASKING_STRING elif isinstance(data, (list, tuple, set)): diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index f7e26a3252b..ac535131e68 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -50,16 +50,16 @@ class AwsEncryptionSdkProvider(BaseProvider): def lambda_handler(event, context): provider = AwsEncryptionSdkProvider(["arn:aws:kms:us-east-1:0123456789012:key/key-id"]) - masker = DataMasking(provider=provider) + data_masker = DataMasking(provider=provider) data = { "project": "powertools", - "sensitive": "xxxxxxxxxx" + "sensitive": "password" } - masked = masker.encrypt(data,fields=["sensitive"]) + encrypted = data_masker.encrypt(data, fields=["sensitive"]) - return masked + return encrypted ``` """ From 20f1315ef20b0b0daaf4138da32a17369f81f2fa Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Thu, 14 Dec 2023 23:43:53 -0800 Subject: [PATCH 054/151] Explain fields syntax more --- docs/utilities/data_masking.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 26107e70f9b..1284bd42768 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -91,7 +91,10 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti #### JSON -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. 
If a `fields` parameter is provided, then the rest of the dictionary or JSON string will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. + +If a `fields` parameter is provided along with a dictionary as the input data, then the rest of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. + If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). From 007231aba79785e4ff2c1ea298c7c298b094396e Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Fri, 15 Dec 2023 14:46:19 -0800 Subject: [PATCH 055/151] Clarify fields param --- docs/utilities/data_masking.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 1284bd42768..421434e68c8 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -93,7 +93,9 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. 
You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. -If a `fields` parameter is provided along with a dictionary as the input data, then the rest of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. +If a `fields` parameter is provided along with a dictionary as the input data, then the rest of the content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while preserving their original content. + +If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. 
From b395f12094d1b797c426a49b4534e24e44ef2b58 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 08:58:36 +0100 Subject: [PATCH 056/151] docs: add decrypt operation diag --- docs/utilities/data_masking.md | 44 ++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 421434e68c8..36892265701 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -91,7 +91,7 @@ When using AWS Encryption SDK with AWS KMS keys for data encryption and decrypti #### JSON -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. If a `fields` parameter is provided along with a dictionary as the input data, then the rest of the content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while preserving their original content. @@ -175,12 +175,12 @@ Decrypting a ciphertext string will transform the data to its original type. You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK.
You can find and modify the following values when initializing the `AwsEncryptionSdkProvider`. -| Parameter | Required | Default | Description | -| -------------------------- | -------- | ------- | --------------------------------------------------------------------------------------------- | -| **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | -| **max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | -| **max_messages_encrypted** | | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | -| **max_bytes_encrypted** | | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | +| Parameter | Required | Default | Description | +| -------------------------- | -------- | --------------------- | --------------------------------------------------------------------------------------------- | +| **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | +| **max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | +| **max_messages_encrypted** | | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | +| **max_bytes_encrypted** | | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | For more information about the parameters for this provider, please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#aws_encryption_sdk.materials_managers.caching.CachingCryptoMaterialsManager){target="_blank" rel="nofollow"}. @@ -244,6 +244,36 @@ sequenceDiagram Encrypting operation using envelope encryption. 
+#### Decrypt operation with Encryption SDK (KMS) + +We call KMS to decrypt the encrypted data key available in the encrypted message. If successful, we run authentication _(context)_ and integrity checks (_algorithm, data key length, etc_) to confirm its proceedings. + +Lastly, we decrypt the original encrypted data, throw away the decrypted data key for security reasons, and return the original plaintext data. + +
+```mermaid +sequenceDiagram + autonumber + participant Client + participant Lambda + participant DataMasking as Data Masking + participant EncryptionProvider as Encryption Provider + Client->>Lambda: Invoke (event) + Lambda->>DataMasking: Init Encryption Provider with master key + Note over Lambda,DataMasking: AwsEncryptionSdkProvider([KMS_KEY]) + Lambda->>DataMasking: decrypt(data) + DataMasking->>EncryptionProvider: Decrypt encrypted data key + Note over DataMasking,EncryptionProvider: KMS Decrypt API + DataMasking->>DataMasking: Authentication and integrity checks + DataMasking->>DataMasking: DATA_KEY.decrypt(data) + DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) + DataMasking->>DataMasking: Discards decrypted data key + DataMasking->>Lambda: Plaintext + Lambda-->>Client: Return response +``` +Decrypting operation using envelope encryption. +
+ #### Caching encrypt operations with Encryption SDK Without caching, every `encrypt()` operation would generate a new data key. It significantly increases latency and cost for ephemeral and short running environments like Lambda. From 13a1f5d45786b38fe71e161659b4b770b1086ce6 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 10:35:15 +0100 Subject: [PATCH 057/151] docs: add encryption ctx, envelope encryption terminologies --- docs/utilities/data_masking.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 36892265701..c47a754715c 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -49,6 +49,19 @@ stateDiagram-v2 **Decrypting** transforms ciphertext back into plaintext using a decryption algorithm and the correct decryption key. +**Encryption context** is a non-secret `key:value` data used for authentication like `tenant_id:`. This adds extra security and confirms data decryption is related to a given context. + +**Envelope encryption** uses two different keys to encrypt data safely: master and data key. The data key encrypts the plaintext, and the master key encrypts the data key. It simplifies key management (_you own the master key_), isolates compromises to data key, and scales faster with large data volumes. + +
+```mermaid +graph LR + M(Master key) --> |Encrypts| D(Data key) + D(Data key) --> |Encrypts| S(Sensitive data) +``` +Envelope encryption visualized. +
+ ## Getting started ### Install From 0d8c53097411e43518c0066420d89c4002dda302 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 10:42:54 +0100 Subject: [PATCH 058/151] docs: line editing terminology --- docs/utilities/data_masking.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index c47a754715c..fd2ae137986 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -43,15 +43,15 @@ stateDiagram-v2 ## Terminology -**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. Data is replaced in-memory hence why being irreversible. +**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, which is why it is irreversible. -**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. This allows you to encrypt any PII (personally identifiable information) to ensure only authorized personnel can decrypt it. +**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only authorized personnel can decrypt it. **Decrypting** transforms ciphertext back into plaintext using a decryption algorithm and the correct decryption key. -**Encryption context** is a non-secret `key:value` data used for authentication like `tenant_id:`. This adds extra security and confirms data decryption is related to a given context. +**Encryption context** is a non-secret `key:value` data used for authentication like `tenant_id:`. This adds extra security and confirms the encrypted data's relationship with a context. -**Envelope encryption** uses two different keys to encrypt data safely: master and data key. The data key encrypts the plaintext, and the master key encrypts the data key.
It simplifies key management (_you own the master key_), isolates compromises to data key, and scales faster with large data volumes. +**Envelope encryption** uses two different keys to encrypt data safely: master and data key. The data key encrypts the plaintext, and the master key encrypts the data key. It simplifies key management _(you own the master key)_, isolates compromises to data key, and scales better with large data volumes.
```mermaid From 352ed1e14901134007419d1fb6a0d29ec6df275c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 11:26:50 +0100 Subject: [PATCH 059/151] docs: correct getting started, install sections --- docs/utilities/data_masking.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index fd2ae137986..05e2e462b42 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -64,14 +64,25 @@ graph LR ## Getting started +???+ tip + All examples shared in this documentation are available within the [project repository](https://github.com/aws-powertools/powertools-lambda-python/tree/develop/examples){target="_blank"}. + ### Install -Before you start, you need to create a KMS key to encrypt and decrypt your data - your Lambda function will need read and write access to it. +!!! note "This is not necessary if you're installing Powertools for AWS Lambda (Python) via [Lambda Layer/SAR](../index.md#lambda-layer){target="_blank"}" + +Add `aws-lambda-powertools[datamasking-aws-sdk]` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_. This will install the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"}. -!!! note "If your Lambda function only masks data without utilizing any encryption services, it requires no additional permissions or library to use this utility." + +AWS Encryption SDK contains non-Python dependencies. This means you should use [AWS SAM CLI](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/using-sam-cli-build.html#using-sam-cli-build-options-container){target="_blank"} or [official build container images](https://gallery.ecr.aws/search?searchTerm=sam%2Fbuild-python&popularRegistries=amazon){target="_blank"} when building your application for AWS Lambda. 
Local development should work as expected. + ### Required resources +!!! info "By default, we use AWS Key Management Service (KMS) for encryption and decryption operations." + +Before you start, you will need a KMS key to encrypt and decrypt your data. Your Lambda function will need read and write access to it. + === "AWS Serverless Application Model (SAM) example" ```yaml hl_lines="16 24 35 59-60 66-67" --8<-- "examples/data_masking/sam/template.yaml" From 1dc130cd34f6425c9d05b6c0f1449eba3a761b27 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 11:36:03 +0100 Subject: [PATCH 060/151] docs: add note on min memory and separation of concerns upfront --- docs/utilities/data_masking.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 05e2e462b42..b0143d81e96 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -83,6 +83,8 @@ AWS Encryption SDK contains non-Python dependencies. This means you should use [ Before you start, you will need a KMS key to encrypt and decrypt your data. Your Lambda function will need read and write access to it. +**NOTE**. We recommend setting a minimum of 1024MB of memory _(CPU intensive)_, and separate Lambda functions for encrypt and decrypt.
+ === "AWS Serverless Application Model (SAM) example" ```yaml hl_lines="16 24 35 59-60 66-67" --8<-- "examples/data_masking/sam/template.yaml" From fd7fd71adcf7a80002ef213e2c2e5392ddce6caa Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 11:39:42 +0100 Subject: [PATCH 061/151] docs: use newer Powertools log level env var --- examples/data_masking/sam/template.yaml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 40e920b7064..0c43e7d603d 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -1,4 +1,4 @@ -AWSTemplateFormatVersion: '2010-09-09' +AWSTemplateFormatVersion: "2010-09-09" Transform: AWS::Serverless-2016-10-31 Description: > Powertools for AWS Lambda (Python) data masking example @@ -9,12 +9,11 @@ Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/devel Runtime: python3.11 Tracing: Active Environment: - Variables: - POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld - POWERTOOLS_METRICS_NAMESPACE: Powertools - LOG_LEVEL: INFO - KMS_KEY_ARN: !GetAtt DataMaskingKMSKey.Arn - + Variables: + POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld + POWERTOOLS_METRICS_NAMESPACE: Powertools + POWERTOOLS_LOG_LEVEL: INFO + KMS_KEY_ARN: !GetAtt DataMaskingKMSKey.Arn Resources: # Lambda Function @@ -45,11 +44,11 @@ Resources: # KMS KEY DataMaskingKMSKey: - Type: 'AWS::KMS::Key' + Type: "AWS::KMS::Key" Properties: Description: KMS Key for Lambda - DataMasking KeyPolicy: - Version: '2012-10-17' + Version: "2012-10-17" Id: key-default-1 Statement: # To ensure key management security, a KMS Key should have at least one administrator. @@ -57,14 +56,14 @@ Resources: # In a production environment, it is recommended to configure specific users or roles for enhanced security. 
- Effect: Allow Principal: - AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root' - Action: 'kms:*' - Resource: '*' + AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" + Action: "kms:*" + Resource: "*" # KMS utilizes resource policies, allowing direct access grant to the Lambda Role on the KMS Key. # For more details: https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html - Effect: Allow Principal: - AWS: !GetAtt DataMaskingEncryptFunctionExampleRole.Arn # Permission for the Lambda role + AWS: !GetAtt DataMaskingEncryptFunctionExampleRole.Arn # Permission for the Lambda role # These IAM permissions are necessary for the envelope encryption that AWS Encryption SDK uses. # Envelope encryption randomly generates a data key and encrypts that data key along with your data, # so we encrypt in-memory to prevent too many calls to KMS to reduce latency. From 9b1bf22cc5a1dc4d25ce5c366305cf58fdb3b8fa Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 11:41:08 +0100 Subject: [PATCH 062/151] docs: add missing logger ctx, remove metrics --- examples/data_masking/sam/template.yaml | 1 - examples/data_masking/src/data_masking_function_example.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 0c43e7d603d..edbb1e66ac3 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -11,7 +11,6 @@ Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/devel Environment: Variables: POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld - POWERTOOLS_METRICS_NAMESPACE: Powertools POWERTOOLS_LOG_LEVEL: INFO KMS_KEY_ARN: !GetAtt DataMaskingKMSKey.Arn diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index 96ed6d17f71..abeccbca76c 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ 
b/examples/data_masking/src/data_masking_function_example.py @@ -12,6 +12,7 @@ @tracer.capture_lambda_handler +@logger.inject_lambda_context def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Hello world function - HTTP 200") From 7540dccf84c1ac50f50ae40cdeee702bf12fd8aa Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 11:59:09 +0100 Subject: [PATCH 063/151] docs: line editing comments --- examples/data_masking/sam/template.yaml | 39 ++++++++++++------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index edbb1e66ac3..a3874f6e96c 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -12,22 +12,20 @@ Globals: # https://docs.aws.amazon.com/serverless-application-model/latest/devel Variables: POWERTOOLS_SERVICE_NAME: PowertoolsHelloWorld POWERTOOLS_LOG_LEVEL: INFO - KMS_KEY_ARN: !GetAtt DataMaskingKMSKey.Arn + KMS_KEY_ARN: !GetAtt DataMaskingMasterKey.Arn +# In production, we recommend you split up the encrypt and decrypt for fine-grained security. +# For example, one function can act as the encryption proxy via HTTP requests, data pipeline, etc., +# while only authorized personnel can call decrypt via a separate function. Resources: - # Lambda Function - # This function is mainly for documentation purposes. In prod, we recommend you split up the encrypt and decrypt - # calls, so that one function can act as the encryption proxy via HTTP requests, data pipeline, etc. - # while authorized personnel can call decrypt from scripts or a separate function. DataMaskingEncryptFunctionExample: Type: AWS::Serverless::Function Properties: Handler: data_masking_function_example.lambda_handler CodeUri: ../src - Description: Data Masking Function Example - # Cryptographic operations demand more memory usage. 
- # We recommend to allocate a minimum of 1024MB of memory to your Lambda function - # when utilizing the DataMasking Utility. + Description: Data Masking encryption function + # Cryptographic operations demand more CPU. CPU is proportionally allocated based on memory size. + # We recommend allocating a minimum of 1024MB of memory. MemorySize: 1024 # DataMaskingDecryptFunctionExample: @@ -35,39 +33,38 @@ Resources: # Properties: # Handler: data_masking_function_decrypt.lambda_handler # CodeUri: ../src - # Description: Data Masking Function Example - # # Cryptographic operations demand more memory usage. - # # We recommend to allocate a minimum of 1024MB of memory to your Lambda function - # # when utilizing the DataMasking Utility. + # Description: Data Masking decryption function + # Cryptographic operations demand more CPU. CPU is proportionally allocated based on memory size. + # We recommend allocating a minimum of 1024MB of memory. # MemorySize: 1024 # KMS KEY - DataMaskingKMSKey: + DataMaskingMasterKey: Type: "AWS::KMS::Key" Properties: - Description: KMS Key for Lambda - DataMasking + Description: KMS Key for encryption and decryption using Powertools for AWS Lambda Data masking feature KeyPolicy: Version: "2012-10-17" - Id: key-default-1 + Id: data-masking-enc-dec Statement: - # To ensure key management security, a KMS Key should have at least one administrator. + # For security reasons, ensure your KMS Key has at least one administrator. # In this example, the root account is granted administrator permissions. - # In a production environment, it is recommended to configure specific users or roles for enhanced security. + # However, in production we recommended configuring specific IAM Roles for enhanced security. - Effect: Allow Principal: AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" Action: "kms:*" Resource: "*" - # KMS utilizes resource policies, allowing direct access grant to the Lambda Role on the KMS Key. 
+ # KMS supports IAM resource policies to grant Lambda's IAM Role access to the KMS Key. # For more details: https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html - Effect: Allow Principal: AWS: !GetAtt DataMaskingEncryptFunctionExampleRole.Arn # Permission for the Lambda role # These IAM permissions are necessary for the envelope encryption that AWS Encryption SDK uses. - # Envelope encryption randomly generates a data key and encrypts that data key along with your data, - # so we encrypt in-memory to prevent too many calls to KMS to reduce latency. # For more details: https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/concepts.html#envelope-encryption Action: + # We use `Decrypt` to decrypt encrypted data key - kms:Decrypt + # We use `GeneratedDataKey` to create an unique and random data key for encryption - kms:GenerateDataKey Resource: "*" From be60985a9f566f12febd5ed1822070ddf612d0f2 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 13:11:19 +0100 Subject: [PATCH 064/151] docs: add code annotation, further cleanup --- docs/utilities/data_masking.md | 9 +++++++-- examples/data_masking/sam/template.yaml | 23 +++++++++-------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index b0143d81e96..0fdc60a2f14 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -51,7 +51,9 @@ stateDiagram-v2 **Encryption context** is a non-secret `key:value` data used for authentication like `tenant_id:<id>`. This adds extra security and confirms encrypted data relationship with a context. -**Envelope encryption** uses two different keys to encrypt data safely: master and data key. The data key encrypts the plaintext, and the master key encrypts the data key. It simplifies key management _(you own the master key)_, isolates compromises to data key, and scales better with large data volumes. 
+ +**[Envelope encryption](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/concepts.html#envelope-encryption){target="_blank"}** uses two different keys to encrypt data safely: master and data key. The data key encrypts the plaintext, and the master key encrypts the data key. It simplifies key management _(you own the master key)_, isolates compromises to data key, and scales better with large data volumes. +
```mermaid @@ -86,10 +88,13 @@ Before you start, you will need a KMS key to encrypt and decrypt your data. Your **NOTE**. We recommend setting a minimum of 1024MB of memory _(CPU intensive)_, and separate Lambda functions for encrypt and decrypt. === "AWS Serverless Application Model (SAM) example" - ```yaml hl_lines="16 24 35 59-60 66-67" + ```yaml hl_lines="15 29 41 61 66-67" --8<-- "examples/data_masking/sam/template.yaml" ``` + 1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"} + 2. [SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"} + #### Using AWS Encryption SDK To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index a3874f6e96c..96410cc1425 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -34,37 +34,32 @@ Resources: # Handler: data_masking_function_decrypt.lambda_handler # CodeUri: ../src # Description: Data Masking decryption function - # Cryptographic operations demand more CPU. CPU is proportionally allocated based on memory size. - # We recommend allocating a minimum of 1024MB of memory. 
# MemorySize: 1024 - # KMS KEY + # KMS Key DataMaskingMasterKey: Type: "AWS::KMS::Key" Properties: Description: KMS Key for encryption and decryption using Powertools for AWS Lambda Data masking feature + # KMS Key supports both IAM Resource Policies and Key Policies + # For more details: https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html KeyPolicy: Version: "2012-10-17" Id: data-masking-enc-dec Statement: # For security reasons, ensure your KMS Key has at least one administrator. # In this example, the root account is granted administrator permissions. - # However, in production we recommended configuring specific IAM Roles for enhanced security. + # However, we recommend configuring specific IAM Roles for enhanced security in production. - Effect: Allow Principal: - AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" + AWS: !Sub "arn:aws:iam::${AWS::AccountId}:root" # (1)! Action: "kms:*" Resource: "*" - # KMS supports IAM resource policies to grant Lambda's IAM Role access to the KMS Key. - # For more details: https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html + # We must grant Lambda's IAM Role access to the KMS Key - Effect: Allow Principal: - AWS: !GetAtt DataMaskingEncryptFunctionExampleRole.Arn # Permission for the Lambda role - # These IAM permissions are necessary for the envelope encryption that AWS Encryption SDK uses. - # For more details: https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/concepts.html#envelope-encryption + AWS: !GetAtt DataMaskingEncryptFunctionExampleRole.Arn # (2)! 
Action: - # We use `Decrypt` to decrypt encrypted data key - - kms:Decrypt - # We use `GeneratedDataKey` to create an unique and random data key for encryption - - kms:GenerateDataKey + - kms:Decrypt # to decrypt encrypted data key + - kms:GenerateDataKey # to create a unique and random data key for encryption Resource: "*" From 963f8cc9547c2539b2dd91869775c85c7da15376 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 13:13:27 +0100 Subject: [PATCH 065/151] docs: remove unused section --- docs/utilities/data_masking.md | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 0fdc60a2f14..227c36be55b 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -95,27 +95,6 @@ Before you start, you will need a KMS key to encrypt and decrypt your data. Your 1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"} 2. [SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"} -#### Using AWS Encryption SDK - -To use the AWS Encryption SDK, your Lambda function IAM Role must have the `kms:Decrypt` and `kms:GenerateDataKey` IAM permissions. - -When using AWS Encryption SDK with AWS KMS keys for data encryption and decryption, it's important to be aware that configuring additional permissions in the KMS Key Policy may be necessary. Learn more about KMS Key Policies [here](https://docs.aws.amazon.com/kms/latest/developerguide/key-policies.html){target="_blank"}. 
- -=== "data_masking_function_example.py" - ```python hl_lines="8 20-22" - --8<-- "examples/data_masking/src/data_masking_function_example.py" - ``` - -=== "input.json" - ```json - --8<-- "examples/data_masking/src/large_data_input.json" - ``` - -=== "output.json" - ```json - --8<-- "examples/data_masking/src/data_masking_function_example_output.json" - ``` - ### Working with nested data !!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." From 40644f1854085c4875580b7c8b840b68f4b6d8a9 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 13:15:19 +0100 Subject: [PATCH 066/151] docs: move navigation order, fix encryption typo --- docs/utilities/data_masking.md | 44 +++++++++++++++++----------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 227c36be55b..ec4788f4948 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -95,27 +95,6 @@ Before you start, you will need a KMS key to encrypt and decrypt your data. Your 1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"} 2. [SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"} -### Working with nested data - -!!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." - -#### JSON - -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. 
The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. - -If a `fields` parameter is provided along with a dictionary as the input data, then the rest of content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while perserving their original content. - -If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. - - - -If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). - - -???+ note - If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). - - ### Masking data You can mask data without having to install any encryption library. Masking data will result in the loss of its original type, and the masked data will always be represented as a string. @@ -135,7 +114,7 @@ You can mask data without having to install any encryption library. 
Masking data --8<-- "examples/data_masking/src/mask_data_output.json" ``` -### Encryting data +### Encrypting data To encrypt data, utilize our built-in integration with the AWS Encryption SDK. Encrypting data will temporarily result in the loss of the original data type, as it transforms into a [ciphertext](https://en.wikipedia.org/wiki/Ciphertext){target="_blank" rel="nofollow"} string. @@ -177,6 +156,27 @@ Decrypting a ciphertext string will transform the data to its original type. --8<-- "examples/data_masking/src/decrypt_data_output.json" ``` +### Working with nested data + +!!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." + +#### JSON + +When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. + +If a `fields` parameter is provided along with a dictionary as the input data, then the rest of content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while perserving their original content. + +If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. + + + +If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). 
+ + +???+ note + If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). + + ## Advanced ### Providers From 9a333936bf6a9f4393a165b5a9af1d25da04e6d2 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 13:41:06 +0100 Subject: [PATCH 067/151] docs: line editing masking data --- docs/utilities/data_masking.md | 10 +++++++--- examples/data_masking/src/getting_started_mask_data.py | 5 +++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ec4788f4948..dc52335850e 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -97,15 +97,19 @@ Before you start, you will need a KMS key to encrypt and decrypt your data. Your ### Masking data -You can mask data without having to install any encryption library. Masking data will result in the loss of its original type, and the masked data will always be represented as a string. +!!! note "You can mask data without [installing any dependency](#install)." + +Masking will erase the original data and replace with `*****`. This means you cannot recover masked data, and its type will change to `str`. === "getting_started_mask_data.py" - ```python hl_lines="4 8 16" + ```python hl_lines="4 8 17" --8<-- "examples/data_masking/src/getting_started_mask_data.py" ``` + 1. 
See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be erased with `*****`. + === "input.json" - ```json + ```json hl_lines="7 9 14" --8<-- "examples/data_masking/src/generic_data_input.json" ``` diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 10b16961ae9..1a288389551 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -8,11 +8,12 @@ data_masker = DataMasking() +@logger.inject_lambda_context def lambda_handler(event: dict, context: LambdaContext) -> Dict: data = event.get("body") logger.info("Masking fields email, address.street, and company_address") - masked = data_masker.mask(data=data, fields=["email", "address.street", "company_address"]) + masked: dict = data_masker.mask(data, fields=["email", "address.street", "company_address"]) # (1)! - return {"payload_masked": masked} + return masked From 7733633c9a7b526dd704a27364d3e73b4745e2fd Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 13:45:18 +0100 Subject: [PATCH 068/151] docs: fix file names in data masking --- docs/utilities/data_masking.md | 6 +++--- ...ta_output.json => getting_started_mask_data_output.json} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename examples/data_masking/src/{mask_data_output.json => getting_started_mask_data_output.json} (100%) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index dc52335850e..039d72dad4a 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -108,14 +108,14 @@ Masking will erase the original data and replace with `*****`. This means you ca 1. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be erased with `*****`. -=== "input.json" +=== "generic_data_input.json" ```json hl_lines="7 9 14" --8<-- "examples/data_masking/src/generic_data_input.json" ``` -=== "output.json" +=== "getting_started_mask_data_output.json" ```json hl_lines="5 7 12" - --8<-- "examples/data_masking/src/mask_data_output.json" + --8<-- "examples/data_masking/src/getting_started_mask_data_output.json" ``` ### Encrypting data diff --git a/examples/data_masking/src/mask_data_output.json b/examples/data_masking/src/getting_started_mask_data_output.json similarity index 100% rename from examples/data_masking/src/mask_data_output.json rename to examples/data_masking/src/getting_started_mask_data_output.json From 3d7b237089ce9dba81a49b3707288f1ab131cb08 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 13:48:41 +0100 Subject: [PATCH 069/151] docs: add symmetric word in required resources --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 039d72dad4a..2cd6bda06a5 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -83,7 +83,7 @@ AWS Encryption SDK contains non-Python dependencies. This means you should use [ !!! info "By default, we use Amazon Key Management Service (KMS) for encryption and decryption operations." -Before you start, you will need a KMS key to encrypt and decrypt your data. Your Lambda function will need read and write access to it. +Before you start, you will need a KMS symmetric key to encrypt and decrypt your data. Your Lambda function will need read and write access to it. **NOTE**. We recommend setting a minimum of 1024MB of memory _(CPU intensive)_, and separate Lambda functions for encrypt and decrypt. 
From b816197924091790943e2b0a952687b31af88155 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 14:11:25 +0100 Subject: [PATCH 070/151] docs: line editing encrypting data --- docs/utilities/data_masking.md | 15 ++++++++++----- .../src/getting_started_encrypt_data.py | 14 ++++++++------ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 2cd6bda06a5..f16730c6a1c 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -120,21 +120,26 @@ Masking will erase the original data and replace with `*****`. This means you ca ### Encrypting data -To encrypt data, utilize our built-in integration with the AWS Encryption SDK. Encrypting data will temporarily result in the loss of the original data type, as it transforms into a [ciphertext](https://en.wikipedia.org/wiki/Ciphertext){target="_blank" rel="nofollow"} string. +!!! note "About static typing and encryption" + Encrypting data may lead to a different data type, as it always transforms into a string _(`<ciphertext>`)_. -To encrypt your data, you'll need a valid and symmetric [AWS KMS](https://docs.aws.amazon.com/kms/latest/developerguide/data-protection.html){target="_blank"} key. +To encrypt, you will need an [encryption provider](#providers). Here, we will use `AWSEncryptionSDKProvider`. === "getting_started_encrypt_data.py" - ```python hl_lines="5-6 12-13 22" + + ```python hl_lines="5-6 11-12 23" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` -=== "input.json" + 1. You can use more than one KMS Key for higher availability with increased latency.

Encryption SDK will ensure the data key is encrypted with both keys. + 2. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be encrypted. + +=== "generic_data_input.json" ```json hl_lines="7-9 14" --8<-- "examples/data_masking/src/generic_data_input.json" ``` -=== "output.json" +=== "encrypt_data_output.json" ```json hl_lines="5-7 12" --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 09f0524cb56..1cb67f15cd9 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -6,18 +6,20 @@ from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider from aws_lambda_powertools.utilities.typing import LambdaContext -logger = Logger() - KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN", "") -encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) + +encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) # (1)! data_masker = DataMasking(provider=encryption_provider) +logger = Logger() + +@logger.inject_lambda_context def lambda_handler(event: Dict, context: LambdaContext) -> Dict: - data = event.get("body") + data = event.get("body", {}) logger.info("Encrypting fields email, address.street, and company_address") - encrypted = data_masker.encrypt(data=data, fields=["email", "address.street", "company_address"]) + encrypted: dict = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) # (2)! - return {"payload_encrypted": encrypted} + return encrypted From 2bae9403bc6602529c362270907d1abe4857f2ec Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 14:39:59 +0100 Subject: [PATCH 071/151] docs: line editing decrypting data; code snippet fixes --- docs/utilities/data_masking.md | 34 +++++++++++++------ .../src/getting_started_decrypt_data.py | 14 ++++---- ... 
getting_started_decrypt_data_output.json} | 0 .../src/getting_started_encrypt_data.py | 2 +- 4 files changed, 33 insertions(+), 17 deletions(-) rename examples/data_masking/src/{decrypt_data_output.json => getting_started_decrypt_data_output.json} (100%) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index f16730c6a1c..80e9ea2d737 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -125,13 +125,15 @@ Masking will erase the original data and replace with `*****`. This means you ca To encrypt, you will need an [encryption provider](#providers). Here, we will use `AWSEncryptionSDKProvider`. +Under the hood, we delegate a [number of operations](#encrypt-operation-with-encryption-sdk-kms) to AWS Encryption SDK to authenticate, create a portable encryption message, and actual data encryption. + === "getting_started_encrypt_data.py" ```python hl_lines="5-6 11-12 23" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` - 1. You can use more than one KMS Key for higher availability with increased latency.

Encryption SDK will ensure the data key is encrypted with both keys. + 1. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will ensure the data key is encrypted with both keys. 2. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be encrypted. === "generic_data_input.json" @@ -146,23 +148,35 @@ To encrypt, you will need an [encryption provider](#providers). Here, we will us ### Decrypting data -To decrypt data, use the appropriate key to transform ciphertext back into plaintext. Upon decryption, the data will return to its original type. +!!! note "About static typing and decryption" + Decrypting data may lead to a different data type, as encrypted data is always a string _(`<ciphertext>`)_. + +To decrypt, you will need an [encryption provider](#providers). Here, we will use `AWSEncryptionSDKProvider`. + +Under the hood, we delegate a [number of operations](#decrypt-operation-with-encryption-sdk-kms) to AWS Encryption SDK to verify authentication, integrity, and actual ciphertext decryption. + +=== "getting_started_decrypt_data.py" -Decrypting a ciphertext string will transform the data to its original type. + **NOTE**. Decryption only works with KMS Key ARN. + + ```python hl_lines="5-6 11-12 23" + --8<-- "examples/data_masking/src/getting_started_decrypt_data.py" + ``` + + 1. Note that KMS key alias or key ID won't work. + 2. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will call `Decrypt` API with all master keys when trying to decrypt the data key. + 3. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter. + +=== "encrypt_data_output.json" -=== "input.json" ```json hl_lines="5-7 12" --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` -=== "getting_started_encrypt_data.py" - ```python hl_lines="3-4 12-13 17" - --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" - ``` +=== "getting_started_decrypt_data_output.json" -=== "output.json" ```json hl_lines="5-7 12-17" - --8<-- "examples/data_masking/src/decrypt_data_output.json" + --8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" ``` ### Working with nested data diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 09f0524cb56..6844c1572dd 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -6,18 +6,20 @@ from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider from aws_lambda_powertools.utilities.typing import LambdaContext -logger = Logger() +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") # (1)! -KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN", "") -encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) # (2)! 
data_masker = DataMasking(provider=encryption_provider) +logger = Logger() + +@logger.inject_lambda_context def lambda_handler(event: Dict, context: LambdaContext) -> Dict: data = event.get("body") - logger.info("Encrypting fields email, address.street, and company_address") + logger.info("Decrypting fields email, address.street, and company_address") - encrypted = data_masker.encrypt(data=data, fields=["email", "address.street", "company_address"]) + decrypted = data_masker.decrypt(data, fields=["email", "address.street", "company_address"]) # (3)! - return {"payload_encrypted": encrypted} + return decrypted diff --git a/examples/data_masking/src/decrypt_data_output.json b/examples/data_masking/src/getting_started_decrypt_data_output.json similarity index 100% rename from examples/data_masking/src/decrypt_data_output.json rename to examples/data_masking/src/getting_started_decrypt_data_output.json diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 1cb67f15cd9..a9fecfc4c90 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -6,7 +6,7 @@ from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider from aws_lambda_powertools.utilities.typing import LambdaContext -KMS_KEY_ARN: str = os.getenv("KMS_KEY_ARN", "") +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) # (1)! 
data_masker = DataMasking(provider=encryption_provider) From 8104cb35d3d32a064e6ce1fe19ab9bcd9e7e9d20 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 15:10:33 +0100 Subject: [PATCH 072/151] docs: move load tests and traces upfront --- docs/utilities/data_masking.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 80e9ea2d737..1cae83abafd 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -85,7 +85,7 @@ AWS Encryption SDK contains non-Python dependencies. This means you should use [ Before you start, you will need a KMS symmetric key to encrypt and decrypt your data. Your Lambda function will need read and write access to it. -**NOTE**. We recommend setting a minimum of 1024MB of memory _(CPU intensive)_, and separate Lambda functions for encrypt and decrypt. +**NOTE**. We recommend setting a minimum of 1024MB of memory _(CPU intensive)_, and separate Lambda functions for encrypt and decrypt. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597){target="_blank"} and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923){target="_blank"}. === "AWS Serverless Application Model (SAM) example" ```yaml hl_lines="15 29 41 61 66-67" @@ -195,11 +195,6 @@ If a `fields` parameter is provided while the input data is a JSON string, the r If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). - -???+ note - If you're using our example [AWS Serverless Application Model (SAM) template](#using-a-custom-encryption-provider), you will notice we have configured the Lambda function to use a memory size of 1024 MB. 
We compared the performances of Lambda functions of several different memory sizes and concluded 1024 MB was the most optimal size for this feature. For more information, you can see the full reports of our [load tests](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1730571597) and [traces](https://github.com/aws-powertools/powertools-lambda-python/pull/2197#issuecomment-1732060923). - - ## Advanced ### Providers From a7146230b81ea36dac12a2c760971cad1ce7ba3f Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 15:12:12 +0100 Subject: [PATCH 073/151] docs: use pascal case for encryption sdk provider --- docs/utilities/data_masking.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 1cae83abafd..7941c28a718 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -201,7 +201,7 @@ If `fields` is not provided, the entire data object will be masked (or encrypted #### AWS Encryption SDK -You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values when initializing the `AwsEncryptionSdkProvider`. +You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values when initializing the `AWSEncryptionSDKProvider`. 
| Parameter | Required | Default | Description | | -------------------------- | -------- | --------------------- | --------------------------------------------------------------------------------------------- | @@ -257,7 +257,7 @@ sequenceDiagram participant EncryptionProvider as Encryption Provider Client->>Lambda: Invoke (event) Lambda->>DataMasking: Init Encryption Provider with master key - Note over Lambda,DataMasking: AwsEncryptionSdkProvider([KMS_KEY]) + Note over Lambda,DataMasking: AWSEncryptionSDKProvider([KMS_KEY]) Lambda->>DataMasking: encrypt(data) DataMasking->>EncryptionProvider: Create unique data key Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API @@ -288,7 +288,7 @@ sequenceDiagram participant EncryptionProvider as Encryption Provider Client->>Lambda: Invoke (event) Lambda->>DataMasking: Init Encryption Provider with master key - Note over Lambda,DataMasking: AwsEncryptionSdkProvider([KMS_KEY]) + Note over Lambda,DataMasking: AWSEncryptionSDKProvider([KMS_KEY]) Lambda->>DataMasking: decrypt(data) DataMasking->>EncryptionProvider: Decrypt encrypted data key Note over DataMasking,EncryptionProvider: KMS Decrypt API @@ -325,7 +325,7 @@ sequenceDiagram participant EncryptionProvider as Encryption Provider Client->>Lambda: Invoke (event) Lambda->>DataMasking: Init Encryption Provider with master key - Note over Lambda,DataMasking: AwsEncryptionSdkProvider([KMS_KEY]) + Note over Lambda,DataMasking: AWSEncryptionSDKProvider([KMS_KEY]) Lambda->>DataMasking: encrypt(data) DataMasking->>EncryptionProvider: Create unique data key Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API From fa24044712f90d0690144132b4e8a00d4c5e9d45 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 15:19:42 +0100 Subject: [PATCH 074/151] docs: remove redundant info from encryption sdk --- docs/utilities/data_masking.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md 
b/docs/utilities/data_masking.md index 7941c28a718..dab92b23283 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -201,7 +201,7 @@ If `fields` is not provided, the entire data object will be masked (or encrypted #### AWS Encryption SDK -You have the option to modify some of the configurations we have set as defaults when connecting to the AWS Encryption SDK. You can find and modify the following values when initializing the `AWSEncryptionSDKProvider`. +You can modify the following values when initializing the `AWSEncryptionSDKProvider` to best accommodate your security and performance thresholds. | Parameter | Required | Default | Description | | -------------------------- | -------- | --------------------- | --------------------------------------------------------------------------------------------- | @@ -210,8 +210,6 @@ You have the option to modify some of the configurations we have set as defaults | **max_messages_encrypted** | | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | | **max_bytes_encrypted** | | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | -For more information about the parameters for this provider, please see the [AWS Encryption SDK documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.materials_managers.caching.html#aws_encryption_sdk.materials_managers.caching.CachingCryptoMaterialsManager){target="_blank" rel="nofollow"}. - #### Creating your own provider !!! info "In Q1 2024, we plan to add support for bringing your own encryption provider." 
From 0afe8fa7dd7440145499771d170f4b4531e064f0 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 15:28:50 +0100 Subject: [PATCH 075/151] docs: add encryption message, link provider section --- docs/utilities/data_masking.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index dab92b23283..e3b8733feba 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -51,6 +51,8 @@ stateDiagram-v2 **Encryption context** is a non-secret `key:value` data used for authentication like `tenant_id:`. This adds extra security and confirms encrypted data relationship with a context. +**[Encrypted message](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/message-format.html){target="_blank"}** is a portable data structure that includes encrypted data along with copies of the encrypted data key. It includes everything Encryption SDK needs to validate authenticity, integrity, and to decrypt with the right master key. + **[Envelope encryption](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/concepts.html#envelope-encryption){target="_blank"}** uses two different keys to encrypt data safely: master and data key. The data key encrypts the plaintext, and the master key encrypts the data key. It simplifies key management _(you own the master key)_, isolates compromises to data key, and scales better with large data volumes. @@ -304,7 +306,7 @@ sequenceDiagram Without caching, every `encrypt()` operation would generate a new data key. It significantly increases latency and cost for ephemeral and short running environments like Lambda. -With caching, we balance ephemeral Lambda environment performance characteristics with adjustable thresholds to meet your security needs. +With caching, we balance ephemeral Lambda environment performance characteristics with [adjustable thresholds](#aws-encryption-sdk) to meet your security needs. 
!!! info "Data key recycling" We request a new data key when a cached data key exceeds any of the following security thresholds: From 9b1425200e1c42efddb30606319dac0b9b05bed3 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 16:04:51 +0100 Subject: [PATCH 076/151] docs: add initial encryption context section --- docs/utilities/data_masking.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index e3b8733feba..2def7aba2d8 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -148,6 +148,15 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` +#### Including encryption context + +For a stronger security posture, you can add metadata to each encryption operation. This is known as encryption context. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. + +!!! note "Encryption context will be required in [decryption operation](#decrypting-data)." + Make sure this metadata is standard and not random unless it's also available during decrypt operations. + +!!! todo "Change encrypt/decrypt signature to allow fluid encryption context" + ### Decrypting data !!! 
note "About static typing and decryption" From b5a0aef584476a1fd3dba914c79b5c11ac03a483 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 16:24:53 +0100 Subject: [PATCH 077/151] docs: add initial encryption context section --- docs/utilities/data_masking.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 2def7aba2d8..ebf4ae0e3f0 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -49,7 +49,7 @@ stateDiagram-v2 **Decrypting** transforms ciphertext back into plaintext using a decryption algorithm and the correct decryption key. -**Encryption context** is a non-secret `key:value` data used for authentication like `tenant_id:`. This adds extra security and confirms encrypted data relationship with a context. +**Encryption context** is a non-secret `key=value` data used for authentication like `tenant_id:`. This adds extra security and confirms encrypted data relationship with a context. **[Encrypted message](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/message-format.html){target="_blank"}** is a portable data structure that includes encrypted data along with copies of the encrypted data key. It includes everything Encryption SDK needs to validate authenticity, integrity, and to decrypt with the right master key. @@ -156,6 +156,7 @@ For a stronger security posture, you can add metadata to each encryption operati Make sure this metadata is standard and not random unless it's also available during decrypt operations. !!! todo "Change encrypt/decrypt signature to allow fluid encryption context" + [We need an API change](https://github.com/aws-powertools/powertools-lambda-python/pull/3186#issuecomment-1860778334). 
### Decrypting data From 07cf10078514d669fc4266ae1d81171cf2be2aed Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 16:42:42 +0100 Subject: [PATCH 078/151] docs: remove incorrect info on enc ctx being required, thus allowing random data --- docs/utilities/data_masking.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ebf4ae0e3f0..568ae721650 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -152,9 +152,6 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc For a stronger security posture, you can add metadata to each encryption operation. This is known as encryption context. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. -!!! note "Encryption context will be required in [decryption operation](#decrypting-data)." - Make sure this metadata is standard and not random unless it's also available during decrypt operations. - !!! todo "Change encrypt/decrypt signature to allow fluid encryption context" [We need an API change](https://github.com/aws-powertools/powertools-lambda-python/pull/3186#issuecomment-1860778334). 
From 24bcef7ea2457ae4851300fd1a9af2ca8209a297 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 17:14:46 +0100 Subject: [PATCH 079/151] docs: rename to AAD and add examples --- docs/utilities/data_masking.md | 35 +++++++++++++++---- .../src/getting_started_decrypt_context.py | 29 +++++++++++++++ .../src/getting_started_encrypt_context.py | 33 +++++++++++++++++ 3 files changed, 90 insertions(+), 7 deletions(-) create mode 100644 examples/data_masking/src/getting_started_decrypt_context.py create mode 100644 examples/data_masking/src/getting_started_encrypt_context.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 568ae721650..6b9088ea27c 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -148,13 +148,6 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc --8<-- "examples/data_masking/src/encrypt_data_output.json" ``` -#### Including encryption context - -For a stronger security posture, you can add metadata to each encryption operation. This is known as encryption context. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. - -!!! todo "Change encrypt/decrypt signature to allow fluid encryption context" - [We need an API change](https://github.com/aws-powertools/powertools-lambda-python/pull/3186#issuecomment-1860778334). - ### Decrypting data !!! note "About static typing and decryption" @@ -188,6 +181,34 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc --8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" ``` +### Additional authenticated data (AAD) + +For a stronger security posture, you can add metadata to each encryption operation. This is known as **encryption context**. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. + +**NOTE**. Only string values are supported. + +!!! 
todo "Change encrypt/decrypt signature to allow fluid encryption context" + [We need an API change](https://github.com/aws-powertools/powertools-lambda-python/pull/3186#issuecomment-1860778334). + +!!! question "Why non-sensitive data?" + Because it is stored in plaintext in AWS CloudTrail; it could be seen by anyone with access to CloudTrail. Unless you [intentionally disabled KMS events in a custom trail](https://docs.aws.amazon.com/kms/latest/developerguide/logging-using-cloudtrail.html#filtering-kms-events){target="_blank"}. + +!!! todo "Explain decryption context not being a strict exact match due to random data." + +=== "getting_started_encrypt_context.py" + + ```python hl_lines="27-30" + --8<-- "examples/data_masking/src/getting_started_encrypt_context.py" + ``` + +=== "getting_started_decrypt_context.py" + + ```python hl_lines="26" + --8<-- "examples/data_masking/src/getting_started_decrypt_context.py" + ``` + + 1. We use `tenant_id` to be sure the data we're decrypting belongs to this tenant. + ### Working with nested data !!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." 
diff --git a/examples/data_masking/src/getting_started_decrypt_context.py b/examples/data_masking/src/getting_started_decrypt_context.py new file mode 100644 index 00000000000..4212b0bb4b7 --- /dev/null +++ b/examples/data_masking/src/getting_started_decrypt_context.py @@ -0,0 +1,29 @@ +import os +from typing import Dict + +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") + +encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +data_masker = DataMasking(provider=encryption_provider) + +logger = Logger() + + +@logger.inject_lambda_context +def lambda_handler(event: Dict, context: LambdaContext) -> Dict: + data = event.get("body", {}) + + logger.info("Decrypting email field") + + decrypted: dict = data_masker.encrypt( + data, + fields=["email"], + tenant_id=event.get("tenant_id", ""), # (1)! 
+ ) + + return decrypted diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encrypt_context.py new file mode 100644 index 00000000000..c10612358bb --- /dev/null +++ b/examples/data_masking/src/getting_started_encrypt_context.py @@ -0,0 +1,33 @@ +import os +from datetime import datetime +from typing import Dict + +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") + +encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +data_masker = DataMasking(provider=encryption_provider) + +logger = Logger() + + +@logger.inject_lambda_context +def lambda_handler(event: Dict, context: LambdaContext) -> Dict: + data = event.get("body", {}) + + logger.info("Encrypting email field") + + encrypted: dict = data_masker.encrypt( + data, + fields=["email"], + data_classification="confidential", + data_type="customer-data", + timestamp=datetime.utcnow().isoformat(), + tenant_id="a06bf973-0734-4b53-9072-39d7ac5b2cba", + ) + + return encrypted From ca7897f94d355f80426363d413e266ae35458c73 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Mon, 18 Dec 2023 17:16:47 +0100 Subject: [PATCH 080/151] docs: rename to AAD and add examples --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 6b9088ea27c..97ea675dac2 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -183,7 +183,7 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc ### Additional authenticated data (AAD) -For a stronger security posture, you can add metadata to each encryption operation. 
This is known as **encryption context**. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. +For a stronger security posture, you can add metadata to each encryption operation, and verify one or more of them during decryption. This is known as **encryption context**. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. **NOTE**. Only string values are supported. From fec33a6e280a7fefdb7226bdbff69c73f98af4b5 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 18 Dec 2023 17:06:29 +0000 Subject: [PATCH 081/151] Adding support to list/set/tuple fields + renaming the class of the provider --- .../utilities/_data_masking/base.py | 102 +++++++++++++----- .../utilities/_data_masking/exceptions.py | 6 ++ .../utilities/_data_masking/provider/base.py | 10 +- .../_data_masking/provider/kms/__init__.py | 4 +- .../provider/kms/aws_encryption_sdk.py | 10 +- .../src/data_masking_function_example.py | 4 +- .../src/getting_started_decrypt_context.py | 4 +- .../src/getting_started_decrypt_data.py | 4 +- .../src/getting_started_encrypt_context.py | 4 +- .../src/getting_started_encrypt_data.py | 4 +- .../data_masking/tests/src/single_mock.py | 4 +- .../data_masking/handlers/basic_handler.py | 4 +- .../e2e/data_masking/test_e2e_data_masking.py | 12 +-- .../data_masking/test_aws_encryption_sdk.py | 44 +++++++- .../pt-load-test-stack/function_1024/app.py | 4 +- .../pt-load-test-stack/function_128/app.py | 4 +- .../pt-load-test-stack/function_1769/app.py | 4 +- 17 files changed, 161 insertions(+), 67 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 4f3430af763..d3e45d700a6 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -4,7 +4,10 @@ import logging from typing import Any, Callable, Iterable, Optional, Union 
-from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError +from aws_lambda_powertools.utilities._data_masking.exceptions import ( + DataMaskingFieldNotFound, + DataMaskingUnsupportedTypeError, +) from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider logger = logging.getLogger(__name__) @@ -137,9 +140,56 @@ def _apply_action_to_fields( ``` """ - data_parsed = {} + data_parsed: dict = self._normalize_data_to_parse(fields, data) - if fields is None: + for nested_field in fields: + logger.debug(f"Processing nested field: {nested_field}") + + nested_parsed_field = nested_field + + # Ensure the nested field is represented as a string + if not isinstance(nested_parsed_field, str): + nested_parsed_field = json.dumps(nested_parsed_field) + + # Split the nested field into keys using dot, square brackets as separators + # keys = re.split(r"\.|\[|\]", nested_field) # noqa ERA001 - REVIEW THIS + + keys = nested_parsed_field.replace("][", ".").replace("[", ".").replace("]", "").split(".") + keys = [key for key in keys if key] # Remove empty strings from the split + + # Traverse the dictionary hierarchy by iterating through the list of nested keys + current_dict = data_parsed + + for key in keys[:-1]: + # If enter here, the customer is passing potential list, set or tuple + # Example "payload[0]" + + logger.debug(f"Processing {key} in field {nested_field}") + + # It supports dict, list, set and tuple + try: + if isinstance(current_dict, dict) and key in current_dict: + # If enter heres, it captures the name of the key + # Example "payload" + current_dict = current_dict[key] + elif ( + isinstance(current_dict, (set, tuple, list)) and key.isdigit() and int(key) < len(current_dict) + ): + # If enter heres, it captures the index of the key + # Example "[0]" + current_dict = current_dict[int(key)] + except KeyError: + # Handle the case when the key doesn't exist + raise DataMaskingFieldNotFound(f"Key {key} not found 
in {current_dict}") + + last_key = keys[-1] + + current_dict = self._apply_action_to_specific_type(current_dict, action, last_key, **provider_options) + + return data_parsed + + def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: + if not fields: raise ValueError("No fields specified.") if isinstance(data, str): @@ -154,29 +204,27 @@ def _apply_action_to_fields( f"Unsupported data type. Expected a traversable type (dict or str), but got {type(data)}.", ) - for nested_field in fields: - # Prevent overriding loop variable - current_nested_field = nested_field - - # Ensure the nested field is represented as a string - if not isinstance(current_nested_field, str): - current_nested_field = json.dumps(current_nested_field) - - # Split the nested field string into a list of nested keys - # ['a.b.c'] -> ['a', 'b', 'c'] - nested_keys = current_nested_field.split(".") - - # Initialize the current dictionary to the root dictionary - current_dict = data_parsed - - # Traverse the dictionary hierarchy by iterating through the list of nested keys - for key in nested_keys[:-1]: - current_dict = current_dict[key] - - # Retrieve the final value of the nested field - target_value = current_dict[nested_keys[-1]] + return data_parsed - # Apply the specified 'action' to the target value - current_dict[nested_keys[-1]] = action(target_value, **provider_options) + def _apply_action_to_specific_type(self, current_dict: dict, action: Callable, last_key, **provider_options): + logger.debug("Processing the last fields to apply the action") + # Apply the action to the last key (either a specific index or dictionary key) + if isinstance(current_dict, dict) and last_key in current_dict: + current_dict[last_key] = action(current_dict[last_key], **provider_options) + elif isinstance(current_dict, list) and last_key.isdigit() and int(last_key) < len(current_dict): + current_dict[int(last_key)] = action(current_dict[int(last_key)], **provider_options) + elif 
isinstance(current_dict, tuple) and last_key.isdigit() and int(last_key) < len(current_dict): + index = int(last_key) + current_dict = ( + current_dict[:index] + (action(current_dict[index], **provider_options),) + current_dict[index + 1 :] + ) + elif isinstance(current_dict, set): + # Convert the set to a list, apply the action, and convert back to a set + elements_list = list(current_dict) + elements_list[int(last_key)] = action(elements_list[int(last_key)], **provider_options) + current_dict = set(elements_list) + else: + # Handle the case when the last key doesn't exist + raise DataMaskingFieldNotFound(f"Key {last_key} not found in {current_dict}") - return data_parsed + return current_dict diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py b/aws_lambda_powertools/utilities/_data_masking/exceptions.py index 45f98315df5..1316bef62a4 100644 --- a/aws_lambda_powertools/utilities/_data_masking/exceptions.py +++ b/aws_lambda_powertools/utilities/_data_masking/exceptions.py @@ -26,3 +26,9 @@ class DataMaskingContextMismatchError(Exception): """ Decrypting with the incorrect encryption context. """ + + +class DataMaskingFieldNotFound(Exception): + """ + Field not found. + """ diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index 417bdbb6540..ad16216e8b5 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -44,7 +44,7 @@ def decrypt(self, data) -> Any: def mask(self, data) -> Union[str, Iterable]: # Implementation logic for data masking pass - + def lambda_handler(event, context): provider = MyCustomProvider(["secret-key"]) data_masker = DataMasking(provider=provider) @@ -83,13 +83,13 @@ def decrypt(self, data) -> Any: def mask(self, data) -> Union[str, Iterable]: """ - This method irreversibly masks data. - + This method irreversibly masks data. 
+ If the data to be masked is of type `str`, `dict`, or `bytes`, this method will return a masked string, i.e. "*****". - If the data to be masked is of an iterable type like `list`, `tuple`, - or `set`, this method will return a new object of the same type as the + If the data to be masked is of an iterable type like `list`, `tuple`, + or `set`, this method will return a new object of the same type as the input data but with each element replaced by the string "*****". """ if isinstance(data, (str, dict, bytes)): diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py index f257339d634..dfcf1f163ff 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py @@ -1,5 +1,5 @@ -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider __all__ = [ - "AwsEncryptionSdkProvider", + "AWSEncryptionSDKProvider", ] diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index ac535131e68..0f30f32922a 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -32,9 +32,9 @@ logger = logging.getLogger(__name__) -class AwsEncryptionSdkProvider(BaseProvider): +class AWSEncryptionSDKProvider(BaseProvider): """ - The AwsEncryptionSdkProvider is used as a provider for the DataMasking class. + The AWSEncryptionSDKProvider is used as a provider for the DataMasking class. 
This provider allows you to perform data masking using the AWS Encryption SDK for encryption and decryption. It integrates with the DataMasking class to @@ -44,12 +44,12 @@ class AwsEncryptionSdkProvider(BaseProvider): ``` from aws_lambda_powertools.utilities.data_masking import DataMasking from aws_lambda_powertools.utilities.data_masking.providers.kms.aws_encryption_sdk import ( - AwsEncryptionSdkProvider, + AWSEncryptionSDKProvider, ) def lambda_handler(event, context): - provider = AwsEncryptionSdkProvider(["arn:aws:kms:us-east-1:0123456789012:key/key-id"]) + provider = AWSEncryptionSDKProvider(["arn:aws:kms:us-east-1:0123456789012:key/key-id"]) data_masker = DataMasking(provider=provider) data = { @@ -130,7 +130,7 @@ def __init__( def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: """ - Encrypt data using the AwsEncryptionSdkProvider. + Encrypt data using the AWSEncryptionSDKProvider. Parameters ------- diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index abeccbca76c..78aca8f0940 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -2,7 +2,7 @@ from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") @@ -18,7 +18,7 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event["body"] - data_masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN])) + data_masker = DataMasking(provider=AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN])) 
encrypted = data_masker.encrypt(data, fields=["address.street", "job_history.company.company_name"]) decrypted = data_masker.decrypt(encrypted, fields=["address.street", "job_history.company.company_name"]) return {"Decrypted_json": decrypted} diff --git a/examples/data_masking/src/getting_started_decrypt_context.py b/examples/data_masking/src/getting_started_decrypt_context.py index 4212b0bb4b7..721199b04a0 100644 --- a/examples/data_masking/src/getting_started_decrypt_context.py +++ b/examples/data_masking/src/getting_started_decrypt_context.py @@ -3,12 +3,12 @@ from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") -encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +encryption_provider = AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) logger = Logger() diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 6844c1572dd..83eb85a67e5 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -3,12 +3,12 @@ from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") # 
(1)! -encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) # (2)! +encryption_provider = AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN]) # (2)! data_masker = DataMasking(provider=encryption_provider) logger = Logger() diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encrypt_context.py index c10612358bb..63b808d626c 100644 --- a/examples/data_masking/src/getting_started_encrypt_context.py +++ b/examples/data_masking/src/getting_started_encrypt_context.py @@ -4,12 +4,12 @@ from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") -encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) +encryption_provider = AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN]) data_masker = DataMasking(provider=encryption_provider) logger = Logger() diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index a9fecfc4c90..ed9088f5fbc 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -3,12 +3,12 @@ from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") 
-encryption_provider = AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN]) # (1)! +encryption_provider = AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN]) # (1)! data_masker = DataMasking(provider=encryption_provider) logger = Logger() diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py index 45249a02d58..754d3308144 100644 --- a/examples/data_masking/tests/src/single_mock.py +++ b/examples/data_masking/tests/src/single_mock.py @@ -5,7 +5,7 @@ from aws_lambda_powertools.utilities._data_masking.base import DataMasking from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider class FakeEncryptionKeyProvider(BaseProvider): @@ -31,7 +31,7 @@ def handler(event, context): data = "mock_value" fake_key_provider = FakeEncryptionKeyProvider() - provider = AwsEncryptionSdkProvider( + provider = AWSEncryptionSDKProvider( keys=["dummy"], key_provider=fake_key_provider, ) diff --git a/tests/e2e/data_masking/handlers/basic_handler.py b/tests/e2e/data_masking/handlers/basic_handler.py index f31e822429a..2ad3b379436 100644 --- a/tests/e2e/data_masking/handlers/basic_handler.py +++ b/tests/e2e/data_masking/handlers/basic_handler.py @@ -1,6 +1,6 @@ from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider logger = Logger() @@ -14,7 +14,7 @@ def lambda_handler(event, context): # Encrypting data for test_encryption_in_handler test kms_key = event.get("kms_key", "") - data_masker = 
DataMasking(provider=AwsEncryptionSdkProvider(keys=[kms_key])) + data_masker = DataMasking(provider=AWSEncryptionSDKProvider(keys=[kms_key])) value = [1, 2, "string", 4.5] encrypted_data = data_masker.encrypt(value) response = {} diff --git a/tests/e2e/data_masking/test_e2e_data_masking.py b/tests/e2e/data_masking/test_e2e_data_masking.py index 80f45564177..b5e5203b4ce 100644 --- a/tests/e2e/data_masking/test_e2e_data_masking.py +++ b/tests/e2e/data_masking/test_e2e_data_masking.py @@ -5,9 +5,9 @@ from aws_encryption_sdk.exceptions import DecryptKeyError from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingContextMismatchError from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( - AwsEncryptionSdkProvider, - ContextMismatchError, + AWSEncryptionSDKProvider, ) from tests.e2e.utils import data_fetcher @@ -36,7 +36,7 @@ def kms_key2_arn(infrastructure: dict) -> str: @pytest.fixture def data_masker(kms_key1_arn) -> DataMasking: - return DataMasking(provider=AwsEncryptionSdkProvider(keys=[kms_key1_arn])) + return DataMasking(provider=AWSEncryptionSDKProvider(keys=[kms_key1_arn])) @pytest.mark.xdist_group(name="data_masking") @@ -79,7 +79,7 @@ def test_encryption_context_mismatch(data_masker): encrypted_data = data_masker.encrypt(value, encryption_context={"this": "is_secure"}) # THEN decrypting with a different encryption_context should raise a ContextMismatchError - with pytest.raises(ContextMismatchError): + with pytest.raises(DataMaskingContextMismatchError): data_masker.decrypt(encrypted_data, encryption_context={"not": "same_context"}) @@ -93,7 +93,7 @@ def test_encryption_no_context_fail(data_masker): encrypted_data = data_masker.encrypt(value) # THEN decrypting with an encryption_context should raise a ContextMismatchError - with pytest.raises(ContextMismatchError): + with pytest.raises(DataMaskingContextMismatchError): 
data_masker.decrypt(encrypted_data, encryption_context={"this": "is_secure"}) @@ -106,7 +106,7 @@ def test_encryption_decryption_key_mismatch(data_masker, kms_key2_arn): encrypted_data = data_masker.encrypt(value) # THEN when decrypting with a different key it should fail - data_masker_key2 = DataMasking(provider=AwsEncryptionSdkProvider(keys=[kms_key2_arn])) + data_masker_key2 = DataMasking(provider=AWSEncryptionSDKProvider(keys=[kms_key2_arn])) with pytest.raises(DecryptKeyError): data_masker_key2.decrypt(encrypted_data) diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index 978c2e21572..8b216781c93 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -10,7 +10,7 @@ from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider from aws_lambda_powertools.utilities._data_masking.provider.kms import ( - AwsEncryptionSdkProvider, + AWSEncryptionSDKProvider, ) @@ -37,7 +37,7 @@ def decrypt(self, data: bytes, **kwargs) -> Any: def data_masker(monkeypatch) -> DataMasking: """DataMasking using AWS Encryption SDK Provider with a fake client""" fake_key_provider = FakeEncryptionKeyProvider() - provider = AwsEncryptionSdkProvider( + provider = AWSEncryptionSDKProvider( keys=["dummy"], key_provider=fake_key_provider, ) @@ -281,3 +281,43 @@ def test_encrypt_json_dict_with_fields(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == json.loads(data) + + +def test_encrypt_json_with_list_fields(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "payload": { + "first": ["value1", "value2"], + "second": [{"key1": [0, 1]}], + }, + }, + ) + + fields_operation = ["payload.first[0]", 
"payload.second[0].key1[0]"] + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation) + decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) + + +def test_encrypt_json_with_tuple_fields(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "payload": { + "first": ["value1", "value2"], + "second": (0, 1), + }, + }, + ) + + fields_operation = ["payload.first[0]", "payload.second[0]"] + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation) + decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py index 9a898ea10cd..33b9673154a 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py @@ -4,7 +4,7 @@ from aws_lambda_powertools.event_handler import APIGatewayRestResolver from aws_lambda_powertools.logging import correlation_paths from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] @@ -48,7 +48,7 @@ 
@tracer.capture_method def function1024(): logger.info("Hello world function1024 - HTTP 200") - data_masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN])) + data_masker = DataMasking(provider=AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN])) encrypted = data_masker.encrypt(json_blob, fields=["address.street", "job_history.company.company_name"]) decrypted = data_masker.decrypt(encrypted, fields=["address.street", "job_history.company.company_name"]) return {"Decrypted_json_blob_function_1024": decrypted} diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py index 6b8250579a5..81fffefb2ee 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py @@ -4,7 +4,7 @@ from aws_lambda_powertools.event_handler import APIGatewayRestResolver from aws_lambda_powertools.logging import correlation_paths from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] @@ -48,7 +48,7 @@ @tracer.capture_method def function128(): logger.info("Hello world function128 - HTTP 200") - data_masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN])) + data_masker = DataMasking(provider=AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN])) encrypted = data_masker.encrypt(json_blob, fields=["address.street", "job_history.company.company_name"]) decrypted = data_masker.decrypt(encrypted, fields=["address.street", 
"job_history.company.company_name"]) return {"Decrypted_json_blob_function_128": decrypted} diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py index 623a1f7b232..3fbe0d60116 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py @@ -4,7 +4,7 @@ from aws_lambda_powertools.event_handler import APIGatewayRestResolver from aws_lambda_powertools.logging import correlation_paths from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] @@ -48,7 +48,7 @@ @tracer.capture_method def function1769(): logger.info("Hello world function1769 - HTTP 200") - data_masker = DataMasking(provider=AwsEncryptionSdkProvider(keys=[KMS_KEY_ARN])) + data_masker = DataMasking(provider=AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN])) encrypted = data_masker.encrypt(json_blob, fields=["address.street", "job_history.company.company_name"]) decrypted = data_masker.decrypt(encrypted, fields=["address.street", "job_history.company.company_name"]) return {"Decrypted_json_blob_function_1769": decrypted} From 74fccd724d85a951309578dfa58a60b65c4eeb3f Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 18 Dec 2023 17:50:40 +0000 Subject: [PATCH 082/151] Small fix --- aws_lambda_powertools/utilities/_data_masking/base.py | 6 +++--- .../utilities/_data_masking/exceptions.py | 2 +- tests/unit/data_masking/test_unit_data_masking.py | 7 +++++-- 3 files changed, 9 
insertions(+), 6 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index d3e45d700a6..4dd354bfec1 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -5,7 +5,7 @@ from typing import Any, Callable, Iterable, Optional, Union from aws_lambda_powertools.utilities._data_masking.exceptions import ( - DataMaskingFieldNotFound, + DataMaskingFieldNotFoundError, DataMaskingUnsupportedTypeError, ) from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -180,7 +180,7 @@ def _apply_action_to_fields( current_dict = current_dict[int(key)] except KeyError: # Handle the case when the key doesn't exist - raise DataMaskingFieldNotFound(f"Key {key} not found in {current_dict}") + raise DataMaskingFieldNotFoundError(f"Key {key} not found in {current_dict}") last_key = keys[-1] @@ -225,6 +225,6 @@ def _apply_action_to_specific_type(self, current_dict: dict, action: Callable, l current_dict = set(elements_list) else: # Handle the case when the last key doesn't exist - raise DataMaskingFieldNotFound(f"Key {last_key} not found in {current_dict}") + raise DataMaskingFieldNotFoundError(f"Key {last_key} not found in {current_dict}") return current_dict diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py b/aws_lambda_powertools/utilities/_data_masking/exceptions.py index 1316bef62a4..7c962ddf385 100644 --- a/aws_lambda_powertools/utilities/_data_masking/exceptions.py +++ b/aws_lambda_powertools/utilities/_data_masking/exceptions.py @@ -28,7 +28,7 @@ class DataMaskingContextMismatchError(Exception): """ -class DataMaskingFieldNotFound(Exception): +class DataMaskingFieldNotFoundError(Exception): """ Field not found. 
""" diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 6379fbdb192..af176bfb53b 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -4,7 +4,10 @@ from aws_lambda_powertools.utilities._data_masking.base import DataMasking from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING -from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingUnsupportedTypeError +from aws_lambda_powertools.utilities._data_masking.exceptions import ( + DataMaskingFieldNotFoundError, + DataMaskingUnsupportedTypeError, +) @pytest.fixture @@ -169,7 +172,7 @@ def test_parsing_nonexistent_fields(data_masker): } # WHEN attempting to pass in fields that do not exist in the input data - with pytest.raises(KeyError): + with pytest.raises(DataMaskingFieldNotFoundError): # THEN the result is a KeyError data_masker.mask(data, ["3.1.True"]) From 189bcba9fd389dd8cb5d0d9a864a336571a7eba4 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 18 Dec 2023 19:43:42 +0000 Subject: [PATCH 083/151] Fixing typing --- aws_lambda_powertools/utilities/_data_masking/base.py | 2 +- docs/utilities/data_masking.md | 2 +- examples/data_masking/src/data_masking_function_example.py | 2 ++ examples/data_masking/src/getting_started_decrypt_context.py | 5 +++-- examples/data_masking/src/getting_started_decrypt_data.py | 5 +++-- examples/data_masking/src/getting_started_encrypt_context.py | 5 +++-- examples/data_masking/src/getting_started_encrypt_data.py | 5 +++-- examples/data_masking/src/getting_started_mask_data.py | 4 ++-- 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 4dd354bfec1..96905c993aa 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ 
b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -59,7 +59,7 @@ def encrypt(self, data, fields=None, **provider_options) -> str: def decrypt(self, data, fields=None, **provider_options) -> Any: return self._apply_action(data, fields, self.provider.decrypt, **provider_options) - def mask(self, data, fields=None, **provider_options) -> Union[str, Iterable]: + def mask(self, data, fields=None, **provider_options) -> str | Iterable: return self._apply_action(data, fields, self.provider.mask, **provider_options) def _apply_action(self, data: str | dict, fields, action: Callable, **provider_options): diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 97ea675dac2..5000f1bf1d7 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -161,7 +161,7 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc **NOTE**. Decryption only works with KMS Key ARN. - ```python hl_lines="5-6 11-12 23" + ```python hl_lines="6-7 12-13 24" --8<-- "examples/data_masking/src/getting_started_decrypt_data.py" ``` diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index 78aca8f0940..b25449d8ed4 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import os from aws_lambda_powertools import Logger, Tracer diff --git a/examples/data_masking/src/getting_started_decrypt_context.py b/examples/data_masking/src/getting_started_decrypt_context.py index 721199b04a0..fd46309cec5 100644 --- a/examples/data_masking/src/getting_started_decrypt_context.py +++ b/examples/data_masking/src/getting_started_decrypt_context.py @@ -1,5 +1,6 @@ +from __future__ import annotations + import os -from typing import Dict from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking 
import DataMasking @@ -15,7 +16,7 @@ @logger.inject_lambda_context -def lambda_handler(event: Dict, context: LambdaContext) -> Dict: +def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event.get("body", {}) logger.info("Decrypting email field") diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 83eb85a67e5..582a06af1b7 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -1,5 +1,6 @@ +from __future__ import annotations + import os -from typing import Dict from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -15,7 +16,7 @@ @logger.inject_lambda_context -def lambda_handler(event: Dict, context: LambdaContext) -> Dict: +def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event.get("body") logger.info("Decrypting fields email, address.street, and company_address") diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encrypt_context.py index 63b808d626c..d34c8fd7d12 100644 --- a/examples/data_masking/src/getting_started_encrypt_context.py +++ b/examples/data_masking/src/getting_started_encrypt_context.py @@ -1,6 +1,7 @@ +from __future__ import annotations + import os from datetime import datetime -from typing import Dict from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -16,7 +17,7 @@ @logger.inject_lambda_context -def lambda_handler(event: Dict, context: LambdaContext) -> Dict: +def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event.get("body", {}) logger.info("Encrypting email field") diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index ed9088f5fbc..425e634dac0 100644 
--- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -1,5 +1,6 @@ +from __future__ import annotations + import os -from typing import Dict from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -15,7 +16,7 @@ @logger.inject_lambda_context -def lambda_handler(event: Dict, context: LambdaContext) -> Dict: +def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event.get("body", {}) logger.info("Encrypting fields email, address.street, and company_address") diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 1a288389551..b31b1e044a8 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,4 +1,4 @@ -from typing import Dict +from __future__ import annotations from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -9,7 +9,7 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> Dict: +def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event.get("body") logger.info("Masking fields email, address.street, and company_address") From 483c1b4425ff6e034299c06dd50bb948b5c966ea Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Mon, 18 Dec 2023 20:00:00 +0000 Subject: [PATCH 084/151] Fixing typing --- aws_lambda_powertools/utilities/_data_masking/base.py | 2 +- .../utilities/_data_masking/provider/base.py | 4 +++- .../data_masking/src/getting_started_decrypt_context.py | 2 +- .../data_masking/src/getting_started_encrypt_context.py | 4 ++-- examples/data_masking/src/getting_started_encrypt_data.py | 5 +++-- examples/data_masking/src/getting_started_mask_data.py | 6 ++++-- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git 
a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 96905c993aa..08018e4f103 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -53,7 +53,7 @@ def lambda_handler(event, context): def __init__(self, provider: Optional[BaseProvider] = None): self.provider = provider or BaseProvider() - def encrypt(self, data, fields=None, **provider_options) -> str: + def encrypt(self, data, fields=None, **provider_options) -> str | dict: return self._apply_action(data, fields, self.provider.encrypt, **provider_options) def decrypt(self, data, fields=None, **provider_options) -> Any: diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index ad16216e8b5..55d08a9be32 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import json from typing import Any, Iterable, Union @@ -75,7 +77,7 @@ def default_json_serializer(self, data): def default_json_deserializer(self, data): return json.loads(data.decode("utf-8")) - def encrypt(self, data) -> str: + def encrypt(self, data) -> str | dict: raise NotImplementedError("Subclasses must implement encrypt()") def decrypt(self, data) -> Any: diff --git a/examples/data_masking/src/getting_started_decrypt_context.py b/examples/data_masking/src/getting_started_decrypt_context.py index fd46309cec5..56c515e718b 100644 --- a/examples/data_masking/src/getting_started_decrypt_context.py +++ b/examples/data_masking/src/getting_started_decrypt_context.py @@ -21,7 +21,7 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Decrypting email field") - decrypted: dict = data_masker.encrypt( + decrypted = data_masker.decrypt( data, fields=["email"], 
tenant_id=event.get("tenant_id", ""), # (1)! diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encrypt_context.py index d34c8fd7d12..1442f8d5805 100644 --- a/examples/data_masking/src/getting_started_encrypt_context.py +++ b/examples/data_masking/src/getting_started_encrypt_context.py @@ -17,12 +17,12 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict: +def lambda_handler(event: dict, context: LambdaContext) -> dict | str: data = event.get("body", {}) logger.info("Encrypting email field") - encrypted: dict = data_masker.encrypt( + encrypted: dict | str = data_masker.encrypt( data, fields=["email"], data_classification="confidential", diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 425e634dac0..2246662e4e4 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +from typing import Iterable from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -16,11 +17,11 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict: +def lambda_handler(event: dict, context: LambdaContext) -> Iterable | str: data = event.get("body", {}) logger.info("Encrypting fields email, address.street, and company_address") - encrypted: dict = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) # (2)! + encrypted: Iterable = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) # (2)! 
return encrypted diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index b31b1e044a8..84e6dde59ca 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,5 +1,7 @@ from __future__ import annotations +from typing import Iterable + from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking from aws_lambda_powertools.utilities.typing import LambdaContext @@ -9,11 +11,11 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict: +def lambda_handler(event: dict, context: LambdaContext) -> Iterable | str: data = event.get("body") logger.info("Masking fields email, address.street, and company_address") - masked: dict = data_masker.mask(data, fields=["email", "address.street", "company_address"]) # (1)! + masked: Iterable = data_masker.mask(data, fields=["email", "address.street", "company_address"]) # (1)! return masked From 79db91ef1ce30585c9b36217e58975622f73b12c Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 18 Dec 2023 15:37:49 -0800 Subject: [PATCH 085/151] Small fixes --- .../utilities/_data_masking/base.py | 17 ++------ .../utilities/_data_masking/provider/base.py | 40 +++++++------------ .../provider/kms/aws_encryption_sdk.py | 9 ++--- 3 files changed, 22 insertions(+), 44 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 08018e4f103..1d5c2bfb72c 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -19,17 +19,8 @@ class DataMasking: Please DON'T USE THIS utility in production environments. Keep in mind that when we transition to General Availability (GA), there might be breaking changes introduced. 
- A utility class for masking sensitive data within various data types. - - This class provides methods for masking sensitive information, such as personal - identifiers or confidential data, within different data types such as strings, - dictionaries, lists, and more. It helps protect sensitive information while - preserving the structure of the original data. - - Usage: - Instantiate an object of this class and use its methods to mask sensitive data - based on the data type. Supported data types include strings, dictionaries, - and more. + The DataMasking class orchestrates masking, encrypting, and decrypting + for the base provider. Example: ``` @@ -118,8 +109,8 @@ def _apply_action_to_fields( Returns ------- - dict - The modified dictionary after applying the action to the + dict | str + The modified dictionary or string after applying the action to the specified fields. Raises diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index 55d08a9be32..53788c6f8ae 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -10,26 +10,6 @@ class BaseProvider: """ The BaseProvider class serves as an abstract base class for data masking providers. - Attributes - ---------- - json_serializer : Callable - A callable function responsible for JSON serialization. - json_deserializer : Callable - A callable function responsible for JSON deserialization. - - Methods - ------- - default_json_serializer(data) - Default method for JSON serialization. - default_json_deserializer(data) - Default method for JSON deserialization. - encrypt(data) - Abstract method for encrypting data. Subclasses must implement this method. - decrypt(data) - Abstract method for decrypting data. Subclasses must implement this method. - mask(data) - Default method for masking data. 
- Examples -------- ``` @@ -60,17 +40,21 @@ def lambda_handler(event, context): return encrypted ``` - - Raises - ------- - NotImplementedError - If `encrypt()` or `decrypt()` methods are not implemented. """ def __init__(self, json_serializer=None, json_deserializer=None) -> None: self.json_serializer = json_serializer or self.default_json_serializer self.json_deserializer = json_deserializer or self.default_json_deserializer + """ + Parameters + ---------- + json_serializer : Callable + A callable function responsible for JSON serialization. + json_deserializer : Callable + A callable function responsible for JSON deserialization. + """ + def default_json_serializer(self, data): return json.dumps(data).encode("utf-8") @@ -78,9 +62,15 @@ def default_json_deserializer(self, data): return json.loads(data.decode("utf-8")) def encrypt(self, data) -> str | dict: + """ + Abstract method for encrypting data. Subclasses must implement this method. + """ raise NotImplementedError("Subclasses must implement encrypt()") def decrypt(self, data) -> Any: + """ + Abstract method for decrypting data. Subclasses must implement this method. + """ raise NotImplementedError("Subclasses must implement decrypt()") def mask(self, data) -> Union[str, Iterable]: diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 0f30f32922a..86b02d8a05b 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -36,11 +36,8 @@ class AWSEncryptionSDKProvider(BaseProvider): """ The AWSEncryptionSDKProvider is used as a provider for the DataMasking class. - This provider allows you to perform data masking using the AWS Encryption SDK - for encryption and decryption. It integrates with the DataMasking class to - securely encrypt and decrypt sensitive data. 
- - Usage Example: + Usage + ------- ``` from aws_lambda_powertools.utilities.data_masking import DataMasking from aws_lambda_powertools.utilities.data_masking.providers.kms.aws_encryption_sdk import ( @@ -160,7 +157,7 @@ def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: def decrypt(self, data: str, **provider_options) -> Any: """ - Decrypt data using AwsEncryptionSdkProvider. + Decrypt data using AWSEncryptionSDKProvider. Parameters ------- From e9d93bf1da93aa4e43d043d5b6bb2fd99efc39aa Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 11:02:19 +0100 Subject: [PATCH 086/151] docs: rename to enc ctx and correct wording for upcoming API change to prevent confused deputy --- docs/utilities/data_masking.md | 29 +++++++++---------- .../src/getting_started_decrypt_context.py | 6 ++-- .../src/getting_started_encrypt_context.py | 5 ++-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 5000f1bf1d7..91de369fb44 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -122,7 +122,7 @@ Masking will erase the original data and replace with `*****`. This means you ca ### Encrypting data -!!! note "About static typing and encryption" +???+ note "About static typing and encryption" Encrypting data may lead to a different data type, as it always transforms into a string _(``)_. To encrypt, you will need an [encryption provider](#providers). Here, we will use `AWSEncryptionSDKProvider`. @@ -150,7 +150,7 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc ### Decrypting data -!!! note "About static typing and decryption" +???+ note "About static typing and decryption" Decrypting data may lead to a different data type, as encrypted data is always a string _(``)_. To decrypt, you will need an [encryption provider](#providers). Here, we will use `AWSEncryptionSDKProvider`. 
@@ -181,33 +181,30 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc --8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" ``` -### Additional authenticated data (AAD) +### Encryption context for integrity and authenticity -For a stronger security posture, you can add metadata to each encryption operation, and verify one or more of them during decryption. This is known as **encryption context**. These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. +For a stronger security posture, you can add metadata to each encryption operation, and verify them during decryption. This is known as additional authenticated data (AAD). These are non-sensitive data that can help protect authenticity and integrity of your encrypted data. -**NOTE**. Only string values are supported. - -!!! todo "Change encrypt/decrypt signature to allow fluid encryption context" - [We need an API change](https://github.com/aws-powertools/powertools-lambda-python/pull/3186#issuecomment-1860778334). - -!!! question "Why non-sensitive data?" - Because it is stored in plaintext in AWS CloudTrail; it could be seen by anyone with access to CloudTrail. Unless you [intentionally disabled KMS events in a custom trail](https://docs.aws.amazon.com/kms/latest/developerguide/logging-using-cloudtrail.html#filtering-kms-events){target="_blank"}. - -!!! todo "Explain decryption context not being a strict exact match due to random data." +???+ danger "Important considerations you should know" + 1. **Exact match verification on decrypt**. Be careful using random data like `timestamps` as encryption context if you can't provide them on decrypt. + 2. **Only `string` values are supported**. We will raise `DataMaskingUnsupportedTypeError` for non-string values. + 3. **Use non-sensitive data only**. When using KMS, encryption context is available as plaintext in AWS CloudTrail. 
Unless you [intentionally disabled KMS events](https://docs.aws.amazon.com/kms/latest/developerguide/logging-using-cloudtrail.html#filtering-kms-events){target="_blank"}. === "getting_started_encrypt_context.py" - ```python hl_lines="27-30" + ```python hl_lines="26-28" --8<-- "examples/data_masking/src/getting_started_encrypt_context.py" ``` + 1. They must match on `decrypt()` otherwise the operation will fail with `DataMaskingContextMismatchError`. + === "getting_started_decrypt_context.py" - ```python hl_lines="26" + ```python hl_lines="26-28" --8<-- "examples/data_masking/src/getting_started_decrypt_context.py" ``` - 1. We use `tenant_id` to be sure the data we're decrypting belongs to this tenant. + 1. They must match otherwise the operation will fail with `DataMaskingContextMismatchError`. ### Working with nested data diff --git a/examples/data_masking/src/getting_started_decrypt_context.py b/examples/data_masking/src/getting_started_decrypt_context.py index 56c515e718b..f0360403dd5 100644 --- a/examples/data_masking/src/getting_started_decrypt_context.py +++ b/examples/data_masking/src/getting_started_decrypt_context.py @@ -21,10 +21,12 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Decrypting email field") - decrypted = data_masker.decrypt( + decrypted: dict = data_masker.decrypt( data, fields=["email"], - tenant_id=event.get("tenant_id", ""), # (1)! + data_classification="confidential", # (1)! 
+ data_type="customer-data", + tenant_id="a06bf973-0734-4b53-9072-39d7ac5b2cba", ) return decrypted diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encrypt_context.py index 1442f8d5805..4c18cff1a24 100644 --- a/examples/data_masking/src/getting_started_encrypt_context.py +++ b/examples/data_masking/src/getting_started_encrypt_context.py @@ -1,7 +1,7 @@ from __future__ import annotations import os -from datetime import datetime +from typing import Dict from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -25,9 +25,8 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict | str: encrypted: dict | str = data_masker.encrypt( data, fields=["email"], - data_classification="confidential", + data_classification="confidential", # (1)! data_type="customer-data", - timestamp=datetime.utcnow().isoformat(), tenant_id="a06bf973-0734-4b53-9072-39d7ac5b2cba", ) From a0921a3347286f138de6798f5b0881f31fdbdcbc Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 13:17:55 +0100 Subject: [PATCH 087/151] docs: use Dict as return as that's the expected return type --- examples/data_masking/src/getting_started_encrypt_context.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encrypt_context.py index 4c18cff1a24..4e30301a11e 100644 --- a/examples/data_masking/src/getting_started_encrypt_context.py +++ b/examples/data_masking/src/getting_started_encrypt_context.py @@ -1,7 +1,6 @@ from __future__ import annotations import os -from typing import Dict from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking @@ -17,12 +16,12 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict | str: +def lambda_handler(event: dict, 
context: LambdaContext) -> dict: data = event.get("body", {}) logger.info("Encrypting email field") - encrypted: dict | str = data_masker.encrypt( + encrypted: dict = data_masker.encrypt( data, fields=["email"], data_classification="confidential", # (1)! From d9d9e31741307872c94c110c0f26158b25e77aad Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 15:03:42 +0100 Subject: [PATCH 088/151] docs: fix highlighting Signed-off-by: heitorlessa --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 91de369fb44..7e39d956832 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -192,7 +192,7 @@ For a stronger security posture, you can add metadata to each encryption operati === "getting_started_encrypt_context.py" - ```python hl_lines="26-28" + ```python hl_lines="27-29" --8<-- "examples/data_masking/src/getting_started_encrypt_context.py" ``` From 93a20f8c4e6bf5e2382523913fd347222f8ce1ef Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 17:53:38 +0100 Subject: [PATCH 089/151] docs: add choosing parts of your data section --- docs/utilities/data_masking.md | 83 +++++++++++++++++-- .../src/choosing_payload_all_nested_keys.json | 19 +++++ ...oosing_payload_all_nested_keys_output.json | 9 ++ .../src/choosing_payload_multiple_keys.json | 9 ++ ...choosing_payload_multiple_keys_output.json | 9 ++ .../src/choosing_payload_nested_key.json | 8 ++ .../choosing_payload_nested_key_output.json | 8 ++ .../src/choosing_payload_top_keys.json | 5 ++ .../src/choosing_payload_top_keys_output.json | 5 ++ 9 files changed, 149 insertions(+), 6 deletions(-) create mode 100644 examples/data_masking/src/choosing_payload_all_nested_keys.json create mode 100644 examples/data_masking/src/choosing_payload_all_nested_keys_output.json create mode 100644 examples/data_masking/src/choosing_payload_multiple_keys.json create mode 
100644 examples/data_masking/src/choosing_payload_multiple_keys_output.json create mode 100644 examples/data_masking/src/choosing_payload_nested_key.json create mode 100644 examples/data_masking/src/choosing_payload_nested_key_output.json create mode 100644 examples/data_masking/src/choosing_payload_top_keys.json create mode 100644 examples/data_masking/src/choosing_payload_top_keys_output.json diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 7e39d956832..34adbbf7c6b 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -206,22 +206,93 @@ For a stronger security posture, you can add metadata to each encryption operati 1. They must match otherwise the operation will fail with `DataMaskingContextMismatchError`. -### Working with nested data +### Choosing parts of your data -!!! info "In Q1 2024, we plan to introduce support for Pydantic models, Dataclasses, and standard Python classes." +You can use the `fields` parameter to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is helpful when you want to keep most of the data intact except the sensitive parts. + +Use the dot notation `.` to traverse `dict` or `JSON` and only apply `mask`, `encrypt`, or `decrypt` to that field. 
Here are a few examples: + +=== "Top keys only" + + Expression: `data_masker.mask(data, fields=["card_number"])` + + === "Data" + + ```json hl_lines="4" + --8<-- "examples/data_masking/src/choosing_payload_top_keys.json" + ``` + + === "Result" + + ```json hl_lines="4" + --8<-- "examples/data_masking/src/choosing_payload_top_keys_output.json" + ``` + +=== "Nested key" + + Expression: `data_masker.mask(data, fields=["address.postcode"])` + + === "Data" + + ```json hl_lines="6" + --8<-- "examples/data_masking/src/choosing_payload_nested_key.json" + ``` + + === "Result" + + ```json hl_lines="6" + --8<-- "examples/data_masking/src/choosing_payload_nested_key_output.json" + ``` + +=== "Multiple keys" + + Expression: `data_masker.mask(data, fields=["address.postcode", "address.street"])` + + === "Data" + + ```json hl_lines="6-7" + --8<-- "examples/data_masking/src/choosing_payload_multiple_keys.json" + ``` + + === "Result" + + ```json hl_lines="6-7" + --8<-- "examples/data_masking/src/choosing_payload_multiple_keys_output.json" + ``` + +=== "All key items" + + Expression: `data_masker.mask(data, fields=["address"])` + + === "Data" + + ```json hl_lines="6-17" + --8<-- "examples/data_masking/src/choosing_payload_all_nested_keys.json" + ``` + + === "Result" + + ```json hl_lines="6-7" + --8<-- "examples/data_masking/src/choosing_payload_all_nested_keys_output.json" + ``` + +!!! note "Current limitations" + 1. We don't support data slicing `field.subfield[0:2]`. + 2. Python classes, Dataclasses, and Pydantic models are not supported yet. #### JSON - -When using the data masking utility with dictionaries or JSON strings, you can provide a list of keys to obfuscate the corresponding values to the `fields` parameter. You can select values of nested keys by using dot notation. The `fields` parameter only supports selecting values using basic dot notation and does not provide support for wildcards or any other matching expressions. +!!! todo "Todo" + 1. 
Explain about data preservation + 2. Share example with JSON Strings + + If a `fields` parameter is provided along with a dictionary as the input data, then the rest of content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while perserving their original content. If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. -If `fields` is not provided, the entire data object will be masked (or encrypted/decrypted). - ## Advanced ### Providers diff --git a/examples/data_masking/src/choosing_payload_all_nested_keys.json b/examples/data_masking/src/choosing_payload_all_nested_keys.json new file mode 100644 index 00000000000..8f0c3cd1e11 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_all_nested_keys.json @@ -0,0 +1,19 @@ +{ + "name": "Leandro", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue", + "country": "United States", + "timezone": "America/La_Paz" + }, + { + "postcode": 94400, + "street": "623 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan" + } + ] +} \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_all_nested_keys_output.json b/examples/data_masking/src/choosing_payload_all_nested_keys_output.json new file mode 100644 index 00000000000..214638b2056 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_all_nested_keys_output.json @@ -0,0 +1,9 @@ +{ + "name": "Leandro", + "operation": "non sensitive", + "card_number": 
"1000 4444 333 2222", + "address": [ + "*****", + "*****" + ] +} \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_multiple_keys.json b/examples/data_masking/src/choosing_payload_multiple_keys.json new file mode 100644 index 00000000000..91fcba832fc --- /dev/null +++ b/examples/data_masking/src/choosing_payload_multiple_keys.json @@ -0,0 +1,9 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": { + "postcode": 81847, + "street": "38986 Joanne Stravenue" + } +} \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_multiple_keys_output.json b/examples/data_masking/src/choosing_payload_multiple_keys_output.json new file mode 100644 index 00000000000..b638d74d80e --- /dev/null +++ b/examples/data_masking/src/choosing_payload_multiple_keys_output.json @@ -0,0 +1,9 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": { + "postcode": "*****", + "street": "*****" + } +} \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_nested_key.json b/examples/data_masking/src/choosing_payload_nested_key.json new file mode 100644 index 00000000000..6c606fdd89f --- /dev/null +++ b/examples/data_masking/src/choosing_payload_nested_key.json @@ -0,0 +1,8 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": { + "postcode": 81847 + } +} \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_nested_key_output.json b/examples/data_masking/src/choosing_payload_nested_key_output.json new file mode 100644 index 00000000000..946535f6094 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_nested_key_output.json @@ -0,0 +1,8 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": { + "postcode": "*****" + } +} \ No newline at end of file diff --git 
a/examples/data_masking/src/choosing_payload_top_keys.json b/examples/data_masking/src/choosing_payload_top_keys.json new file mode 100644 index 00000000000..e7b5821f3d8 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_top_keys.json @@ -0,0 +1,5 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222" +} \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_top_keys_output.json b/examples/data_masking/src/choosing_payload_top_keys_output.json new file mode 100644 index 00000000000..79fd1407916 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_top_keys_output.json @@ -0,0 +1,5 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "*****" +} \ No newline at end of file From e7ae1f1adb2ec43eed90ca7962e10d6923adcc2e Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 18:06:28 +0100 Subject: [PATCH 090/151] docs: improve JSON section --- docs/utilities/data_masking.md | 27 ++++++++++++++++--- .../src/choosing_payload_simple_json.json | 1 + .../choosing_payload_simple_json_output.json | 8 ++++++ 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 examples/data_masking/src/choosing_payload_simple_json.json create mode 100644 examples/data_masking/src/choosing_payload_simple_json_output.json diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 34adbbf7c6b..87d12705997 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -210,7 +210,11 @@ For a stronger security posture, you can add metadata to each encryption operati You can use the `fields` parameter to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is helpful when you want to keep most of the data intact except the sensitive parts. -Use the dot notation `.` to traverse `dict` or `JSON` and only apply `mask`, `encrypt`, or `decrypt` to that field. 
Here are a few examples: +Use the dot notation `.` to traverse `dict` or `JSON` and only apply `mask`, `encrypt`, or `decrypt` to that field. + +There a + +Here are a few examples: === "Top keys only" @@ -282,15 +286,30 @@ Use the dot notation `.` to traverse `dict` or `JSON` and only apply `mask`, `en #### JSON +We also support data in JSON string format. We automatically deserialize and handle each field operation as expected. + +Note that the return will be a deserialized JSON and your desired fields updated. + +=== "Data" + + Expression: `data_masker.mask(data, fields=["card_number", "address.postcode"])` + + ```json + --8<-- "examples/data_masking/src/choosing_payload_simple_json.json" + ``` + +=== "Result" + + ```json + --8<-- "examples/data_masking/src/choosing_payload_simple_json_output.json" + ``` + !!! todo "Todo" 1. Explain about data preservation - 2. Share example with JSON Strings If a `fields` parameter is provided along with a dictionary as the input data, then the rest of content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while perserving their original content. -If a `fields` parameter is provided while the input data is a JSON string, the returned data structure will be a Python dictionary. The values corresponding to the keys given in the `fields` parameter will be accordingly obfuscated, and the content of everything else in the returned object will remain the same as the input data. 
- ## Advanced diff --git a/examples/data_masking/src/choosing_payload_simple_json.json b/examples/data_masking/src/choosing_payload_simple_json.json new file mode 100644 index 00000000000..daff4925f4e --- /dev/null +++ b/examples/data_masking/src/choosing_payload_simple_json.json @@ -0,0 +1 @@ +'{"name": "Ruben", "operation": "non sensitive", "card_number": "1000 4444 333 2222", "address": {"postcode": 81847}}' \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_simple_json_output.json b/examples/data_masking/src/choosing_payload_simple_json_output.json new file mode 100644 index 00000000000..52fc8b3cc5a --- /dev/null +++ b/examples/data_masking/src/choosing_payload_simple_json_output.json @@ -0,0 +1,8 @@ +{ + "name": "Ruben", + "operation": "non sensitive", + "card_number": "*****", + "address": { + "postcode": "*****" + } +} \ No newline at end of file From ce37852d56fd0fac88fc93cc7c944480f8e88b7d Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 20:04:44 +0100 Subject: [PATCH 091/151] docs: break down field syntax into two sections --- docs/utilities/data_masking.md | 49 +++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 87d12705997..066709179f7 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -208,20 +208,20 @@ For a stronger security posture, you can add metadata to each encryption operati ### Choosing parts of your data -You can use the `fields` parameter to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is helpful when you want to keep most of the data intact except the sensitive parts. +!!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." -Use the dot notation `.` to traverse `dict` or `JSON` and only apply `mask`, `encrypt`, or `decrypt` to that field. 
+You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. -There a +When the field is a `list`, we obfuscate their values to `str` while keeping the data structure and number of items intact. Obfuscating nested data structures from a given field is also supported. -Here are a few examples: +> Common scenarios === "Top keys only" - Expression: `data_masker.mask(data, fields=["card_number"])` - === "Data" + > Expression: `data_masker.mask(data, fields=["card_number"])` + ```json hl_lines="4" --8<-- "examples/data_masking/src/choosing_payload_top_keys.json" ``` @@ -234,10 +234,10 @@ Here are a few examples: === "Nested key" - Expression: `data_masker.mask(data, fields=["address.postcode"])` - === "Data" + > Expression: `data_masker.mask(data, fields=["address.postcode"])` + ```json hl_lines="6" --8<-- "examples/data_masking/src/choosing_payload_nested_key.json" ``` @@ -250,10 +250,10 @@ Here are a few examples: === "Multiple keys" - Expression: `data_masker.mask(data, fields=["address.postcode", "address.street"])` - === "Data" + > Expression: `data_masker.mask(data, fields=["address.postcode", "address.street"])` + ```json hl_lines="6-7" --8<-- "examples/data_masking/src/choosing_payload_multiple_keys.json" ``` @@ -266,10 +266,10 @@ Here are a few examples: === "All key items" - Expression: `data_masker.mask(data, fields=["address"])` - === "Data" + > Expression: `data_masker.mask(data, fields=["address"])` + ```json hl_lines="6-17" --8<-- "examples/data_masking/src/choosing_payload_all_nested_keys.json" ``` @@ -280,13 +280,9 @@ Here are a few examples: --8<-- "examples/data_masking/src/choosing_payload_all_nested_keys_output.json" ``` -!!! note "Current limitations" - 1. We don't support data slicing `field.subfield[0:2]`. - 2. Python classes, Dataclasses, and Pydantic models are not supported yet. 
- #### JSON -We also support data in JSON string format. We automatically deserialize and handle each field operation as expected. +We also support data in JSON string format as input. We automatically deserialize it, then handle each field operation as expected. Note that the return will be a deserialized JSON and your desired fields updated. @@ -307,12 +303,23 @@ Note that the return will be a deserialized JSON and your desired fields updated !!! todo "Todo" 1. Explain about data preservation - -If a `fields` parameter is provided along with a dictionary as the input data, then the rest of content of the dictionary will remain unchanged, and only the values corresponding to the keys given will be masked (or encrypted/decrypted). However, if there were any non-string keys in the original dictionary, they will be transformed into strings while perserving their original content. +## Advanced - +### Data serialization -## Advanced +???+ note "Current limitations" + 1. No support for data slicing `field.subfield[0:2]`. + 2. No support for accessing fields within a `list`. + 3. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. + +Before we traverse the data structure, we perform two important operations on input data: + +1. If `JSON string`, **deserialize** using default or provided deserializer. +2. If `dictionary`, **normalize** into `JSON` to prevent traversing unsupported data types. + +When decrypting, we revert the operation to restore the original data structure. + +!!! danger "TODO - Add an example using a custom serializer/deserializer." 
### Providers From ea778129be13080dec92027aca39a5912e547e7d Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 20:46:42 +0100 Subject: [PATCH 092/151] refactor: allow customers to bring custom serializer/deserializer Signed-off-by: heitorlessa --- .../utilities/_data_masking/base.py | 16 +++++++++++---- .../utilities/_data_masking/provider/base.py | 20 ------------------- .../provider/kms/aws_encryption_sdk.py | 16 ++++++++------- .../data_masking/test_aws_encryption_sdk.py | 16 ++++++--------- 4 files changed, 27 insertions(+), 41 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 1d5c2bfb72c..ba0400e9ac4 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools import json import logging from typing import Any, Callable, Iterable, Optional, Union @@ -41,8 +42,15 @@ def lambda_handler(event, context): ``` """ - def __init__(self, provider: Optional[BaseProvider] = None): + def __init__( + self, + provider: Optional[BaseProvider] = None, + json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), + json_deserializer: Callable = json.loads, + ): self.provider = provider or BaseProvider() + self.json_serializer = json_serializer + self.json_deserializer = json_deserializer def encrypt(self, data, fields=None, **provider_options) -> str | dict: return self._apply_action(data, fields, self.provider.encrypt, **provider_options) @@ -140,7 +148,7 @@ def _apply_action_to_fields( # Ensure the nested field is represented as a string if not isinstance(nested_parsed_field, str): - nested_parsed_field = json.dumps(nested_parsed_field) + nested_parsed_field = self.json_serializer(nested_parsed_field) # Split the nested field into keys using dot, square brackets as separators # keys = re.split(r"\.|\[|\]", nested_field) 
# noqa ERA001 - REVIEW THIS @@ -185,11 +193,11 @@ def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: if isinstance(data, str): # Parse JSON string as dictionary - data_parsed = json.loads(data) + data_parsed = self.json_deserializer(data) elif isinstance(data, dict): # Convert the data to a JSON string in case it contains non-string keys (e.g., ints) # Parse the JSON string back into a dictionary - data_parsed = json.loads(json.dumps(data)) + data_parsed = self.json_deserializer(self.json_serializer(data)) else: raise DataMaskingUnsupportedTypeError( f"Unsupported data type. Expected a traversable type (dict or str), but got {type(data)}.", diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index 53788c6f8ae..abddbe1a988 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -1,6 +1,5 @@ from __future__ import annotations -import json from typing import Any, Iterable, Union from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING @@ -42,25 +41,6 @@ def lambda_handler(event, context): ``` """ - def __init__(self, json_serializer=None, json_deserializer=None) -> None: - self.json_serializer = json_serializer or self.default_json_serializer - self.json_deserializer = json_deserializer or self.default_json_deserializer - - """ - Parameters - ---------- - json_serializer : Callable - A callable function responsible for JSON serialization. - json_deserializer : Callable - A callable function responsible for JSON deserialization. - """ - - def default_json_serializer(self, data): - return json.dumps(data).encode("utf-8") - - def default_json_deserializer(self, data): - return json.loads(data.decode("utf-8")) - def encrypt(self, data) -> str | dict: """ Abstract method for encrypting data. Subclasses must implement this method. 
diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 86b02d8a05b..707f26916cd 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -1,6 +1,8 @@ from __future__ import annotations import base64 +import functools +import json import logging from binascii import Error from typing import Any, Callable, Dict, List @@ -69,10 +71,10 @@ def __init__( max_cache_age_seconds: float = MAX_CACHE_AGE_SECONDS, max_messages_encrypted: int = MAX_MESSAGES_ENCRYPTED, max_bytes_encrypted: int = MAX_BYTES_ENCRYPTED, - json_serializer: Callable | None = None, - json_deserializer: Callable | None = None, + json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), + json_deserializer: Callable = json.loads, ): - super().__init__(json_serializer=json_serializer, json_deserializer=json_deserializer) + super().__init__() self._key_provider = key_provider or KMSKeyProvider( keys=keys, @@ -80,8 +82,8 @@ def __init__( max_cache_age_seconds=max_cache_age_seconds, max_messages_encrypted=max_messages_encrypted, max_bytes_encrypted=max_bytes_encrypted, - json_serializer=self.json_serializer, - json_deserializer=self.json_deserializer, + json_serializer=json_serializer, + json_deserializer=json_deserializer, ) def encrypt(self, data: bytes | str | Dict | int, **provider_options) -> str: @@ -141,7 +143,7 @@ def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: ciphertext : str The encrypted data, as a base64-encoded string. 
""" - data_encoded = self.json_serializer(data) + data_encoded = self.json_serializer(data).encode("utf-8") try: ciphertext, _ = self.client.encrypt( source=data_encoded, @@ -201,5 +203,5 @@ def decrypt(self, data: str, **provider_options) -> Any: f"Encryption Context does not match expected value for key: {key}", ) - ciphertext = self.json_deserializer(ciphertext) + ciphertext = self.json_deserializer(ciphertext.decode("utf-8")) return ciphertext diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index 8b216781c93..9736a178e38 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -2,7 +2,7 @@ import base64 import json -from typing import Any, Callable, Dict, Union +from typing import Any import pytest @@ -15,21 +15,17 @@ class FakeEncryptionKeyProvider(BaseProvider): - def __init__( - self, - json_serializer: Callable[[Dict], str] | None = None, - json_deserializer: Callable[[Union[Dict, str, bool, int, float]], str] | None = None, - ): - super().__init__(json_serializer=json_serializer, json_deserializer=json_deserializer) + def __init__(self): + super().__init__() def encrypt(self, data: bytes | str, **kwargs) -> str: - data = self.json_serializer(data) + data = json.dumps(data).encode("utf-8") ciphertext = base64.b64encode(data).decode() return ciphertext def decrypt(self, data: bytes, **kwargs) -> Any: - ciphertext_decoded = base64.b64decode(data) - ciphertext = self.json_deserializer(ciphertext_decoded) + ciphertext_decoded = base64.b64decode(data).decode("utf-8") + ciphertext = json.loads(ciphertext_decoded) return ciphertext From f97ffb55a5b836dad6ab76713b697ebcc88497b9 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 21:10:06 +0100 Subject: [PATCH 093/151] refactor: DataMasking depend on provider serializers Signed-off-by: heitorlessa --- .../utilities/_data_masking/base.py | 9 
+++------ .../utilities/_data_masking/provider/base.py | 12 +++++++++++- .../provider/kms/aws_encryption_sdk.py | 2 +- .../data_masking/test_aws_encryption_sdk.py | 17 +++++++++++------ 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index ba0400e9ac4..06efbb636d8 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,7 +1,5 @@ from __future__ import annotations -import functools -import json import logging from typing import Any, Callable, Iterable, Optional, Union @@ -45,12 +43,11 @@ def lambda_handler(event, context): def __init__( self, provider: Optional[BaseProvider] = None, - json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), - json_deserializer: Callable = json.loads, ): self.provider = provider or BaseProvider() - self.json_serializer = json_serializer - self.json_deserializer = json_deserializer + # NOTE: we depend on Provider to not confuse customers in passing the same 2 serializers in 2 places + self.json_serializer = self.provider.json_serializer + self.json_deserializer = self.provider.json_deserializer def encrypt(self, data, fields=None, **provider_options) -> str | dict: return self._apply_action(data, fields, self.provider.encrypt, **provider_options) diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index abddbe1a988..ebf76a862a3 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -1,6 +1,8 @@ from __future__ import annotations -from typing import Any, Iterable, Union +import functools +import json +from typing import Any, Callable, Iterable, Union from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING 
@@ -41,6 +43,14 @@ def lambda_handler(event, context): ``` """ + def __init__( + self, + json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), + json_deserializer: Callable = json.loads, + ) -> None: + self.json_serializer = json_serializer + self.json_deserializer = json_deserializer + def encrypt(self, data) -> str | dict: """ Abstract method for encrypting data. Subclasses must implement this method. diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 707f26916cd..1a5c4c9e302 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -74,7 +74,7 @@ def __init__( json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), json_deserializer: Callable = json.loads, ): - super().__init__() + super().__init__(json_serializer=json_serializer, json_deserializer=json_deserializer) self._key_provider = key_provider or KMSKeyProvider( keys=keys, diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index 9736a178e38..9336c84e568 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -1,8 +1,9 @@ from __future__ import annotations import base64 +import functools import json -from typing import Any +from typing import Any, Callable import pytest @@ -15,17 +16,21 @@ class FakeEncryptionKeyProvider(BaseProvider): - def __init__(self): - super().__init__() + def __init__( + self, + json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), + json_deserializer: Callable = json.loads, + ) -> None: + super().__init__(json_serializer, json_deserializer) def encrypt(self, data: bytes | str, **kwargs) -> str: - data 
= json.dumps(data).encode("utf-8") - ciphertext = base64.b64encode(data).decode() + encoded_data: str = self.json_serializer(data) + ciphertext = base64.b64encode(encoded_data.encode("utf-8")).decode() return ciphertext def decrypt(self, data: bytes, **kwargs) -> Any: ciphertext_decoded = base64.b64decode(data).decode("utf-8") - ciphertext = json.loads(ciphertext_decoded) + ciphertext = self.json_deserializer(ciphertext_decoded) return ciphertext From 8b3c6d2a9a89931a209b60312cf546f7c29ef22f Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 21:38:40 +0100 Subject: [PATCH 094/151] docs: add custom serializer example Signed-off-by: heitorlessa --- .../utilities/_data_masking/provider/base.py | 4 +- .../provider/kms/aws_encryption_sdk.py | 8 ++-- docs/utilities/data_masking.md | 20 ++++----- .../src/advanced_custom_serializer.py | 26 +++++++++++ .../data_masking/tests/src/single_mock.py | 43 ------------------- mypy.ini | 3 ++ 6 files changed, 44 insertions(+), 60 deletions(-) create mode 100644 examples/data_masking/src/advanced_custom_serializer.py delete mode 100644 examples/data_masking/tests/src/single_mock.py diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index ebf76a862a3..e3fb18729b1 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -45,8 +45,8 @@ def lambda_handler(event, context): def __init__( self, - json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), - json_deserializer: Callable = json.loads, + json_serializer: Callable[..., str] = functools.partial(json.dumps, ensure_ascii=False), + json_deserializer: Callable[[str], Any] = json.loads, ) -> None: self.json_serializer = json_serializer self.json_deserializer = json_deserializer diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py 
b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 1a5c4c9e302..876ae8d4553 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -71,8 +71,8 @@ def __init__( max_cache_age_seconds: float = MAX_CACHE_AGE_SECONDS, max_messages_encrypted: int = MAX_MESSAGES_ENCRYPTED, max_bytes_encrypted: int = MAX_BYTES_ENCRYPTED, - json_serializer: Callable = functools.partial(json.dumps, ensure_ascii=False), - json_deserializer: Callable = json.loads, + json_serializer: Callable[..., str] = functools.partial(json.dumps, ensure_ascii=False), + json_deserializer: Callable[[str], Any] = json.loads, ): super().__init__(json_serializer=json_serializer, json_deserializer=json_deserializer) @@ -103,8 +103,8 @@ class KMSKeyProvider: def __init__( self, keys: List[str], - json_serializer: Callable, - json_deserializer: Callable, + json_serializer: Callable[..., str], + json_deserializer: Callable[[str], Any], local_cache_capacity: int = CACHE_CAPACITY, max_cache_age_seconds: float = MAX_CACHE_AGE_SECONDS, max_messages_encrypted: int = MAX_MESSAGES_ENCRYPTED, diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 066709179f7..fbd15e88b63 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -319,7 +319,13 @@ Before we traverse the data structure, we perform two important operations on in When decrypting, we revert the operation to restore the original data structure. -!!! danger "TODO - Add an example using a custom serializer/deserializer." 
+For compatibility or performance, you can optionally pass your own JSON serializer and deserializer to replace `json.dumps` and `json.loads` respectively: + +=== "advanced_custom_serializer.py" + + ```python hl_lines="16" + --8<-- "examples/data_masking/src/advanced_custom_serializer.py" + ``` ### Providers @@ -475,17 +481,9 @@ sequenceDiagram ## Testing your code -For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. - -=== "test_single_mock.py" - ```python hl_lines="4 8" - --8<-- "examples/data_masking/tests/test_data_masking_single_mock.py" - ``` +!!! danger "TODO - Refactor with a fake" -=== "single_mock.py" - ```python - --8<-- "examples/data_masking/tests/src/single_mock.py" - ``` +For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. 
If we need to use this pattern across multiple tests, we can avoid repetition by refactoring to use our own pytest fixture: diff --git a/examples/data_masking/src/advanced_custom_serializer.py b/examples/data_masking/src/advanced_custom_serializer.py new file mode 100644 index 00000000000..5e40a7f354f --- /dev/null +++ b/examples/data_masking/src/advanced_custom_serializer.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import os + +import ujson + +from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( + AWSEncryptionSDKProvider, +) +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") + +encryption_provider = AWSEncryptionSDKProvider( + keys=[KMS_KEY_ARN], + json_serializer=ujson.dumps, + json_deserializer=ujson.loads, +) +data_masker = DataMasking(provider=encryption_provider) + + +def lambda_handler(event: dict, context: LambdaContext): + data = event.get("body", {}) + + return data_masker.encrypt(data) diff --git a/examples/data_masking/tests/src/single_mock.py b/examples/data_masking/tests/src/single_mock.py deleted file mode 100644 index 754d3308144..00000000000 --- a/examples/data_masking/tests/src/single_mock.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -import base64 -from typing import Any, Callable, Dict, Union - -from aws_lambda_powertools.utilities._data_masking.base import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider - - -class FakeEncryptionKeyProvider(BaseProvider): - def __init__( - self, - json_serializer: Callable[[Dict], str] | None = None, - json_deserializer: Callable[[Union[Dict, str, bool, int, float]], str] | None = None, - ): - super().__init__(json_serializer=json_serializer, 
json_deserializer=json_deserializer) - - def encrypt(self, data, **kwargs) -> str: - data = self.json_serializer(data) - ciphertext = base64.b64encode(data).decode() - return ciphertext - - def decrypt(self, data, **kwargs) -> Any: - ciphertext_decoded = base64.b64decode(data) - ciphertext = self.json_deserializer(ciphertext_decoded) - return ciphertext - - -def handler(event, context): - data = "mock_value" - - fake_key_provider = FakeEncryptionKeyProvider() - provider = AWSEncryptionSDKProvider( - keys=["dummy"], - key_provider=fake_key_provider, - ) - data_masker = DataMasking(provider=provider) - - encrypted = data_masker.encrypt(data=data) - data_masker.decrypt(data=encrypted) - - return {"message": "mock_value"} diff --git a/mypy.ini b/mypy.ini index b32cfe4aabe..9e8ffb5f7a5 100644 --- a/mypy.ini +++ b/mypy.ini @@ -71,3 +71,6 @@ ignore_missing_imports = True [mypy-importlib.metadata] ignore_missing_imports = True + +[mypy-ujson] +ignore_missing_imports = True From f1c8dde72b31fa9bba317a3aad9708ea001d94fe Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 22:09:13 +0100 Subject: [PATCH 095/151] refactor: allow arbitrary encryption context Signed-off-by: heitorlessa --- .../utilities/_data_masking/base.py | 113 ++++++++++++++---- .../utilities/_data_masking/provider/base.py | 6 +- .../provider/kms/aws_encryption_sdk.py | 23 ++-- 3 files changed, 107 insertions(+), 35 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 06efbb636d8..652ac302eb3 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -49,16 +49,47 @@ def __init__( self.json_serializer = self.provider.json_serializer self.json_deserializer = self.provider.json_deserializer - def encrypt(self, data, fields=None, **provider_options) -> str | dict: - return self._apply_action(data, fields, self.provider.encrypt, 
**provider_options) - - def decrypt(self, data, fields=None, **provider_options) -> Any: - return self._apply_action(data, fields, self.provider.decrypt, **provider_options) - - def mask(self, data, fields=None, **provider_options) -> str | Iterable: - return self._apply_action(data, fields, self.provider.mask, **provider_options) - - def _apply_action(self, data: str | dict, fields, action: Callable, **provider_options): + def encrypt( + self, + data, + fields: list[str] | None = None, + provider_options: dict | None = None, + **encryption_context: str, + ) -> str | dict: + return self._apply_action( + data=data, + fields=fields, + action=self.provider.encrypt, + provider_options=provider_options or {}, + **encryption_context, + ) + + def decrypt( + self, + data, + fields: list[str] | None = None, + provider_options: dict | None = None, + **encryption_context: str, + ) -> Any: + return self._apply_action( + data=data, + fields=fields, + action=self.provider.decrypt, + provider_options=provider_options or {}, + **encryption_context, + ) + + def mask(self, data, fields=None) -> str | Iterable: + return self._apply_action(data=data, fields=fields, action=self.provider.mask) + + def _apply_action( + self, + data: str | dict, + fields: list[str] | None, + action: Callable, + provider_options: dict | None = None, + **encryption_context: str, + ): """ Helper method to determine whether to apply a given action to the entire input data or to specific fields if the 'fields' argument is specified. @@ -67,11 +98,15 @@ def _apply_action(self, data: str | dict, fields, action: Callable, **provider_o ---------- data : str | dict The input data to process. - fields : Optional[List[any]] = None + fields : Optional[List[str]] A list of fields to apply the action to. If 'None', the action is applied to the entire 'data'. action : Callable - The action to apply to the data. It should be a callable that performs an operation on the data - and returns the modified value. 
+ The action to apply to the data. It should be a callable that performs an operation on the data + and returns the modified value. + provider_options : dict + Provider specific keyword arguments to propagate; used as an escape hatch. + encryption_context: str + Encryption context to use in encrypt and decrypt operations. Returns ------- @@ -81,17 +116,24 @@ def _apply_action(self, data: str | dict, fields, action: Callable, **provider_o if fields is not None: logger.debug(f"Running action {action.__name__} with fields {fields}") - return self._apply_action_to_fields(data, fields, action, **provider_options) + return self._apply_action_to_fields( + data=data, + fields=fields, + action=action, + options=provider_options, + **encryption_context, + ) else: logger.debug(f"Running action {action.__name__} with the entire data") - return action(data, **provider_options) + return action(data=data, provider_options=provider_options, **encryption_context) def _apply_action_to_fields( self, data: Union[dict, str], fields: list, action: Callable, - **provider_options, + provider_options: dict | None = None, + **encryption_context: str, ) -> Union[dict, str]: """ This method takes the input data, which can be either a dictionary or a JSON string, @@ -180,7 +222,13 @@ def _apply_action_to_fields( last_key = keys[-1] - current_dict = self._apply_action_to_specific_type(current_dict, action, last_key, **provider_options) + current_dict = self._apply_action_to_specific_type( + current_dict, + action, + last_key, + provider_options, + **encryption_context, + ) return data_parsed @@ -202,22 +250,43 @@ def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: return data_parsed - def _apply_action_to_specific_type(self, current_dict: dict, action: Callable, last_key, **provider_options): + def _apply_action_to_specific_type( + self, + current_dict: dict, + action: Callable, + last_key, + provider_options: dict | None = None, + **encryption_context, + ): 
logger.debug("Processing the last fields to apply the action") # Apply the action to the last key (either a specific index or dictionary key) if isinstance(current_dict, dict) and last_key in current_dict: - current_dict[last_key] = action(current_dict[last_key], **provider_options) + current_dict[last_key] = action( + current_dict[last_key], + provider_options=provider_options, + **encryption_context, + ) elif isinstance(current_dict, list) and last_key.isdigit() and int(last_key) < len(current_dict): - current_dict[int(last_key)] = action(current_dict[int(last_key)], **provider_options) + current_dict[int(last_key)] = action( + current_dict[int(last_key)], + provider_options=provider_options, + **encryption_context, + ) elif isinstance(current_dict, tuple) and last_key.isdigit() and int(last_key) < len(current_dict): index = int(last_key) current_dict = ( - current_dict[:index] + (action(current_dict[index], **provider_options),) + current_dict[index + 1 :] + current_dict[:index] + + (action(current_dict[index], provider_options=provider_options, **encryption_context),) + + current_dict[index + 1 :] ) elif isinstance(current_dict, set): # Convert the set to a list, apply the action, and convert back to a set elements_list = list(current_dict) - elements_list[int(last_key)] = action(elements_list[int(last_key)], **provider_options) + elements_list[int(last_key)] = action( + elements_list[int(last_key)], + provider_options=provider_options, + **encryption_context, + ) current_dict = set(elements_list) else: # Handle the case when the last key doesn't exist diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index e3fb18729b1..ea4d2ddb2ed 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -51,19 +51,19 @@ def __init__( self.json_serializer = json_serializer self.json_deserializer = 
json_deserializer - def encrypt(self, data) -> str | dict: + def encrypt(self, data, provider_options: dict | None = None, **encryption_context: str) -> str | dict: """ Abstract method for encrypting data. Subclasses must implement this method. """ raise NotImplementedError("Subclasses must implement encrypt()") - def decrypt(self, data) -> Any: + def decrypt(self, data, provider_options: dict | None = None, **encryption_context: str) -> Any: """ Abstract method for decrypting data. Subclasses must implement this method. """ raise NotImplementedError("Subclasses must implement decrypt()") - def mask(self, data) -> Union[str, Iterable]: + def mask(self, data, **kwargs) -> Union[str, Iterable]: """ This method irreversibly masks data. diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 876ae8d4553..fd8cf054c1b 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -5,7 +5,7 @@ import json import logging from binascii import Error -from typing import Any, Callable, Dict, List +from typing import Any, Callable, List import botocore from aws_encryption_sdk import ( @@ -86,11 +86,11 @@ def __init__( json_deserializer=json_deserializer, ) - def encrypt(self, data: bytes | str | Dict | int, **provider_options) -> str: - return self._key_provider.encrypt(data=data, **provider_options) + def encrypt(self, data: Any, provider_options: dict | None = None, **encryption_context: str) -> str: + return self._key_provider.encrypt(data=data, provider_options=provider_options, **encryption_context) - def decrypt(self, data: str, **provider_options) -> Any: - return self._key_provider.decrypt(data=data, **provider_options) + def decrypt(self, data: str, provider_options: dict | None = None, **encryption_context: str) -> Any: + 
return self._key_provider.decrypt(data=data, provider_options=provider_options, **encryption_context) class KMSKeyProvider: @@ -127,7 +127,7 @@ def __init__( max_bytes_encrypted=max_bytes_encrypted, ) - def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: + def encrypt(self, data: Any, provider_options: dict | None = None, **encryption_context: str) -> str: """ Encrypt data using the AWSEncryptionSDKProvider. @@ -143,11 +143,14 @@ def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: ciphertext : str The encrypted data, as a base64-encoded string. """ + provider_options = provider_options or {} data_encoded = self.json_serializer(data).encode("utf-8") + try: ciphertext, _ = self.client.encrypt( source=data_encoded, materials_manager=self.cache_cmm, + encryption_context=encryption_context, **provider_options, ) except GenerateKeyError: @@ -157,7 +160,7 @@ def encrypt(self, data: bytes | str | Dict | float, **provider_options) -> str: ciphertext = base64.b64encode(ciphertext).decode() return ciphertext - def decrypt(self, data: str, **provider_options) -> Any: + def decrypt(self, data: str, provider_options: dict | None = None, **encryption_context: str) -> Any: """ Decrypt data using AWSEncryptionSDKProvider. @@ -173,6 +176,8 @@ def decrypt(self, data: str, **provider_options) -> Any: ciphertext : bytes The decrypted data in bytes """ + provider_options = provider_options or {} + try: ciphertext_decoded = base64.b64decode(data) except Error: @@ -180,8 +185,6 @@ def decrypt(self, data: str, **provider_options) -> Any: "Data decryption failed. Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 ) - expected_context = provider_options.pop("encryption_context", {}) - try: ciphertext, decryptor_header = self.client.decrypt( source=ciphertext_decoded, @@ -197,7 +200,7 @@ def decrypt(self, data: str, **provider_options) -> Any: "Data decryption failed. 
Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 ) - for key, value in expected_context.items(): + for key, value in encryption_context.items(): if decryptor_header.encryption_context.get(key) != value: raise DataMaskingContextMismatchError( f"Encryption Context does not match expected value for key: {key}", From 34f8e1b956839e92c25488d8e37c6703235db01a Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 22:10:18 +0100 Subject: [PATCH 096/151] docs: rename encryption context example --- docs/utilities/data_masking.md | 8 ++++---- ...t_context.py => getting_started_decryption_context.py} | 0 ...t_context.py => getting_started_encryption_context.py} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename examples/data_masking/src/{getting_started_decrypt_context.py => getting_started_decryption_context.py} (100%) rename examples/data_masking/src/{getting_started_encrypt_context.py => getting_started_encryption_context.py} (100%) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index fbd15e88b63..1c6b0f39cad 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -190,18 +190,18 @@ For a stronger security posture, you can add metadata to each encryption operati 2. **Only `string` values are supported**. We will raise `DataMaskingUnsupportedTypeError` for non-string values. 3. **Use non-sensitive data only**. When using KMS, encryption context is available as plaintext in AWS CloudTrail. Unless you [intentionally disabled KMS events](https://docs.aws.amazon.com/kms/latest/developerguide/logging-using-cloudtrail.html#filtering-kms-events){target="_blank"}. -=== "getting_started_encrypt_context.py" +=== "getting_started_encryption_context.py" ```python hl_lines="27-29" - --8<-- "examples/data_masking/src/getting_started_encrypt_context.py" + --8<-- "examples/data_masking/src/getting_started_encryption_context.py" ``` 1. 
They must match on `decrypt()` otherwise the operation will fail with `DataMaskingContextMismatchError`. -=== "getting_started_decrypt_context.py" +=== "getting_started_decryption_context.py" ```python hl_lines="26-28" - --8<-- "examples/data_masking/src/getting_started_decrypt_context.py" + --8<-- "examples/data_masking/src/getting_started_decryption_context.py" ``` 1. They must match otherwise the operation will fail with `DataMaskingContextMismatchError`. diff --git a/examples/data_masking/src/getting_started_decrypt_context.py b/examples/data_masking/src/getting_started_decryption_context.py similarity index 100% rename from examples/data_masking/src/getting_started_decrypt_context.py rename to examples/data_masking/src/getting_started_decryption_context.py diff --git a/examples/data_masking/src/getting_started_encrypt_context.py b/examples/data_masking/src/getting_started_encryption_context.py similarity index 100% rename from examples/data_masking/src/getting_started_encrypt_context.py rename to examples/data_masking/src/getting_started_encryption_context.py From 8519896b649c88ef8696d1c52da85fe1a4a42d67 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 22:18:16 +0100 Subject: [PATCH 097/151] fix: encryption context propagation Signed-off-by: heitorlessa --- aws_lambda_powertools/utilities/_data_masking/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 652ac302eb3..ea221607129 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -120,7 +120,7 @@ def _apply_action( data=data, fields=fields, action=action, - options=provider_options, + provider_options=provider_options, **encryption_context, ) else: @@ -226,7 +226,7 @@ def _apply_action_to_fields( current_dict, action, last_key, - provider_options, + 
provider_options=provider_options, **encryption_context, ) From 7be95662ade64c76526f1a90af66dfc5f925ae16 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 22:31:24 +0100 Subject: [PATCH 098/151] refactor: validate encryption context values --- .../provider/kms/aws_encryption_sdk.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index fd8cf054c1b..5cc086fbc28 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -14,7 +14,11 @@ LocalCryptoMaterialsCache, StrictAwsKmsMasterKeyProvider, ) -from aws_encryption_sdk.exceptions import DecryptKeyError, GenerateKeyError, NotSupportedError +from aws_encryption_sdk.exceptions import ( + DecryptKeyError, + GenerateKeyError, + NotSupportedError, +) from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session from aws_lambda_powertools.utilities._data_masking.constants import ( @@ -28,6 +32,7 @@ DataMaskingDecryptKeyError, DataMaskingDecryptValueError, DataMaskingEncryptKeyError, + DataMaskingUnsupportedTypeError, ) from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider @@ -144,8 +149,9 @@ def encrypt(self, data: Any, provider_options: dict | None = None, **encryption_ The encrypted data, as a base64-encoded string. 
""" provider_options = provider_options or {} - data_encoded = self.json_serializer(data).encode("utf-8") + self._validate_encryption_context(encryption_context) + data_encoded = self.json_serializer(data).encode("utf-8") try: ciphertext, _ = self.client.encrypt( source=data_encoded, @@ -177,6 +183,7 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ The decrypted data in bytes """ provider_options = provider_options or {} + self._validate_encryption_context(encryption_context) try: ciphertext_decoded = base64.b64decode(data) @@ -208,3 +215,14 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ ciphertext = self.json_deserializer(ciphertext.decode("utf-8")) return ciphertext + + @staticmethod + def _validate_encryption_context(context: dict): + if not context: + return + + for key, value in context.items(): + if not isinstance(value, str): + raise DataMaskingUnsupportedTypeError( + f"Encryption context values must be string. Received: {key}={value}", + ) From 1ab12a288d64d7bc608c36591bd94120b6654a65 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Tue, 19 Dec 2023 22:33:54 +0100 Subject: [PATCH 099/151] docs: remove todo Signed-off-by: heitorlessa --- docs/utilities/data_masking.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 1c6b0f39cad..18fc339fd42 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -300,9 +300,6 @@ Note that the return will be a deserialized JSON and your desired fields updated --8<-- "examples/data_masking/src/choosing_payload_simple_json_output.json" ``` -!!! todo "Todo" - 1. 
Explain about data preservation - ## Advanced ### Data serialization From e6889baf094fa159f9642276ba48ebf5d79bd790 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 19 Dec 2023 20:55:35 -0600 Subject: [PATCH 100/151] pull out comparing enc_ctx into new method --- .../provider/kms/aws_encryption_sdk.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 5cc086fbc28..8cf9355f887 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -207,11 +207,7 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ "Data decryption failed. Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 ) - for key, value in encryption_context.items(): - if decryptor_header.encryption_context.get(key) != value: - raise DataMaskingContextMismatchError( - f"Encryption Context does not match expected value for key: {key}", - ) + self._compare_encryption_context(encryption_context, decryptor_header) ciphertext = self.json_deserializer(ciphertext.decode("utf-8")) return ciphertext @@ -226,3 +222,14 @@ def _validate_encryption_context(context: dict): raise DataMaskingUnsupportedTypeError( f"Encryption context values must be string. 
Received: {key}={value}", ) + + @staticmethod + def _compare_encryption_context(context: dict, decryptor_header): + if not context: + return + + for key, value in context.items(): + if decryptor_header.encryption_context.get(key) != value: + raise DataMaskingContextMismatchError( + f"Encryption Context does not match expected value for key: {key}", + ) From 840b85b191963e3c5a5f67838131e5a0f77996b2 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 09:37:47 +0000 Subject: [PATCH 101/151] Adding support to jsonpath-ng + refactoring tests --- .../utilities/_data_masking/base.py | 142 ++++++------------ mypy.ini | 3 + poetry.lock | 58 ++++--- pyproject.toml | 1 + .../data_masking/test_aws_encryption_sdk.py | 71 ++++++++- .../data_masking/test_unit_data_masking.py | 38 +---- 6 files changed, 148 insertions(+), 165 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index ea221607129..a38b6f0d9b2 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -3,6 +3,8 @@ import logging from typing import Any, Callable, Iterable, Optional, Union +from jsonpath_ng import parse + from aws_lambda_powertools.utilities._data_masking.exceptions import ( DataMaskingFieldNotFoundError, DataMaskingUnsupportedTypeError, @@ -180,58 +182,58 @@ def _apply_action_to_fields( data_parsed: dict = self._normalize_data_to_parse(fields, data) - for nested_field in fields: - logger.debug(f"Processing nested field: {nested_field}") - - nested_parsed_field = nested_field - - # Ensure the nested field is represented as a string - if not isinstance(nested_parsed_field, str): - nested_parsed_field = self.json_serializer(nested_parsed_field) - - # Split the nested field into keys using dot, square brackets as separators - # keys = re.split(r"\.|\[|\]", nested_field) # noqa ERA001 - REVIEW THIS - - keys = 
nested_parsed_field.replace("][", ".").replace("[", ".").replace("]", "").split(".") - keys = [key for key in keys if key] # Remove empty strings from the split - - # Traverse the dictionary hierarchy by iterating through the list of nested keys - current_dict = data_parsed - - for key in keys[:-1]: - # If enter here, the customer is passing potential list, set or tuple - # Example "payload[0]" - - logger.debug(f"Processing {key} in field {nested_field}") - - # It supports dict, list, set and tuple - try: - if isinstance(current_dict, dict) and key in current_dict: - # If enter heres, it captures the name of the key - # Example "payload" - current_dict = current_dict[key] - elif ( - isinstance(current_dict, (set, tuple, list)) and key.isdigit() and int(key) < len(current_dict) - ): - # If enter heres, it captures the index of the key - # Example "[0]" - current_dict = current_dict[int(key)] - except KeyError: - # Handle the case when the key doesn't exist - raise DataMaskingFieldNotFoundError(f"Key {key} not found in {current_dict}") - - last_key = keys[-1] - - current_dict = self._apply_action_to_specific_type( - current_dict, - action, - last_key, - provider_options=provider_options, - **encryption_context, + # Iterate over each field to be parsed. + for field_parse in fields: + # Parse the field expression using a 'parse' function. + json_parse = parse(field_parse) + # Find the corresponding data in the normalized data using the parsed expression. + result_parse = json_parse.find(data_parsed) + + # If the data for the field is not found, raise an exception. + if not result_parse: + raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}") + + # Update the parsed data using a callback function. 
+ json_parse.update( + data_parsed, + lambda field_value, fields, field_name, action=action, provider_options=provider_options, encryption_context=encryption_context: self._call_action( # noqa + field_value, + fields, + field_name, + action, + provider_options, + **encryption_context, + ), ) return data_parsed + @staticmethod + def _call_action( + field_value: Any, + fields: dict[str, Any], + field_name: str, + action: Callable, + provider_options: dict | None = None, + **encryption_context, + ) -> None: + """ + Apply a specified action to a field value and update the fields dictionary. + + Params: + -------- + - field_value: Current value of the field being processed. + - fields: Dictionary representing the fields being processed (mutable). + - field_name: Name of the field being processed. + - action: Callable (function or method) to be applied to the field_value. + - provider_options: Optional dictionary representing additional options for the action. + - **encryption_context: Additional keyword arguments collected into a dictionary. + + Returns: + - None: The method does not return any value, as it updates the fields in-place. 
+ """ + fields[field_name] = action(field_value, provider_options=provider_options, **encryption_context) + def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: if not fields: raise ValueError("No fields specified.") @@ -249,47 +251,3 @@ def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: ) return data_parsed - - def _apply_action_to_specific_type( - self, - current_dict: dict, - action: Callable, - last_key, - provider_options: dict | None = None, - **encryption_context, - ): - logger.debug("Processing the last fields to apply the action") - # Apply the action to the last key (either a specific index or dictionary key) - if isinstance(current_dict, dict) and last_key in current_dict: - current_dict[last_key] = action( - current_dict[last_key], - provider_options=provider_options, - **encryption_context, - ) - elif isinstance(current_dict, list) and last_key.isdigit() and int(last_key) < len(current_dict): - current_dict[int(last_key)] = action( - current_dict[int(last_key)], - provider_options=provider_options, - **encryption_context, - ) - elif isinstance(current_dict, tuple) and last_key.isdigit() and int(last_key) < len(current_dict): - index = int(last_key) - current_dict = ( - current_dict[:index] - + (action(current_dict[index], provider_options=provider_options, **encryption_context),) - + current_dict[index + 1 :] - ) - elif isinstance(current_dict, set): - # Convert the set to a list, apply the action, and convert back to a set - elements_list = list(current_dict) - elements_list[int(last_key)] = action( - elements_list[int(last_key)], - provider_options=provider_options, - **encryption_context, - ) - current_dict = set(elements_list) - else: - # Handle the case when the last key doesn't exist - raise DataMaskingFieldNotFoundError(f"Key {last_key} not found in {current_dict}") - - return current_dict diff --git a/mypy.ini b/mypy.ini index 9e8ffb5f7a5..5fcb1533707 100644 --- a/mypy.ini +++ b/mypy.ini @@ -18,6 
+18,9 @@ ignore_missing_imports=True [mypy-sentry_sdk] ignore_missing_imports=True +[mypy-jsonpath_ng.*] +ignore_missing_imports=True + [mypy-jmespath.exceptions] ignore_missing_imports=True diff --git a/poetry.lock b/poetry.lock index 1e58a03e6fb..ca863e822de 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1200,17 +1200,6 @@ files = [ {file = "ijson-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a3a6a2fbbe7550ffe52d151cf76065e6b89cfb3e9d0463e49a7e322a25d0426"}, {file = "ijson-3.2.3-cp311-cp311-win32.whl", hash = "sha256:6a4db2f7fb9acfb855c9ae1aae602e4648dd1f88804a0d5cfb78c3639bcf156c"}, {file = "ijson-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:ccd6be56335cbb845f3d3021b1766299c056c70c4c9165fb2fbe2d62258bae3f"}, - {file = "ijson-3.2.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:055b71bbc37af5c3c5861afe789e15211d2d3d06ac51ee5a647adf4def19c0ea"}, - {file = "ijson-3.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c075a547de32f265a5dd139ab2035900fef6653951628862e5cdce0d101af557"}, - {file = "ijson-3.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:457f8a5fc559478ac6b06b6d37ebacb4811f8c5156e997f0d87d708b0d8ab2ae"}, - {file = "ijson-3.2.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9788f0c915351f41f0e69ec2618b81ebfcf9f13d9d67c6d404c7f5afda3e4afb"}, - {file = "ijson-3.2.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fa234ab7a6a33ed51494d9d2197fb96296f9217ecae57f5551a55589091e7853"}, - {file = "ijson-3.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdd0dc5da4f9dc6d12ab6e8e0c57d8b41d3c8f9ceed31a99dae7b2baf9ea769a"}, - {file = "ijson-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c6beb80df19713e39e68dc5c337b5c76d36ccf69c30b79034634e5e4c14d6904"}, - {file = "ijson-3.2.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:a2973ce57afb142d96f35a14e9cfec08308ef178a2c76b8b5e1e98f3960438bf"}, - {file = 
"ijson-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:105c314fd624e81ed20f925271ec506523b8dd236589ab6c0208b8707d652a0e"}, - {file = "ijson-3.2.3-cp312-cp312-win32.whl", hash = "sha256:ac44781de5e901ce8339352bb5594fcb3b94ced315a34dbe840b4cff3450e23b"}, - {file = "ijson-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:0567e8c833825b119e74e10a7c29761dc65fcd155f5d4cb10f9d3b8916ef9912"}, {file = "ijson-3.2.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:eeb286639649fb6bed37997a5e30eefcacddac79476d24128348ec890b2a0ccb"}, {file = "ijson-3.2.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:396338a655fb9af4ac59dd09c189885b51fa0eefc84d35408662031023c110d1"}, {file = "ijson-3.2.3-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e0243d166d11a2a47c17c7e885debf3b19ed136be2af1f5d1c34212850236ac"}, @@ -1413,6 +1402,20 @@ files = [ [package.dependencies] jsonpointer = ">=1.9" +[[package]] +name = "jsonpath-ng" +version = "1.6.0" +description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." 
+optional = false +python-versions = "*" +files = [ + {file = "jsonpath-ng-1.6.0.tar.gz", hash = "sha256:5483f8e9d74c39c9abfab554c070ae783c1c8cbadf5df60d561bc705ac68a07e"}, + {file = "jsonpath_ng-1.6.0-py3-none-any.whl", hash = "sha256:6fd04833412c4b3d9299edf369542f5e67095ca84efa17cbb7f06a34958adc9f"}, +] + +[package.dependencies] +ply = "*" + [[package]] name = "jsonpickle" version = "3.0.2" @@ -1587,16 +1590,6 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, - 
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, - {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -2121,6 +2114,17 @@ importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "ply" +version = "3.11" +description = "Python Lex & Yacc" +optional = false +python-versions = "*" +files = [ + {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, + {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, +] + [[package]] name = "protobuf" version = "4.24.4" @@ -2476,7 +2480,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -2484,15 +2487,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -2509,7 +2505,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -2517,7 +2512,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -3228,4 +3222,4 @@ validation = ["fastjsonschema"] [metadata] lock-version = "2.0" python-versions = "^3.7.4" -content-hash = "90ce7567ea74d89a85e090d29e35fe66f91569f866b051f77781f25a018c2b25" +content-hash = "24070f50e79fbcf114853921620e55e17b8cd0c0f11b42829970a778934c9896" diff --git a/pyproject.toml b/pyproject.toml index 37245bdb355..764ecadd0ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ boto3 = { version = "^1.20.32", optional = true } typing-extensions = "^4.6.2" datadog-lambda = { version = ">=4.77,<6.0", optional = true } aws-encryption-sdk = { version = "^3.1.1", optional = true } +jsonpath-ng = "^1.6.0" [tool.poetry.dev-dependencies] coverage = {extras = ["toml"], version = "^7.2"} diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index 
9336c84e568..eadee01f377 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -131,7 +131,7 @@ def test_mask_dict_with_fields(data_masker): } # WHEN mask is called with a list of fields specified - masked_string = data_masker.mask(data, fields=["a.1.None", "a.b.3.4"]) + masked_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_string == { @@ -154,7 +154,7 @@ def test_mask_json_dict_with_fields(data_masker): ) # WHEN mask is called with a list of fields specified - masked_json_string = data_masker.mask(data, fields=["a.1.None", "a.b.3.4"]) + masked_json_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_json_string == { @@ -258,8 +258,8 @@ def test_encrypt_dict_with_fields(data_masker): } # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=["a.1.None", "a.b.3.4"]) - decrypted_data = data_masker.decrypt(encrypted_data, fields=["a.1.None", "a.b.3.4"]) + encrypted_data = data_masker.encrypt(data, fields=["a.'1'.None", "a..'4'"]) + decrypted_data = data_masker.decrypt(encrypted_data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert decrypted_data == data @@ -277,8 +277,8 @@ def test_encrypt_json_dict_with_fields(data_masker): ) # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=["a.1.None", "a.b.3.4"]) - decrypted_data = data_masker.decrypt(encrypted_data, fields=["a.1.None", "a.b.3.4"]) + encrypted_data = data_masker.encrypt(data, fields=["a.'1'.None", "a..'4'"]) + decrypted_data = data_masker.decrypt(encrypted_data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert decrypted_data == json.loads(data) @@ -322,3 
+322,62 @@ def test_encrypt_json_with_tuple_fields(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == json.loads(data) + + +def test_encrypt_with_encryption_context(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "payload": { + "first": ["value1", "value2"], + "second": (0, 1), + }, + }, + ) + + fields_operation = ["payload.first[0]", "payload.second[0]"] + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation, data_classification="confidential") + decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation, data_classification="confidential") + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) + + +def test_encrypt_with_complex_dict(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "name": "Leandro", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue", + "country": "United States", + "timezone": "America/La_Paz", + }, + { + "postcode": 94400, + "street": "623 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan", + }, + { + "postcode": 94480, + "street": "123 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan", + }, + ], + }, + ) + + fields_operation = ["address[*].postcode"] + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation) + decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) diff --git a/tests/unit/data_masking/test_unit_data_masking.py 
b/tests/unit/data_masking/test_unit_data_masking.py index af176bfb53b..0407b6d7700 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -101,7 +101,7 @@ def test_mask_dict_with_fields(data_masker): } # WHEN mask is called with a list of fields specified - masked_string = data_masker.mask(data, fields=["a.1.None", "a.b.3.4"]) + masked_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_string == { @@ -124,7 +124,7 @@ def test_mask_json_dict_with_fields(data_masker): ) # WHEN mask is called with a list of fields specified - masked_json_string = data_masker.mask(data, fields=["a.1.None", "a.b.3.4"]) + masked_json_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_json_string == { @@ -174,36 +174,4 @@ def test_parsing_nonexistent_fields(data_masker): # WHEN attempting to pass in fields that do not exist in the input data with pytest.raises(DataMaskingFieldNotFoundError): # THEN the result is a KeyError - data_masker.mask(data, ["3.1.True"]) - - -def test_parsing_nonstring_fields(data_masker): - # GIVEN a dict data type - data = { - "3": { - "1": {"None": "hello", "four": "world"}, - "4": {"33": {"5": "goodbye", "e": "world"}}, - }, - } - - # WHEN attempting to pass in a list of fields that are not strings - masked = data_masker.mask(data, fields=[3.4]) - - # THEN the result is the value of the nested field should be masked as normal - assert masked == {"3": {"1": {"None": "hello", "four": "world"}, "4": DATA_MASKING_STRING}} - - -def test_parsing_nonstring_keys_and_fields(data_masker): - # GIVEN a dict data type with integer keys - data = { - 3: { - "1": {"None": "hello", "four": "world"}, - 4: {"33": {"5": "goodbye", "e": "world"}}, - }, - } - - # WHEN masked with a list of fields that are integer keys - masked = 
data_masker.mask(data, fields=[3.4]) - - # THEN the result is the value of the nested field should be masked - assert masked == {"3": {"1": {"None": "hello", "four": "world"}, "4": DATA_MASKING_STRING}} + data_masker.mask(data, ["'3'..True"]) From 2cad7722b83ca548077ce4aa6eb17c5e5636b546 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 09:48:50 +0100 Subject: [PATCH 102/151] refactor: delegate encoding/decoding to shared fn as much as possible to prevent mistakes; exception handling Signed-off-by: heitorlessa --- aws_lambda_powertools/shared/functions.py | 12 ++++++++++-- .../provider/kms/aws_encryption_sdk.py | 18 ++++++++++++------ tests/unit/test_shared_functions.py | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/aws_lambda_powertools/shared/functions.py b/aws_lambda_powertools/shared/functions.py index fb36b98dc34..2dd22d0de43 100644 --- a/aws_lambda_powertools/shared/functions.py +++ b/aws_lambda_powertools/shared/functions.py @@ -95,10 +95,18 @@ def resolve_env_var_choice( def base64_decode(value: str) -> bytes: try: - logger.debug("Decoding base64 record item before parsing") + logger.debug("Decoding base64 item to bytes") return base64.b64decode(value) except (BinAsciiError, TypeError): - raise ValueError("base64 decode failed") + raise ValueError("base64 decode failed - is this base64 encoded string?") + + +def bytes_to_base64_string(value: bytes) -> str: + try: + logger.debug("Encoding bytes to base64 string") + return base64.b64encode(value).decode() + except TypeError: + raise ValueError(f"base64 encoding failed - is this bytes data? 
type: {type(value)}") def bytes_to_string(value: bytes) -> str: diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 8cf9355f887..400abd4ff5e 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -1,6 +1,5 @@ from __future__ import annotations -import base64 import functools import json import logging @@ -20,6 +19,11 @@ NotSupportedError, ) +from aws_lambda_powertools.shared.functions import ( + base64_decode, + bytes_to_base64_string, + bytes_to_string, +) from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session from aws_lambda_powertools.utilities._data_masking.constants import ( CACHE_CAPACITY, @@ -152,6 +156,7 @@ def encrypt(self, data: Any, provider_options: dict | None = None, **encryption_ self._validate_encryption_context(encryption_context) data_encoded = self.json_serializer(data).encode("utf-8") + try: ciphertext, _ = self.client.encrypt( source=data_encoded, @@ -163,8 +168,8 @@ def encrypt(self, data: Any, provider_options: dict | None = None, **encryption_ raise DataMaskingEncryptKeyError( "Failed to encrypt data. Please ensure you are using a valid Symmetric AWS KMS Key ARN, not KMS Key ID or alias.", # noqa E501 ) - ciphertext = base64.b64encode(ciphertext).decode() - return ciphertext + + return bytes_to_base64_string(ciphertext) def decrypt(self, data: str, provider_options: dict | None = None, **encryption_context: str) -> Any: """ @@ -186,7 +191,7 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ self._validate_encryption_context(encryption_context) try: - ciphertext_decoded = base64.b64decode(data) + ciphertext_decoded = base64_decode(data) except Error: raise DataMaskingDecryptValueError( "Data decryption failed. 
Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 @@ -209,8 +214,9 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ self._compare_encryption_context(encryption_context, decryptor_header) - ciphertext = self.json_deserializer(ciphertext.decode("utf-8")) - return ciphertext + decoded_ciphertext = bytes_to_string(ciphertext) + + return self.json_deserializer(decoded_ciphertext) @staticmethod def _validate_encryption_context(context: dict): diff --git a/tests/unit/test_shared_functions.py b/tests/unit/test_shared_functions.py index 9232b72527b..ad6ef6a2e90 100644 --- a/tests/unit/test_shared_functions.py +++ b/tests/unit/test_shared_functions.py @@ -1,3 +1,4 @@ +import base64 import os import warnings from dataclasses import dataclass @@ -7,6 +8,7 @@ from aws_lambda_powertools.shared import constants from aws_lambda_powertools.shared.functions import ( + bytes_to_base64_string, extract_event_from_common_models, powertools_debug_is_set, powertools_dev_is_set, @@ -138,3 +140,16 @@ def test_resolve_max_age_env_var_wins_over_default_value(monkeypatch: pytest.Mon # THEN the result must be the environment variable value assert max_age == 20 + + +def test_bytes_to_base64_string(): + value = b"test" + + assert bytes_to_base64_string(value) == base64.b64encode(value).decode() + + +def test_bytes_to_base64_string_invalid_type(): + value = "test" + + with pytest.raises(ValueError, match="is this bytes data?"): + bytes_to_base64_string(value) From a770fa1bac1b1c576fe79c5204f155260a59512c Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 11:03:50 +0100 Subject: [PATCH 103/151] refactor: correct typing for encrypt Signed-off-by: heitorlessa --- .../utilities/_data_masking/base.py | 29 ++++++++++++++++--- .../utilities/_data_masking/provider/base.py | 2 +- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py 
b/aws_lambda_powertools/utilities/_data_masking/base.py index a38b6f0d9b2..68723e3d215 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,7 +1,8 @@ from __future__ import annotations import logging -from typing import Any, Callable, Iterable, Optional, Union +from numbers import Number +from typing import Any, Callable, Iterable, Mapping, Optional, Sequence, Union, overload from jsonpath_ng import parse @@ -51,13 +52,33 @@ def __init__( self.json_serializer = self.provider.json_serializer self.json_deserializer = self.provider.json_deserializer + @overload def encrypt( self, - data, + data: dict, + fields: list[str], + provider_options: dict | None = None, + **encryption_context: str, + ) -> dict: + ... + + @overload + def encrypt( + self, + data: Mapping | Sequence | Number, + fields: None = None, + provider_options: dict | None = None, + **encryption_context: str, + ) -> str: + ... + + def encrypt( + self, + data: Mapping | Sequence | Number, fields: list[str] | None = None, provider_options: dict | None = None, **encryption_context: str, - ) -> str | dict: + ) -> str | Mapping: return self._apply_action( data=data, fields=fields, @@ -86,7 +107,7 @@ def mask(self, data, fields=None) -> str | Iterable: def _apply_action( self, - data: str | dict, + data, fields: list[str] | None, action: Callable, provider_options: dict | None = None, diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index ea4d2ddb2ed..5527fadd23c 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -51,7 +51,7 @@ def __init__( self.json_serializer = json_serializer self.json_deserializer = json_deserializer - def encrypt(self, data, provider_options: dict | None = None, **encryption_context: str) -> str | dict: + def encrypt(self, 
data, provider_options: dict | None = None, **encryption_context: str) -> str: """ Abstract method for encrypting data. Subclasses must implement this method. """ From a918ced4dd771971b116d1f35e79aac2c638b0fa Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 11:15:25 +0100 Subject: [PATCH 104/151] refactor: correct typing for mask Signed-off-by: heitorlessa --- .../utilities/_data_masking/base.py | 20 +++++++++++++++++-- .../utilities/_data_masking/provider/base.py | 4 ++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 68723e3d215..8bd58d55c11 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -2,7 +2,7 @@ import logging from numbers import Number -from typing import Any, Callable, Iterable, Mapping, Optional, Sequence, Union, overload +from typing import Any, Callable, Mapping, Optional, Sequence, Union, overload from jsonpath_ng import parse @@ -102,7 +102,23 @@ def decrypt( **encryption_context, ) - def mask(self, data, fields=None) -> str | Iterable: + @overload + def mask(self, data, fields: None) -> str: + ... + + @overload + def mask(self, data: list, fields: list[str]) -> list[str]: + ... + + @overload + def mask(self, data: tuple, fields: list[str]) -> tuple[str]: + ... + + @overload + def mask(self, data: dict, fields: list[str]) -> dict: + ... 
+ + def mask(self, data: Sequence | Mapping, fields: list[str] | None = None) -> str | list[str] | tuple[str] | dict: return self._apply_action(data=data, fields=fields, action=self.provider.mask) def _apply_action( diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/_data_masking/provider/base.py index 5527fadd23c..1a4d4dd65cc 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/base.py @@ -2,7 +2,7 @@ import functools import json -from typing import Any, Callable, Iterable, Union +from typing import Any, Callable, Iterable from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING @@ -63,7 +63,7 @@ def decrypt(self, data, provider_options: dict | None = None, **encryption_conte """ raise NotImplementedError("Subclasses must implement decrypt()") - def mask(self, data, **kwargs) -> Union[str, Iterable]: + def mask(self, data, **kwargs) -> Iterable[str]: """ This method irreversibly masks data. From f937d8041d06e433fd85dc40a57be2ee15cce739 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 11:20:30 +0100 Subject: [PATCH 105/151] docs: correct code snippets typing Signed-off-by: heitorlessa --- docs/utilities/data_masking.md | 4 +--- .../data_masking/src/advanced_custom_serializer.py | 4 ++-- .../data_masking/src/getting_started_decrypt_data.py | 2 +- .../data_masking/src/getting_started_encrypt_data.py | 11 ++++++----- .../data_masking/src/getting_started_mask_data.py | 8 +++----- 5 files changed, 13 insertions(+), 16 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 18fc339fd42..c722d6722fe 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -99,8 +99,6 @@ Before you start, you will need a KMS symmetric key to encrypt and decrypt your ### Masking data -!!! 
note "You can mask data without [installing any dependency](#install)." - Masking will erase the original data and replace with `*****`. This means you cannot recover masked data, and its type will change to `str`. === "getting_started_mask_data.py" @@ -131,7 +129,7 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc === "getting_started_encrypt_data.py" - ```python hl_lines="5-6 11-12 23" + ```python hl_lines="6-8 14-15 26" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` diff --git a/examples/data_masking/src/advanced_custom_serializer.py b/examples/data_masking/src/advanced_custom_serializer.py index 5e40a7f354f..2c5a5025f78 100644 --- a/examples/data_masking/src/advanced_custom_serializer.py +++ b/examples/data_masking/src/advanced_custom_serializer.py @@ -20,7 +20,7 @@ data_masker = DataMasking(provider=encryption_provider) -def lambda_handler(event: dict, context: LambdaContext): - data = event.get("body", {}) +def lambda_handler(event: dict, context: LambdaContext) -> str: + data: dict = event.get("body", {}) return data_masker.encrypt(data) diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 582a06af1b7..cf7b58bb3f5 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -17,7 +17,7 @@ @logger.inject_lambda_context def lambda_handler(event: dict, context: LambdaContext) -> dict: - data = event.get("body") + data: dict = event.get("body", {}) logger.info("Decrypting fields email, address.street, and company_address") diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 2246662e4e4..d6307a196fe 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -1,11 +1,12 @@ from __future__ 
import annotations import os -from typing import Iterable from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( + AWSEncryptionSDKProvider, +) from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") @@ -17,11 +18,11 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> Iterable | str: - data = event.get("body", {}) +def lambda_handler(event: dict, context: LambdaContext) -> dict: + data: dict = event.get("body", {}) logger.info("Encrypting fields email, address.street, and company_address") - encrypted: Iterable = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) # (2)! + encrypted = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) # (2)! 
return encrypted diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 84e6dde59ca..dc9526253a0 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,7 +1,5 @@ from __future__ import annotations -from typing import Iterable - from aws_lambda_powertools import Logger from aws_lambda_powertools.utilities._data_masking import DataMasking from aws_lambda_powertools.utilities.typing import LambdaContext @@ -11,11 +9,11 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> Iterable | str: - data = event.get("body") +def lambda_handler(event: dict, context: LambdaContext) -> dict: + data: dict = event.get("body", {}) logger.info("Masking fields email, address.street, and company_address") - masked: Iterable = data_masker.mask(data, fields=["email", "address.street", "company_address"]) # (1)! + masked = data_masker.mask(data, fields=["email", "address.street", "company_address"]) # (1)! 
return masked From e9735d575ed579d7bcaa55a82a9011976daa8c4d Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 10:47:43 +0000 Subject: [PATCH 106/151] Adding flag to support raise on non existing field --- .../utilities/_data_masking/base.py | 15 ++++++++----- .../data_masking/test_unit_data_masking.py | 22 ++++++++++++++++++- 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 8bd58d55c11..d6bf8828736 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -46,11 +46,13 @@ def lambda_handler(event, context): def __init__( self, provider: Optional[BaseProvider] = None, + raise_on_missing_field: bool = False, ): self.provider = provider or BaseProvider() # NOTE: we depend on Provider to not confuse customers in passing the same 2 serializers in 2 places self.json_serializer = self.provider.json_serializer self.json_deserializer = self.provider.json_deserializer + self.raise_on_missing_field = raise_on_missing_field @overload def encrypt( @@ -223,12 +225,15 @@ def _apply_action_to_fields( for field_parse in fields: # Parse the field expression using a 'parse' function. json_parse = parse(field_parse) - # Find the corresponding data in the normalized data using the parsed expression. - result_parse = json_parse.find(data_parsed) - # If the data for the field is not found, raise an exception. - if not result_parse: - raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}") + if self.raise_on_missing_field: + # Customer wants to raise exception when field is not found + # Find the corresponding data in the normalized data using the parsed expression. + result_parse = json_parse.find(data_parsed) + + # If the data for the field is not found, raise an exception. 
+ if not result_parse: + raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}") # Update the parsed data using a callback function. json_parse.update( diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 0407b6d7700..b142cfad926 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -162,8 +162,10 @@ def test_parsing_unsupported_data_type(data_masker): data_masker.mask(42, ["this.field"]) -def test_parsing_nonexistent_fields(data_masker): +def test_parsing_nonexistent_fields_with_raise_on_missing_field(): # GIVEN a dict data type + + data_masker = DataMasking(raise_on_missing_field=True) data = { "3": { "1": {"None": "hello", "four": "world"}, @@ -175,3 +177,21 @@ def test_parsing_nonexistent_fields(data_masker): with pytest.raises(DataMaskingFieldNotFoundError): # THEN the result is a KeyError data_masker.mask(data, ["'3'..True"]) + + +def test_parsing_nonexistent_fields_without_raise_on_missing_field(): + # GIVEN a dict data type + + data_masker = DataMasking(raise_on_missing_field=False) + data = { + "3": { + "1": {"None": "hello", "four": "world"}, + "4": {"33": {"5": "goodbye", "e": "world"}}, + }, + } + + # WHEN mask is called with a non-existing field + masked_json_string = data_masker.mask(data, fields=["non-existing"]) + + # THEN the "masked" payload is the same of the original + assert masked_json_string == data From f763a1153f8c7b77b6179eab2860ef299f6f2e9b Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 11:55:58 +0100 Subject: [PATCH 107/151] docs: make it explicit the behaviour diff in encrypt & mask --- docs/utilities/data_masking.md | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index c722d6722fe..a453d9177ce 100644 --- a/docs/utilities/data_masking.md +++ 
b/docs/utilities/data_masking.md @@ -210,12 +210,19 @@ For a stronger security posture, you can add metadata to each encryption operati You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. -When the field is a `list`, we obfuscate their values to `str` while keeping the data structure and number of items intact. Obfuscating nested data structures from a given field is also supported. +When `fields` is present, `mask` and `encrypt` behave differently: -> Common scenarios +| Operation | Behavior | Example | Obfuscated | +| --------- | ----------------------------------------------------------- | ----------------------- | ------------------------------- | +| `encrypt` | Obfuscate entire data, replacing it with ciphertext string. | `{"cards": ["a", "b"]}` | `{"cards": "ciphertext"}` | +| `mask` | Replace data while keeping the collection type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | + +Here are common scenarios to best visualize how to use `fields`. === "Top keys only" + You want to obfuscate data in the `card_number` field. + === "Data" > Expression: `data_masker.mask(data, fields=["card_number"])` @@ -232,6 +239,8 @@ When the field is a `list`, we obfuscate their values to `str` while keeping the === "Nested key" + You want to obfuscate data in the `postcode` field. + === "Data" > Expression: `data_masker.mask(data, fields=["address.postcode"])` @@ -248,6 +257,8 @@ When the field is a `list`, we obfuscate their values to `str` while keeping the === "Multiple keys" + You want to obfuscate data in both `postcode` and `street` fields. 
+ === "Data" > Expression: `data_masker.mask(data, fields=["address.postcode", "address.street"])` @@ -264,6 +275,8 @@ When the field is a `list`, we obfuscate their values to `str` while keeping the === "All key items" + You want to obfuscate data any data under `address` field. + === "Data" > Expression: `data_masker.mask(data, fields=["address"])` From 634cfc2fbee44990f5190bbb865510304170662d Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 11:55:17 +0000 Subject: [PATCH 108/151] Adding examples on how to access data --- docs/utilities/data_masking.md | 80 ++++++++++++++++++- .../choosing_payload_complex_nested_keys.json | 11 +++ ...ng_payload_complex_nested_keys_output.json | 11 +++ .../src/choosing_payload_list_all_index.json | 15 ++++ ...hoosing_payload_list_all_index_output.json | 16 ++++ .../src/choosing_payload_list_index.json | 15 ++++ .../choosing_payload_list_index_output.json | 16 ++++ .../src/choosing_payload_list_slice.json | 19 +++++ .../choosing_payload_list_slice_output.json | 19 +++++ 9 files changed, 198 insertions(+), 4 deletions(-) create mode 100644 examples/data_masking/src/choosing_payload_complex_nested_keys.json create mode 100644 examples/data_masking/src/choosing_payload_complex_nested_keys_output.json create mode 100644 examples/data_masking/src/choosing_payload_list_all_index.json create mode 100644 examples/data_masking/src/choosing_payload_list_all_index_output.json create mode 100644 examples/data_masking/src/choosing_payload_list_index.json create mode 100644 examples/data_masking/src/choosing_payload_list_index_output.json create mode 100644 examples/data_masking/src/choosing_payload_list_slice.json create mode 100644 examples/data_masking/src/choosing_payload_list_slice_output.json diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index a453d9177ce..c89943748d3 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -275,7 +275,7 @@ Here are common 
scenarios to best visualize how to use `fields`. === "All key items" - You want to obfuscate data any data under `address` field. + You want to obfuscate data under `address` field. === "Data" @@ -291,6 +291,80 @@ Here are common scenarios to best visualize how to use `fields`. --8<-- "examples/data_masking/src/choosing_payload_all_nested_keys_output.json" ``` +=== "Complex nested key" + + You want to obfuscate data under `name` field. + + === "Data" + + > Expression: `data_masker.mask(data, fields=["category..name"])` + + ```json hl_lines="6" + --8<-- "examples/data_masking/src/choosing_payload_complex_nested_keys.json" + ``` + + === "Result" + + ```json hl_lines="6" + --8<-- "examples/data_masking/src/choosing_payload_complex_nested_keys_output.json" + ``` + +=== "Accessing list index" + + You want to obfuscate data under the `street` field located at index 1 (the second item) of the address list. + + === "Data" + + > Expression: `data_masker.mask(data, fields=["address[1].street"])` + + ```json hl_lines="12" + --8<-- "examples/data_masking/src/choosing_payload_list_index.json" + ``` + + === "Result" + + ```json hl_lines="12" + --8<-- "examples/data_masking/src/choosing_payload_list_index_output.json" + ``` + +=== "All fields in a list" + + You want to obfuscate data under the `street` field located at any index of the address list. + + === "Data" + + > Expression: `data_masker.mask(data, fields=["address[*].street"])` + + ```json hl_lines="8 12" + --8<-- "examples/data_masking/src/choosing_payload_list_all_index.json" + ``` + + === "Result" + + ```json hl_lines="8 12" + --8<-- "examples/data_masking/src/choosing_payload_list_all_index_output.json" + ``` + +=== "Slicing a list" + + You want to obfuscate data by slicing a list. 
+ + === "Data" + + > Expression: `data_masker.mask(data, fields=["address[-1].street"])` + + ```json hl_lines="16" + --8<-- "examples/data_masking/src/choosing_payload_list_slice.json" + ``` + + === "Result" + + ```json hl_lines="16" + --8<-- "examples/data_masking/src/choosing_payload_list_slice_output.json" + ``` + +For comprehensive guidance on using JSONPath syntax, please refer to the official documentation available at [jsonpath-ng](https://github.com/h2non/jsonpath-ng#jsonpath-syntax){target="_blank" rel="nofollow"}. + #### JSON We also support data in JSON string format as input. We automatically deserialize it, then handle each field operation as expected. @@ -316,9 +390,7 @@ Note that the return will be a deserialized JSON and your desired fields updated ### Data serialization ???+ note "Current limitations" - 1. No support for data slicing `field.subfield[0:2]`. - 2. No support for accessing fields within a `list`. - 3. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. + 1. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. 
Before we traverse the data structure, we perform two important operations on input data: diff --git a/examples/data_masking/src/choosing_payload_complex_nested_keys.json b/examples/data_masking/src/choosing_payload_complex_nested_keys.json new file mode 100644 index 00000000000..7096e0074d9 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_complex_nested_keys.json @@ -0,0 +1,11 @@ +{ + "category": { + "subcategory": { + "brand" : { + "product": { + "name": "Car" + } + } + } + } +} diff --git a/examples/data_masking/src/choosing_payload_complex_nested_keys_output.json b/examples/data_masking/src/choosing_payload_complex_nested_keys_output.json new file mode 100644 index 00000000000..843c8c7e1ce --- /dev/null +++ b/examples/data_masking/src/choosing_payload_complex_nested_keys_output.json @@ -0,0 +1,11 @@ +{ + "category": { + "subcategory": { + "brand" : { + "product": { + "name": "*****" + } + } + } + } +} diff --git a/examples/data_masking/src/choosing_payload_list_all_index.json b/examples/data_masking/src/choosing_payload_list_all_index.json new file mode 100644 index 00000000000..d1ca2654ef6 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_list_all_index.json @@ -0,0 +1,15 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue" + }, + { + "postcode": 91034, + "street": "14987 Avenue 1" + } + ] +} diff --git a/examples/data_masking/src/choosing_payload_list_all_index_output.json b/examples/data_masking/src/choosing_payload_list_all_index_output.json new file mode 100644 index 00000000000..e00d92728f7 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_list_all_index_output.json @@ -0,0 +1,16 @@ + +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "*****" + }, + { + "postcode": 91034, + "street": "*****" + } + ] +} 
diff --git a/examples/data_masking/src/choosing_payload_list_index.json b/examples/data_masking/src/choosing_payload_list_index.json new file mode 100644 index 00000000000..d1ca2654ef6 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_list_index.json @@ -0,0 +1,15 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue" + }, + { + "postcode": 91034, + "street": "14987 Avenue 1" + } + ] +} diff --git a/examples/data_masking/src/choosing_payload_list_index_output.json b/examples/data_masking/src/choosing_payload_list_index_output.json new file mode 100644 index 00000000000..f780e8d16b9 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_list_index_output.json @@ -0,0 +1,16 @@ + +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue" + }, + { + "postcode": 91034, + "street": "*****" + } + ] +} diff --git a/examples/data_masking/src/choosing_payload_list_slice.json b/examples/data_masking/src/choosing_payload_list_slice.json new file mode 100644 index 00000000000..ebb371b8686 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_list_slice.json @@ -0,0 +1,19 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue" + }, + { + "postcode": 91034, + "street": "14987 Avenue 1" + }, + { + "postcode": 78495, + "street": "34452 Avenue 10" + } + ] +} diff --git a/examples/data_masking/src/choosing_payload_list_slice_output.json b/examples/data_masking/src/choosing_payload_list_slice_output.json new file mode 100644 index 00000000000..9a81d19b654 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_list_slice_output.json @@ -0,0 +1,19 @@ +{ + "name": "Lessa", + "operation": "non sensitive", + 
"card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue" + }, + { + "postcode": 91034, + "street": "14987 Avenue 1" + }, + { + "postcode": 78495, + "street": "*****" + } + ] +} From 4eef61ec723122bfb5c641765d0291e9bce9c168 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 12:04:11 +0000 Subject: [PATCH 109/151] Modifying perf test --- tests/performance/data_masking/test_perf_data_masking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py index 688e36c7a64..8898e12bba2 100644 --- a/tests/performance/data_masking/test_perf_data_masking.py +++ b/tests/performance/data_masking/test_perf_data_masking.py @@ -7,7 +7,7 @@ DATA_MASKING_PACKAGE = "aws_lambda_powertools.utilities._data_masking" DATA_MASKING_INIT_SLA: float = 0.002 -DATA_MASKING_NESTED_ENCRYPT_SLA: float = 0.001 +DATA_MASKING_NESTED_ENCRYPT_SLA: float = 0.01 json_blob = { "id": 1, From c284d4ab77140dce0b90ccb8515fefd8ee0194df Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 13:32:25 +0100 Subject: [PATCH 110/151] refactor: split lambda in partial + lambda to ease maintenance --- .../utilities/_data_masking/base.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index d6bf8828736..824d16b34cf 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,4 +1,5 @@ from __future__ import annotations +import functools import logging from numbers import Number @@ -235,17 +236,14 @@ def _apply_action_to_fields( if not result_parse: raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}") - # Update the parsed data using a callback 
function. + # For in-place updates, json_parse accepts a callback function that receives 3 args: field_value, fields, field_name + # We create a partial callback to pre-populate known provider options (action, provider opts, enc ctx) + update_callback = functools.partial( + self._call_action, action=action, provider_options=provider_options, **encryption_context + ) + json_parse.update( - data_parsed, - lambda field_value, fields, field_name, action=action, provider_options=provider_options, encryption_context=encryption_context: self._call_action( # noqa - field_value, - fields, - field_name, - action, - provider_options, - **encryption_context, - ), + data_parsed, lambda field_value, fields, field_name: update_callback(field_value, fields, field_name) ) return data_parsed From b1da92ebf1e624c9864cdc96662895e23ec55615 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 14:37:21 +0100 Subject: [PATCH 111/151] refactor: decryption context for exact match Signed-off-by: heitorlessa --- .../utilities/_data_masking/constants.py | 2 ++ .../provider/kms/aws_encryption_sdk.py | 24 +++++++++++-------- tests/unit/data_masking/test_kms_provider.py | 21 ++++++++++++++++ 3 files changed, 37 insertions(+), 10 deletions(-) create mode 100644 tests/unit/data_masking/test_kms_provider.py diff --git a/aws_lambda_powertools/utilities/_data_masking/constants.py b/aws_lambda_powertools/utilities/_data_masking/constants.py index b44c7a60857..70eb0baf354 100644 --- a/aws_lambda_powertools/utilities/_data_masking/constants.py +++ b/aws_lambda_powertools/utilities/_data_masking/constants.py @@ -7,3 +7,5 @@ MAX_MESSAGES_ENCRYPTED: int = 4294967296 # 2 ** 32 # Maximum number of bytes which are allowed to be encrypted under a single cached data key MAX_BYTES_ENCRYPTED: int = 9223372036854775807 # 2 ** 63 - 1 + +ENCRYPTED_DATA_KEY_CTX_KEY = "aws-crypto-public-key" diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py 
b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py index 400abd4ff5e..2c6a84f91d9 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py @@ -18,6 +18,7 @@ GenerateKeyError, NotSupportedError, ) +from aws_encryption_sdk.structures import MessageHeader from aws_lambda_powertools.shared.functions import ( base64_decode, @@ -27,6 +28,7 @@ from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session from aws_lambda_powertools.utilities._data_masking.constants import ( CACHE_CAPACITY, + ENCRYPTED_DATA_KEY_CTX_KEY, MAX_BYTES_ENCRYPTED, MAX_CACHE_AGE_SECONDS, MAX_MESSAGES_ENCRYPTED, @@ -198,6 +200,8 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ ) try: + decryptor_header: MessageHeader + ciphertext, decryptor_header = self.client.decrypt( source=ciphertext_decoded, key_provider=self.key_provider, @@ -212,7 +216,7 @@ def decrypt(self, data: str, provider_options: dict | None = None, **encryption_ "Data decryption failed. 
Please ensure that you are attempting to decrypt data that was previously encrypted.", # noqa E501 ) - self._compare_encryption_context(encryption_context, decryptor_header) + self._compare_encryption_context(decryptor_header.encryption_context, encryption_context) decoded_ciphertext = bytes_to_string(ciphertext) @@ -230,12 +234,12 @@ def _validate_encryption_context(context: dict): ) @staticmethod - def _compare_encryption_context(context: dict, decryptor_header): - if not context: - return - - for key, value in context.items(): - if decryptor_header.encryption_context.get(key) != value: - raise DataMaskingContextMismatchError( - f"Encryption Context does not match expected value for key: {key}", - ) + def _compare_encryption_context(actual_context: dict, expected_context: dict): + # We can safely remove encrypted data key after decryption for exact match verification + actual_context.pop(ENCRYPTED_DATA_KEY_CTX_KEY, None) + + # Encryption context could be out of order hence a set + if set(actual_context.items()) != set(expected_context.items()): + raise DataMaskingContextMismatchError( + "Encryption context does not match. 
You must use the exact same context used during encryption", + ) diff --git a/tests/unit/data_masking/test_kms_provider.py b/tests/unit/data_masking/test_kms_provider.py new file mode 100644 index 00000000000..f9f4e31bf18 --- /dev/null +++ b/tests/unit/data_masking/test_kms_provider.py @@ -0,0 +1,21 @@ +import pytest + +from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingContextMismatchError +from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( + KMSKeyProvider, +) + + +def test_encryption_context_exact_match(): + ctx = {"data_classification": "confidential", "data_type": "customer_data"} + ctx_two = {"data_type": "customer_data", "data_classification": "confidential"} + + KMSKeyProvider._compare_encryption_context(ctx, ctx_two) + + +def test_encryption_context_partial_match(): + ctx = {"data_classification": "confidential", "data_type": "customer_data"} + ctx_two = {"data_type": "customer_data"} + + with pytest.raises(DataMaskingContextMismatchError): + KMSKeyProvider._compare_encryption_context(ctx, ctx_two) From 65b3d6a3125ca0eee9b400a016e7d514adacbdba Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 14:45:07 +0100 Subject: [PATCH 112/151] chore: test encryption ctx validation --- tests/unit/data_masking/test_kms_provider.py | 23 +++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/unit/data_masking/test_kms_provider.py b/tests/unit/data_masking/test_kms_provider.py index f9f4e31bf18..f8cbdf6b346 100644 --- a/tests/unit/data_masking/test_kms_provider.py +++ b/tests/unit/data_masking/test_kms_provider.py @@ -1,6 +1,9 @@ import pytest -from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingContextMismatchError +from aws_lambda_powertools.utilities._data_masking.exceptions import ( + DataMaskingContextMismatchError, + DataMaskingUnsupportedTypeError, +) from 
aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( KMSKeyProvider, ) @@ -19,3 +22,21 @@ def test_encryption_context_partial_match(): with pytest.raises(DataMaskingContextMismatchError): KMSKeyProvider._compare_encryption_context(ctx, ctx_two) + + +def test_encryption_context_supported_values(): + ctx = {"a": "b", "c": "d"} + KMSKeyProvider._validate_encryption_context(ctx) + KMSKeyProvider._validate_encryption_context({}) + + +@pytest.mark.parametrize( + "ctx", + [ + pytest.param({"a": 10, "b": True, "c": []}, id="non_string_values"), + pytest.param({"a": {"b": "c"}}, id="nested_dict"), + ], +) +def test_encryption_context_non_str_validation(ctx): + with pytest.raises(DataMaskingUnsupportedTypeError): + KMSKeyProvider._validate_encryption_context(ctx) From e0f4a3a983b6c8e999cfe9974d28089f03fef742 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 14:50:55 +0100 Subject: [PATCH 113/151] chore: fix linting on loop variable Signed-off-by: heitorlessa --- .../utilities/_data_masking/base.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index 824d16b34cf..cad46474ea4 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -1,6 +1,6 @@ from __future__ import annotations -import functools +import functools import logging from numbers import Number from typing import Any, Callable, Mapping, Optional, Sequence, Union, overload @@ -222,6 +222,16 @@ def _apply_action_to_fields( data_parsed: dict = self._normalize_data_to_parse(fields, data) + # For in-place updates, json_parse accepts a callback function + # this function must receive 3 args: field_value, fields, field_name + # We create a partial callback to pre-populate known options (action, provider opts, enc ctx) + update_callback = 
functools.partial( + self._call_action, + action=action, + provider_options=provider_options, + **encryption_context, + ) + # Iterate over each field to be parsed. for field_parse in fields: # Parse the field expression using a 'parse' function. @@ -236,14 +246,9 @@ def _apply_action_to_fields( if not result_parse: raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}") - # For in-place updates, json_parse accepts a callback function that receives 3 args: field_value, fields, field_name - # We create a partial callback to pre-populate known provider options (action, provider opts, enc ctx) - update_callback = functools.partial( - self._call_action, action=action, provider_options=provider_options, **encryption_context - ) - json_parse.update( - data_parsed, lambda field_value, fields, field_name: update_callback(field_value, fields, field_name) + data_parsed, + lambda field_value, fields, field_name: update_callback(field_value, fields, field_name), ) return data_parsed From fbed1a1f47837ff8fb5cbecb3ba62d721393437e Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 14:07:27 +0000 Subject: [PATCH 114/151] Adding complex examples + using ext ngjson --- .../utilities/_data_masking/base.py | 32 +++++--- docs/utilities/data_masking.md | 37 +++++---- .../src/choosing_payload_complex_search.json | 19 +++++ ...hoosing_payload_complex_search_output.json | 19 +++++ .../data_masking/test_aws_encryption_sdk.py | 78 +++++++++++++++++++ .../data_masking/test_unit_data_masking.py | 5 +- 6 files changed, 159 insertions(+), 31 deletions(-) create mode 100644 examples/data_masking/src/choosing_payload_complex_search.json create mode 100644 examples/data_masking/src/choosing_payload_complex_search_output.json diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/_data_masking/base.py index cad46474ea4..f4ab69d2098 100644 --- 
a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/_data_masking/base.py @@ -2,10 +2,11 @@ import functools import logging +import warnings from numbers import Number from typing import Any, Callable, Mapping, Optional, Sequence, Union, overload -from jsonpath_ng import parse +from jsonpath_ng.ext import parse from aws_lambda_powertools.utilities._data_masking.exceptions import ( DataMaskingFieldNotFoundError, @@ -47,7 +48,7 @@ def lambda_handler(event, context): def __init__( self, provider: Optional[BaseProvider] = None, - raise_on_missing_field: bool = False, + raise_on_missing_field: bool = True, ): self.provider = provider or BaseProvider() # NOTE: we depend on Provider to not confuse customers in passing the same 2 serializers in 2 places @@ -236,19 +237,30 @@ def _apply_action_to_fields( for field_parse in fields: # Parse the field expression using a 'parse' function. json_parse = parse(field_parse) + # Find the corresponding keys in the normalized data using the parsed expression. + result_parse = json_parse.find(data_parsed) - if self.raise_on_missing_field: - # Customer wants to raise exception when field is not found - # Find the corresponding data in the normalized data using the parsed expression. - result_parse = json_parse.find(data_parsed) - - # If the data for the field is not found, raise an exception. - if not result_parse: + if not result_parse: + if self.raise_on_missing_field: + # If the data for the field is not found, raise an exception. raise DataMaskingFieldNotFoundError(f"Field or expression {field_parse} not found in {data_parsed}") + else: + # If the data for the field is not found, warning. 
+ warnings.warn(f"Field or expression {field_parse} not found in {data_parsed}", stacklevel=2) + + # For in-place updates, json_parse accepts a callback function + # that receives 3 args: field_value, fields, field_name + # We create a partial callback to pre-populate known provider options (action, provider opts, enc ctx) + update_callback = functools.partial( + self._call_action, + action=action, + provider_options=provider_options, + **encryption_context, + ) json_parse.update( data_parsed, - lambda field_value, fields, field_name: update_callback(field_value, fields, field_name), + lambda field_value, fields, field_name: update_callback(field_value, fields, field_name), # noqa: B023 ) return data_parsed diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index c89943748d3..9e80efaaf32 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -309,24 +309,6 @@ Here are common scenarios to best visualize how to use `fields`. --8<-- "examples/data_masking/src/choosing_payload_complex_nested_keys_output.json" ``` -=== "Accessing list index" - - You want to obfuscate data under `street` field located at the initial index of the address list. - - === "Data" - - > Expression: `data_masker.mask(data, fields=["address[1].street"])` - - ```json hl_lines="12" - --8<-- "examples/data_masking/src/choosing_payload_list_index.json" - ``` - - === "Result" - - ```json hl_lines="12" - --8<-- "examples/data_masking/src/choosing_payload_list_index_output.json" - ``` - === "All fields in a list" You want to obfuscate data under `street` field located at the any index of the address list. @@ -353,7 +335,7 @@ Here are common scenarios to best visualize how to use `fields`. 
> Expression: `data_masker.mask(data, fields=["address[-1].street"])` - ```json hl_lines="8 12 16" + ```json hl_lines="16" --8<-- "examples/data_masking/src/choosing_payload_list_slice.json" ``` @@ -363,6 +345,23 @@ Here are common scenarios to best visualize how to use `fields`. --8<-- "examples/data_masking/src/choosing_payload_list_slice_output.json" ``` +=== "Complex expressions" + + You want to obfuscate data by searching for a field with a conditional expression. + + === "Data" + + > Expression: `data_masker.mask(data, fields=["$.address[?(@.postcode > 81846)]"])` + + ```json hl_lines="8 12" + --8<-- "examples/data_masking/src/choosing_payload_complex_search.json" + ``` + + === "Result" + + ```json hl_lines="8 12" + --8<-- "examples/data_masking/src/choosing_payload_complex_search_output.json" + ``` For comprehensive guidance on using JSONPath syntax, please refer to the official documentation available at [jsonpath-ng](https://github.com/h2non/jsonpath-ng#jsonpath-syntax){target="_blank" rel="nofollow"} #### JSON diff --git a/examples/data_masking/src/choosing_payload_complex_search.json b/examples/data_masking/src/choosing_payload_complex_search.json new file mode 100644 index 00000000000..ebb371b8686 --- /dev/null +++ b/examples/data_masking/src/choosing_payload_complex_search.json @@ -0,0 +1,19 @@ +{ + "name": "Lessa", + "operation": "non sensitive", +
"card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "*****" + }, + { + "postcode": 91034, + "street": "*****" + }, + { + "postcode": 78495, + "street": "34452 Avenue 10" + } + ] +} diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index eadee01f377..e409235be5f 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -381,3 +381,81 @@ def test_encrypt_with_complex_dict(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == json.loads(data) + + +def test_encrypt_with_slice(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "name": "Leandro", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": "38986 Joanne Stravenue", + "country": "United States", + "timezone": "America/La_Paz", + }, + { + "postcode": 94400, + "street": "623 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan", + }, + { + "postcode": 94480, + "street": "123 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan", + }, + ], + }, + ) + + fields_operation = ["address[-1]"] + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation) + decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) + + +def test_encrypt_with_complex_search(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "name": "Leandro", + "operation": "non sensitive", + "card_number": "1000 4444 333 2222", + "address": [ + { + "postcode": 81847, + "street": 
"38986 Joanne Stravenue", + "country": "United States", + "timezone": "America/La_Paz", + }, + { + "postcode": 94400, + "street": "623 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan", + }, + { + "postcode": 94480, + "street": "123 Kraig Mall", + "country": "United States", + "timezone": "America/Mazatlan", + }, + ], + }, + ) + + fields_operation = ["$.address[?(@.postcode > 81847)]"] + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation) + decrypted_data = data_masker.decrypt(encrypted_data, fields=["address[1:3]"]) + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index b142cfad926..5b389f51e03 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -179,7 +179,7 @@ def test_parsing_nonexistent_fields_with_raise_on_missing_field(): data_masker.mask(data, ["'3'..True"]) -def test_parsing_nonexistent_fields_without_raise_on_missing_field(): +def test_parsing_nonexistent_fields_warning_on_missing_field(): # GIVEN a dict data type data_masker = DataMasking(raise_on_missing_field=False) @@ -191,7 +191,8 @@ def test_parsing_nonexistent_fields_without_raise_on_missing_field(): } # WHEN mask is called with a non-existing field - masked_json_string = data_masker.mask(data, fields=["non-existing"]) + with pytest.warns(UserWarning, match="Field or expression*"): + masked_json_string = data_masker.mask(data, fields=["non-existing"]) # THEN the "masked" payload is the same of the original assert masked_json_string == data From d89cbccb9b8a9f72907a86633b0f8d34854ff08e Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 15:36:01 +0000 Subject: [PATCH 115/151] Adding text + increasing perf threshold --- 
docs/utilities/data_masking.md | 6 ++++++ tests/performance/data_masking/test_perf_data_masking.py | 2 +- tests/unit/data_masking/test_unit_data_masking.py | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 9e80efaaf32..e7d6a1c112f 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -353,6 +353,12 @@ Here are common scenarios to best visualize how to use `fields`. > Expression: `data_masker.mask(data, fields=["$.address[?(@.postcode > 81846)]"])` + > `$`: Represents the root of the JSON structure. + + > `.address`: Selects the "address" property within the JSON structure. + + > `(@.postcode > 81846)`: Specifies the condition that elements should meet. It selects elements where the value of the `postcode` property is `greater than 81846`. + ```json hl_lines="8 12" --8<-- "examples/data_masking/src/choosing_payload_complex_search.json" ``` diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py index 8898e12bba2..53eae937f08 100644 --- a/tests/performance/data_masking/test_perf_data_masking.py +++ b/tests/performance/data_masking/test_perf_data_masking.py @@ -7,7 +7,7 @@ DATA_MASKING_PACKAGE = "aws_lambda_powertools.utilities._data_masking" DATA_MASKING_INIT_SLA: float = 0.002 -DATA_MASKING_NESTED_ENCRYPT_SLA: float = 0.01 +DATA_MASKING_NESTED_ENCRYPT_SLA: float = 0.05 json_blob = { "id": 1, diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 5b389f51e03..681b197e2dd 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -162,6 +162,15 @@ def test_parsing_unsupported_data_type(data_masker): data_masker.mask(42, ["this.field"]) +def test_parsing_with_empty_field(data_masker): + # GIVEN an initialization of the DataMasking class + + # WHEN 
attempting to pass in an empty list of fields with input data that is not a dict + with pytest.raises(ValueError): + # THEN a ValueError is raised + data_masker.mask(42, []) + + def test_parsing_nonexistent_fields_with_raise_on_missing_field(): # GIVEN a dict data type From 5d1687ed1dc525fafda404c16b5e8240d0432a48 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 15:42:15 +0000 Subject: [PATCH 116/151] Making utility public --- .../utilities/_data_masking/provider/__init__.py | 5 ----- .../utilities/_data_masking/provider/kms/__init__.py | 5 ----- .../utilities/{_data_masking => data_masking}/__init__.py | 2 +- .../utilities/{_data_masking => data_masking}/base.py | 4 ++-- .../{_data_masking => data_masking}/constants.py | 0 .../{_data_masking => data_masking}/exceptions.py | 0 .../utilities/data_masking/provider/__init__.py | 5 +++++ .../{_data_masking => data_masking}/provider/base.py | 2 +- .../utilities/data_masking/provider/kms/__init__.py | 5 +++++ .../provider/kms/aws_encryption_sdk.py | 6 +++--- examples/data_masking/src/advanced_custom_serializer.py | 4 ++-- .../data_masking/src/data_masking_function_example.py | 4 ++-- examples/data_masking/src/getting_started_decrypt_data.py | 4 ++-- .../src/getting_started_decryption_context.py | 4 ++-- examples/data_masking/src/getting_started_encrypt_data.py | 4 ++-- .../src/getting_started_encryption_context.py | 4 ++-- examples/data_masking/src/getting_started_mask_data.py | 2 +- tests/e2e/data_masking/handlers/basic_handler.py | 4 ++-- tests/e2e/data_masking/test_e2e_data_masking.py | 6 +++--- tests/functional/data_masking/test_aws_encryption_sdk.py | 8 ++++---- .../pt-load-test-stack/function_1024/app.py | 4 ++-- .../pt-load-test-stack/function_128/app.py | 4 ++-- .../pt-load-test-stack/function_1769/app.py | 4 ++-- tests/performance/data_masking/test_perf_data_masking.py | 4 ++-- tests/unit/data_masking/test_kms_provider.py | 4 ++-- tests/unit/data_masking/test_unit_data_masking.py | 6 +++--- 26
files changed, 52 insertions(+), 52 deletions(-) delete mode 100644 aws_lambda_powertools/utilities/_data_masking/provider/__init__.py delete mode 100644 aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py rename aws_lambda_powertools/utilities/{_data_masking => data_masking}/__init__.py (81%) rename aws_lambda_powertools/utilities/{_data_masking => data_masking}/base.py (98%) rename aws_lambda_powertools/utilities/{_data_masking => data_masking}/constants.py (100%) rename aws_lambda_powertools/utilities/{_data_masking => data_masking}/exceptions.py (100%) create mode 100644 aws_lambda_powertools/utilities/data_masking/provider/__init__.py rename aws_lambda_powertools/utilities/{_data_masking => data_masking}/provider/base.py (96%) create mode 100644 aws_lambda_powertools/utilities/data_masking/provider/kms/__init__.py rename aws_lambda_powertools/utilities/{_data_masking => data_masking}/provider/kms/aws_encryption_sdk.py (97%) diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/__init__.py b/aws_lambda_powertools/utilities/_data_masking/provider/__init__.py deleted file mode 100644 index 7ee07f964b1..00000000000 --- a/aws_lambda_powertools/utilities/_data_masking/provider/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from aws_lambda_powertools.utilities._data_masking.provider.base import BaseProvider - -__all__ = [ - "BaseProvider", -] diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py b/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py deleted file mode 100644 index dfcf1f163ff..00000000000 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider - -__all__ = [ - "AWSEncryptionSDKProvider", -] diff --git a/aws_lambda_powertools/utilities/_data_masking/__init__.py 
b/aws_lambda_powertools/utilities/data_masking/__init__.py similarity index 81% rename from aws_lambda_powertools/utilities/_data_masking/__init__.py rename to aws_lambda_powertools/utilities/data_masking/__init__.py index 806c856ba75..4d767e83ce1 100644 --- a/aws_lambda_powertools/utilities/_data_masking/__init__.py +++ b/aws_lambda_powertools/utilities/data_masking/__init__.py @@ -4,7 +4,7 @@ Keep in mind that when we transition to General Availability (GA), there might be breaking changes introduced. """ -from aws_lambda_powertools.utilities._data_masking.base import DataMasking +from aws_lambda_powertools.utilities.data_masking.base import DataMasking __all__ = [ "DataMasking", diff --git a/aws_lambda_powertools/utilities/_data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py similarity index 98% rename from aws_lambda_powertools/utilities/_data_masking/base.py rename to aws_lambda_powertools/utilities/data_masking/base.py index f4ab69d2098..8c43800769e 100644 --- a/aws_lambda_powertools/utilities/_data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -8,11 +8,11 @@ from jsonpath_ng.ext import parse -from aws_lambda_powertools.utilities._data_masking.exceptions import ( +from aws_lambda_powertools.utilities.data_masking.exceptions import ( DataMaskingFieldNotFoundError, DataMaskingUnsupportedTypeError, ) -from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider +from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider logger = logging.getLogger(__name__) diff --git a/aws_lambda_powertools/utilities/_data_masking/constants.py b/aws_lambda_powertools/utilities/data_masking/constants.py similarity index 100% rename from aws_lambda_powertools/utilities/_data_masking/constants.py rename to aws_lambda_powertools/utilities/data_masking/constants.py diff --git a/aws_lambda_powertools/utilities/_data_masking/exceptions.py 
b/aws_lambda_powertools/utilities/data_masking/exceptions.py similarity index 100% rename from aws_lambda_powertools/utilities/_data_masking/exceptions.py rename to aws_lambda_powertools/utilities/data_masking/exceptions.py diff --git a/aws_lambda_powertools/utilities/data_masking/provider/__init__.py b/aws_lambda_powertools/utilities/data_masking/provider/__init__.py new file mode 100644 index 00000000000..5a0180eb82b --- /dev/null +++ b/aws_lambda_powertools/utilities/data_masking/provider/__init__.py @@ -0,0 +1,5 @@ +from aws_lambda_powertools.utilities.data_masking.provider.base import BaseProvider + +__all__ = [ + "BaseProvider", +] diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/base.py b/aws_lambda_powertools/utilities/data_masking/provider/base.py similarity index 96% rename from aws_lambda_powertools/utilities/_data_masking/provider/base.py rename to aws_lambda_powertools/utilities/data_masking/provider/base.py index 1a4d4dd65cc..23958f90eba 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/base.py @@ -4,7 +4,7 @@ import json from typing import Any, Callable, Iterable -from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING +from aws_lambda_powertools.utilities.data_masking.constants import DATA_MASKING_STRING class BaseProvider: diff --git a/aws_lambda_powertools/utilities/data_masking/provider/kms/__init__.py b/aws_lambda_powertools/utilities/data_masking/provider/kms/__init__.py new file mode 100644 index 00000000000..c1353094144 --- /dev/null +++ b/aws_lambda_powertools/utilities/data_masking/provider/kms/__init__.py @@ -0,0 +1,5 @@ +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider + +__all__ = [ + "AWSEncryptionSDKProvider", +] diff --git a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py 
b/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py similarity index 97% rename from aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py rename to aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py index 2c6a84f91d9..7a992aae248 100644 --- a/aws_lambda_powertools/utilities/_data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py @@ -26,21 +26,21 @@ bytes_to_string, ) from aws_lambda_powertools.shared.user_agent import register_feature_to_botocore_session -from aws_lambda_powertools.utilities._data_masking.constants import ( +from aws_lambda_powertools.utilities.data_masking.constants import ( CACHE_CAPACITY, ENCRYPTED_DATA_KEY_CTX_KEY, MAX_BYTES_ENCRYPTED, MAX_CACHE_AGE_SECONDS, MAX_MESSAGES_ENCRYPTED, ) -from aws_lambda_powertools.utilities._data_masking.exceptions import ( +from aws_lambda_powertools.utilities.data_masking.exceptions import ( DataMaskingContextMismatchError, DataMaskingDecryptKeyError, DataMaskingDecryptValueError, DataMaskingEncryptKeyError, DataMaskingUnsupportedTypeError, ) -from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider +from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider logger = logging.getLogger(__name__) diff --git a/examples/data_masking/src/advanced_custom_serializer.py b/examples/data_masking/src/advanced_custom_serializer.py index 2c5a5025f78..f870624bccb 100644 --- a/examples/data_masking/src/advanced_custom_serializer.py +++ b/examples/data_masking/src/advanced_custom_serializer.py @@ -4,8 +4,8 @@ import ujson -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( +from aws_lambda_powertools.utilities.data_masking import DataMasking +from 
aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import ( AWSEncryptionSDKProvider, ) from aws_lambda_powertools.utilities.typing import LambdaContext diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index b25449d8ed4..bb9a1ed5f06 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -3,8 +3,8 @@ import os from aws_lambda_powertools import Logger, Tracer -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index cf7b58bb3f5..eaab64097c1 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -3,8 +3,8 @@ import os from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") # (1)! 
diff --git a/examples/data_masking/src/getting_started_decryption_context.py b/examples/data_masking/src/getting_started_decryption_context.py index f0360403dd5..fd6540ebac8 100644 --- a/examples/data_masking/src/getting_started_decryption_context.py +++ b/examples/data_masking/src/getting_started_decryption_context.py @@ -3,8 +3,8 @@ import os from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index d6307a196fe..aeba96a8395 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -3,8 +3,8 @@ import os from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import ( AWSEncryptionSDKProvider, ) from aws_lambda_powertools.utilities.typing import LambdaContext diff --git a/examples/data_masking/src/getting_started_encryption_context.py b/examples/data_masking/src/getting_started_encryption_context.py index 4e30301a11e..1cf6ce882a4 100644 --- a/examples/data_masking/src/getting_started_encryption_context.py +++ b/examples/data_masking/src/getting_started_encryption_context.py @@ -3,8 +3,8 @@ import os 
from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index dc9526253a0..37dfd7b4dab 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -1,7 +1,7 @@ from __future__ import annotations from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities._data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking import DataMasking from aws_lambda_powertools.utilities.typing import LambdaContext logger = Logger() diff --git a/tests/e2e/data_masking/handlers/basic_handler.py b/tests/e2e/data_masking/handlers/basic_handler.py index 2ad3b379436..6f696391822 100644 --- a/tests/e2e/data_masking/handlers/basic_handler.py +++ b/tests/e2e/data_masking/handlers/basic_handler.py @@ -1,6 +1,6 @@ from aws_lambda_powertools import Logger -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider logger = Logger() diff --git a/tests/e2e/data_masking/test_e2e_data_masking.py b/tests/e2e/data_masking/test_e2e_data_masking.py index b5e5203b4ce..5664858d5d8 100644 --- 
a/tests/e2e/data_masking/test_e2e_data_masking.py +++ b/tests/e2e/data_masking/test_e2e_data_masking.py @@ -4,9 +4,9 @@ import pytest from aws_encryption_sdk.exceptions import DecryptKeyError -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.exceptions import DataMaskingContextMismatchError -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.exceptions import DataMaskingContextMismatchError +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import ( AWSEncryptionSDKProvider, ) from tests.e2e.utils import data_fetcher diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index e409235be5f..10b09894b80 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -7,10 +7,10 @@ import pytest -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING -from aws_lambda_powertools.utilities._data_masking.provider import BaseProvider -from aws_lambda_powertools.utilities._data_masking.provider.kms import ( +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.constants import DATA_MASKING_STRING +from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider +from aws_lambda_powertools.utilities.data_masking.provider.kms import ( AWSEncryptionSDKProvider, ) diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py index 
33b9673154a..76081b20392 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1024/app.py @@ -3,8 +3,8 @@ from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.event_handler import APIGatewayRestResolver from aws_lambda_powertools.logging import correlation_paths -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py index 81fffefb2ee..b191ade241a 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_128/app.py @@ -3,8 +3,8 @@ from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.event_handler import APIGatewayRestResolver from aws_lambda_powertools.logging import correlation_paths -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = 
os.environ["KMS_KEY_ARN"] diff --git a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py index 3fbe0d60116..19d287e6011 100644 --- a/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py +++ b/tests/performance/data_masking/load_test_data_masking/pt-load-test-stack/function_1769/app.py @@ -3,8 +3,8 @@ from aws_lambda_powertools import Logger, Tracer from aws_lambda_powertools.event_handler import APIGatewayRestResolver from aws_lambda_powertools.logging import correlation_paths -from aws_lambda_powertools.utilities._data_masking import DataMasking -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider from aws_lambda_powertools.utilities.typing import LambdaContext KMS_KEY_ARN = os.environ["KMS_KEY_ARN"] diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py index 53eae937f08..9fae80b3a00 100644 --- a/tests/performance/data_masking/test_perf_data_masking.py +++ b/tests/performance/data_masking/test_perf_data_masking.py @@ -3,9 +3,9 @@ import pytest -from aws_lambda_powertools.utilities._data_masking.base import DataMasking +from aws_lambda_powertools.utilities.data_masking.base import DataMasking -DATA_MASKING_PACKAGE = "aws_lambda_powertools.utilities._data_masking" +DATA_MASKING_PACKAGE = "aws_lambda_powertools.utilities.data_masking" DATA_MASKING_INIT_SLA: float = 0.002 DATA_MASKING_NESTED_ENCRYPT_SLA: float = 0.05 diff --git a/tests/unit/data_masking/test_kms_provider.py b/tests/unit/data_masking/test_kms_provider.py index f8cbdf6b346..5fe9b2e53ed 100644 --- 
a/tests/unit/data_masking/test_kms_provider.py +++ b/tests/unit/data_masking/test_kms_provider.py @@ -1,10 +1,10 @@ import pytest -from aws_lambda_powertools.utilities._data_masking.exceptions import ( +from aws_lambda_powertools.utilities.data_masking.exceptions import ( DataMaskingContextMismatchError, DataMaskingUnsupportedTypeError, ) -from aws_lambda_powertools.utilities._data_masking.provider.kms.aws_encryption_sdk import ( +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import ( KMSKeyProvider, ) diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 681b197e2dd..552abab4161 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -2,9 +2,9 @@ import pytest -from aws_lambda_powertools.utilities._data_masking.base import DataMasking -from aws_lambda_powertools.utilities._data_masking.constants import DATA_MASKING_STRING -from aws_lambda_powertools.utilities._data_masking.exceptions import ( +from aws_lambda_powertools.utilities.data_masking.base import DataMasking +from aws_lambda_powertools.utilities.data_masking.constants import DATA_MASKING_STRING +from aws_lambda_powertools.utilities.data_masking.exceptions import ( DataMaskingFieldNotFoundError, DataMaskingUnsupportedTypeError, ) From b0213a10cd97ef2dfc28cec9e28cb8c0d63427ad Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 17:00:37 +0000 Subject: [PATCH 117/151] Adding chaging algorithm section --- docs/utilities/data_masking.md | 10 ++++++ .../src/changing_default_algorithm.py | 34 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 examples/data_masking/src/changing_default_algorithm.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index e7d6a1c112f..f839e71ab17 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -425,6 +425,16 @@ You 
can modify the following values when initializing the `AWSEncryptionSDKProvi | **max_messages_encrypted** | | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | | **max_bytes_encrypted** | | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | +**Changing the default algorithm** + +The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY_ECDSA_P384` algorithm for encrypting your Data Key. If you want, you have the flexibility to customize and choose a different encryption algorithm. + +=== "changing_default_algorithm.py" + + ```python hl_lines="5 29" + --8<-- "examples/data_masking/src/changing_default_algorithm.py" + ``` + #### Creating your own provider !!! info "In Q1 2024, we plan to add support for bringing your own encryption provider." diff --git a/examples/data_masking/src/changing_default_algorithm.py b/examples/data_masking/src/changing_default_algorithm.py new file mode 100644 index 00000000000..5fa9e41a16c --- /dev/null +++ b/examples/data_masking/src/changing_default_algorithm.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import os + +from aws_encryption_sdk.identifiers import Algorithm + +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AWSEncryptionSDKProvider +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") + +encryption_provider = AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN]) +data_masker = DataMasking(provider=encryption_provider) + +logger = Logger() + + +@logger.inject_lambda_context +def lambda_handler(event: dict, context: LambdaContext) -> dict: + data: dict = event.get("body", {}) + + logger.info("Encrypting fields email, address.street, and company_address with a different algorithm") + + provider_options = 
{"algorithm": Algorithm.AES_256_GCM_HKDF_SHA512_COMMIT_KEY} + + decrypted = data_masker.encrypt( + data, + fields=["email", "address.street", "company_address"], + provider_options=provider_options, + ) + + return decrypted From 95bc7a17e20066440f8cbbda4f6881ceae079be2 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 19:32:44 +0100 Subject: [PATCH 118/151] docs: address initial feedback --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index f839e71ab17..0110cddb57e 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -17,7 +17,7 @@ stateDiagram-v2 Mask: Mask Encrypt: Encrypt Decrypt: Decrypt - Provider: Encryption provider + Provider: AWS Encryption SDK provider Result: Data transformed (masked, encrypted, or decrypted) LambdaFn --> DataMasking From ace1c28dd89f323fb4673648edaf072160bc1148 Mon Sep 17 00:00:00 2001 From: heitorlessa Date: Wed, 20 Dec 2023 19:42:29 +0100 Subject: [PATCH 119/151] docs: re-incorporate initial feedback --- docs/utilities/data_masking.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 0110cddb57e..2c33b2b3091 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -435,10 +435,6 @@ The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY --8<-- "examples/data_masking/src/changing_default_algorithm.py" ``` -#### Creating your own provider - -!!! info "In Q1 2024, we plan to add support for bringing your own encryption provider." - ### Data masking request flow The following sequence diagrams explain how `DataMasking` behaves under different scenarios. 
From 429eb8a30ecd5495226621f40d90208cb6e8aa4d Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Wed, 20 Dec 2023 19:43:40 +0000 Subject: [PATCH 120/151] Adding test --- docs/utilities/data_masking.md | 15 ++++++---- examples/data_masking/tests/lambda_mask.py | 14 +++++++++ .../tests/test_data_masking_single_mock.py | 10 ------- .../tests/test_data_masking_with_fixture.py | 16 ---------- .../data_masking/tests/test_lambda_mask.py | 30 +++++++++++++++++++ 5 files changed, 53 insertions(+), 32 deletions(-) create mode 100644 examples/data_masking/tests/lambda_mask.py delete mode 100644 examples/data_masking/tests/test_data_masking_single_mock.py delete mode 100644 examples/data_masking/tests/test_data_masking_with_fixture.py create mode 100644 examples/data_masking/tests/test_lambda_mask.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 2c33b2b3091..4a265126ddc 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -572,13 +572,16 @@ sequenceDiagram ## Testing your code -!!! danger "TODO - Refactor with a fake" +### Testing mask operation -For unit testing your applications, you can mock the calls to the data masking utility to avoid calling AWS APIs. This can be achieved in a number of ways - in this example, we use the pytest monkeypatch fixture to patch the `data_masking.decrypt` method. 
+Testing your code with a simple mask operation -If we need to use this pattern across multiple tests, we can avoid repetition by refactoring to use our own pytest fixture: +=== "test_lambda_mask.py" + ```python hl_lines="22" + --8<-- "examples/data_masking/tests/test_lambda_mask.py" + ``` -=== "test_with_fixture.py" - ```python hl_lines="5 10" - --8<-- "examples/data_masking/tests/test_data_masking_with_fixture.py" +=== "lambda_mask.py" + ```python hl_lines="3 12" + --8<-- "examples/data_masking/tests/lambda_mask.py" ``` diff --git a/examples/data_masking/tests/lambda_mask.py b/examples/data_masking/tests/lambda_mask.py new file mode 100644 index 00000000000..64833464b37 --- /dev/null +++ b/examples/data_masking/tests/lambda_mask.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.typing import LambdaContext + +data_masker = DataMasking() + + +def lambda_handler(event: dict, context: LambdaContext) -> dict: + data = event + + masked = data_masker.mask(data, fields=["testkey"]) + + return masked diff --git a/examples/data_masking/tests/test_data_masking_single_mock.py b/examples/data_masking/tests/test_data_masking_single_mock.py deleted file mode 100644 index 46265309c25..00000000000 --- a/examples/data_masking/tests/test_data_masking_single_mock.py +++ /dev/null @@ -1,10 +0,0 @@ -from src import single_mock - - -def test_handler(monkeypatch): - def mockreturn(name): - return "mock_value" - - monkeypatch.setattr(single_mock.DataMasking, "decrypt", mockreturn) - return_val = single_mock.handler({}, {}) - assert return_val.get("message") == "mock_value" diff --git a/examples/data_masking/tests/test_data_masking_with_fixture.py b/examples/data_masking/tests/test_data_masking_with_fixture.py deleted file mode 100644 index 57c39b0af39..00000000000 --- a/examples/data_masking/tests/test_data_masking_with_fixture.py +++ /dev/null @@ -1,16 +0,0 @@ -import 
pytest -from src import single_mock - - -@pytest.fixture -def mock_data_masking_response(monkeypatch): - def mockreturn(name): - return "mock_value" - - monkeypatch.setattr(single_mock.DataMasking, "decrypt", mockreturn) - - -# Pass our fixture as an argument to all tests where we want to mock the decrypt response -def test_handler(mock_data_masking_response): - return_val = single_mock.handler({}, {}) - assert return_val.get("message") == "mock_value" diff --git a/examples/data_masking/tests/test_lambda_mask.py b/examples/data_masking/tests/test_lambda_mask.py new file mode 100644 index 00000000000..47e0f09cfde --- /dev/null +++ b/examples/data_masking/tests/test_lambda_mask.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + +import pytest +import test_lambda_mask + + +@pytest.fixture +def lambda_context(): + @dataclass + class LambdaContext: + function_name: str = "test" + memory_limit_in_mb: int = 128 + invoked_function_arn: str = "arn:aws:lambda:eu-west-1:809313241:function:test" + aws_request_id: str = "52fdfc07-2182-154f-163f-5f0f9a621d72" + + def get_remaining_time_in_millis(self) -> int: + return 5 + + return LambdaContext() + + +def test_encrypt_lambda(lambda_context): + # GIVEN: A sample event for testing + event = {"testkey": "testvalue"} + + # WHEN: Invoking the lambda_handler function with the sample event and Lambda context + result = test_lambda_mask.lambda_handler(event, lambda_context) + + # THEN: Assert that the result matches the expected output + assert result == {"testkey": "*****"} From 85766bf8e8dd9b334bea7bae15ca3e62a18d7966 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 23 Jan 2024 10:34:42 -0800 Subject: [PATCH 121/151] Fixed docstrings, added a test --- .../utilities/data_masking/base.py | 6 +++-- .../provider/kms/aws_encryption_sdk.py | 4 +++- .../data_masking/test_aws_encryption_sdk.py | 22 +++++++++++++++++++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git 
a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index 8c43800769e..da086de9d1e 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -194,8 +194,10 @@ def _apply_action_to_fields( value of the field as the first argument and any additional arguments that might be required for the action. It performs an operation on the current value using the provided arguments and returns the modified value. - **provider_options: - Additional keyword arguments to pass to the 'action' function. + provider_options : dict + Optional dictionary representing additional options for the action. + **encryption_context: str + Additional keyword arguments collected into a dictionary. Returns ------- diff --git a/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py index 7a992aae248..ba9aeb4c9a1 100644 --- a/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py @@ -146,8 +146,10 @@ def encrypt(self, data: Any, provider_options: dict | None = None, **encryption_ ------- data : Union[bytes, str] The data to be encrypted. - provider_options + provider_options : dict Additional options for the aws_encryption_sdk.EncryptionSDKClient + **encryption_context : str + Additional keyword arguments collected into a dictionary. 
Returns ------- diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index 10b09894b80..7dc594b2db8 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -7,6 +7,8 @@ import pytest +from aws_encryption_sdk.identifiers import Algorithm + from aws_lambda_powertools.utilities.data_masking import DataMasking from aws_lambda_powertools.utilities.data_masking.constants import DATA_MASKING_STRING from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider @@ -459,3 +461,23 @@ def test_encrypt_with_complex_search(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == json.loads(data) + +def test_encrypt_with_provider_options(data_masker): + # GIVEN the data type is a json representation of a dictionary with a list inside + data = json.dumps( + { + "payload": { + "first": ["value1", "value2"], + "second": (0, 1), + }, + }, + ) + + fields_operation = ["payload.first[0]", "payload.second[0]"] + provider_options = {"algorithm": Algorithm.AES_256_GCM_HKDF_SHA512_COMMIT_KEY} + # WHEN encrypting and then decrypting the encrypted data + encrypted_data = data_masker.encrypt(data, fields=fields_operation, provider_options=provider_options) + decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + + # THEN the result is only the specified fields are masked + assert decrypted_data == json.loads(data) From 95098e5987f4232721fd898b762f71e07955a360 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 11:48:55 -0800 Subject: [PATCH 122/151] Removed fields param from enc+dec methods --- .../utilities/data_masking/base.py | 16 ++-- .../utilities/data_masking/provider/base.py | 10 +-- .../e2e/data_masking/test_e2e_data_masking.py | 2 +- .../data_masking/test_aws_encryption_sdk.py | 79 ++++++++----------- 
.../data_masking/test_unit_data_masking.py | 26 +++--- 5 files changed, 62 insertions(+), 71 deletions(-) diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index da086de9d1e..91b60f36a30 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -38,7 +38,7 @@ def lambda_handler(event, context): "sensitive": "password" } - masked = masker.mask(data,fields=["sensitive"]) + masked = masker.erase(data,fields=["sensitive"]) return masked @@ -60,7 +60,7 @@ def __init__( def encrypt( self, data: dict, - fields: list[str], + fields: None = None, provider_options: dict | None = None, **encryption_context: str, ) -> dict: @@ -107,23 +107,23 @@ def decrypt( ) @overload - def mask(self, data, fields: None) -> str: + def erase(self, data, fields: None) -> str: ... @overload - def mask(self, data: list, fields: list[str]) -> list[str]: + def erase(self, data: list, fields: list[str]) -> list[str]: ... @overload - def mask(self, data: tuple, fields: list[str]) -> tuple[str]: + def erase(self, data: tuple, fields: list[str]) -> tuple[str]: ... @overload - def mask(self, data: dict, fields: list[str]) -> dict: + def erase(self, data: dict, fields: list[str]) -> dict: ... 
- def mask(self, data: Sequence | Mapping, fields: list[str] | None = None) -> str | list[str] | tuple[str] | dict: - return self._apply_action(data=data, fields=fields, action=self.provider.mask) + def erase(self, data: Sequence | Mapping, fields: list[str] | None = None) -> str | list[str] | tuple[str] | dict: + return self._apply_action(data=data, fields=fields, action=self.provider.erase) def _apply_action( self, diff --git a/aws_lambda_powertools/utilities/data_masking/provider/base.py b/aws_lambda_powertools/utilities/data_masking/provider/base.py index 23958f90eba..1f894c9c169 100644 --- a/aws_lambda_powertools/utilities/data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/base.py @@ -24,7 +24,7 @@ def encrypt(self, data) -> str: def decrypt(self, data) -> Any: # Implementation logic for data decryption - def mask(self, data) -> Union[str, Iterable]: + def erase(self, data) -> Union[str, Iterable]: # Implementation logic for data masking pass @@ -63,14 +63,14 @@ def decrypt(self, data, provider_options: dict | None = None, **encryption_conte """ raise NotImplementedError("Subclasses must implement decrypt()") - def mask(self, data, **kwargs) -> Iterable[str]: + def erase(self, data, **kwargs) -> Iterable[str]: """ - This method irreversibly masks data. + This method irreversibly erases data. - If the data to be masked is of type `str`, `dict`, or `bytes`, + If the data to be erased is of type `str`, `dict`, or `bytes`, this method will return a masked string, i.e. "*****". - If the data to be masked is of an iterable type like `list`, `tuple`, + If the data to be erased is of an iterable type like `list`, `tuple`, or `set`, this method will return a new object of the same type as the input data but with each element replaced by the string "*****". 
""" diff --git a/tests/e2e/data_masking/test_e2e_data_masking.py b/tests/e2e/data_masking/test_e2e_data_masking.py index 5664858d5d8..dfa8504a3c9 100644 --- a/tests/e2e/data_masking/test_e2e_data_masking.py +++ b/tests/e2e/data_masking/test_e2e_data_masking.py @@ -11,7 +11,7 @@ ) from tests.e2e.utils import data_fetcher -pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) +# pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) @pytest.fixture diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index 7dc594b2db8..cda6437f67f 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -51,7 +51,7 @@ def test_mask_int(data_masker): # GIVEN an int data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(42) + masked_string = data_masker.erase(42) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -61,7 +61,7 @@ def test_mask_float(data_masker): # GIVEN a float data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(4.2) + masked_string = data_masker.erase(4.2) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -71,7 +71,7 @@ def test_mask_bool(data_masker): # GIVEN a bool data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(True) + masked_string = data_masker.erase(True) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -81,7 +81,7 @@ def test_mask_none(data_masker): # GIVEN a None data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(None) + masked_string = data_masker.erase(None) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -91,7 +91,7 @@ def 
test_mask_str(data_masker): # GIVEN a str data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask("this is a string") + masked_string = data_masker.erase("this is a string") # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -101,7 +101,7 @@ def test_mask_list(data_masker): # GIVEN a list data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask([1, 2, "string", 3]) + masked_string = data_masker.erase([1, 2, "string", 3]) # THEN the result is the data masked, while maintaining type list assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] @@ -117,7 +117,7 @@ def test_mask_dict(data_masker): } # WHEN mask is called with no fields argument - masked_string = data_masker.mask(data) + masked_string = data_masker.erase(data) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -133,7 +133,7 @@ def test_mask_dict_with_fields(data_masker): } # WHEN mask is called with a list of fields specified - masked_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) + masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_string == { @@ -156,7 +156,7 @@ def test_mask_json_dict_with_fields(data_masker): ) # WHEN mask is called with a list of fields specified - masked_json_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) + masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_json_string == { @@ -260,8 +260,8 @@ def test_encrypt_dict_with_fields(data_masker): } # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=["a.'1'.None", "a..'4'"]) - decrypted_data = data_masker.decrypt(encrypted_data, fields=["a.'1'.None", "a..'4'"]) + 
encrypted_data = data_masker.encrypt(data) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked assert decrypted_data == data @@ -279,11 +279,11 @@ def test_encrypt_json_dict_with_fields(data_masker): ) # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=["a.'1'.None", "a..'4'"]) - decrypted_data = data_masker.decrypt(encrypted_data, fields=["a.'1'.None", "a..'4'"]) + encrypted_data = data_masker.encrypt(data) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) + assert decrypted_data == data def test_encrypt_json_with_list_fields(data_masker): @@ -297,13 +297,12 @@ def test_encrypt_json_with_list_fields(data_masker): }, ) - fields_operation = ["payload.first[0]", "payload.second[0].key1[0]"] # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation) - decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + encrypted_data = data_masker.encrypt(data) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) + assert decrypted_data == data def test_encrypt_json_with_tuple_fields(data_masker): @@ -317,14 +316,12 @@ def test_encrypt_json_with_tuple_fields(data_masker): }, ) - fields_operation = ["payload.first[0]", "payload.second[0]"] # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation) - decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + encrypted_data = data_masker.encrypt(data) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) - + assert decrypted_data 
== data def test_encrypt_with_encryption_context(data_masker): # GIVEN the data type is a json representation of a dictionary with a list inside @@ -337,13 +334,12 @@ def test_encrypt_with_encryption_context(data_masker): }, ) - fields_operation = ["payload.first[0]", "payload.second[0]"] # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation, data_classification="confidential") - decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation, data_classification="confidential") + encrypted_data = data_masker.encrypt(data, data_classification="confidential") + decrypted_data = data_masker.decrypt(encrypted_data, data_classification="confidential") # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) + assert decrypted_data == data def test_encrypt_with_complex_dict(data_masker): @@ -376,14 +372,12 @@ def test_encrypt_with_complex_dict(data_masker): }, ) - fields_operation = ["address[*].postcode"] # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation) - decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + encrypted_data = data_masker.encrypt(data) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) - + assert decrypted_data == data def test_encrypt_with_slice(data_masker): # GIVEN the data type is a json representation of a dictionary with a list inside @@ -415,13 +409,12 @@ def test_encrypt_with_slice(data_masker): }, ) - fields_operation = ["address[-1]"] # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation) - decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + encrypted_data = data_masker.encrypt(data) + decrypted_data = 
data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) + assert decrypted_data == data def test_encrypt_with_complex_search(data_masker): @@ -454,13 +447,12 @@ def test_encrypt_with_complex_search(data_masker): }, ) - fields_operation = ["$.address[?(@.postcode > 81847)]"] # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation) - decrypted_data = data_masker.decrypt(encrypted_data, fields=["address[1:3]"]) + encrypted_data = data_masker.encrypt(data) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) + assert decrypted_data == data def test_encrypt_with_provider_options(data_masker): # GIVEN the data type is a json representation of a dictionary with a list inside @@ -473,11 +465,10 @@ def test_encrypt_with_provider_options(data_masker): }, ) - fields_operation = ["payload.first[0]", "payload.second[0]"] provider_options = {"algorithm": Algorithm.AES_256_GCM_HKDF_SHA512_COMMIT_KEY} # WHEN encrypting and then decrypting the encrypted data - encrypted_data = data_masker.encrypt(data, fields=fields_operation, provider_options=provider_options) - decrypted_data = data_masker.decrypt(encrypted_data, fields=fields_operation) + encrypted_data = data_masker.encrypt(data, provider_options=provider_options) + decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == json.loads(data) + assert decrypted_data == data \ No newline at end of file diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 552abab4161..350c9f01eb0 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -19,7 +19,7 @@ def 
test_mask_int(data_masker): # GIVEN an int data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(42) + masked_string = data_masker.erase(42) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -29,7 +29,7 @@ def test_mask_float(data_masker): # GIVEN a float data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(4.2) + masked_string = data_masker.erase(4.2) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -39,7 +39,7 @@ def test_mask_bool(data_masker): # GIVEN a bool data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(True) + masked_string = data_masker.erase(True) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -49,7 +49,7 @@ def test_mask_none(data_masker): # GIVEN a None data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask(None) + masked_string = data_masker.erase(None) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -59,7 +59,7 @@ def test_mask_str(data_masker): # GIVEN a str data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask("this is a string") + masked_string = data_masker.erase("this is a string") # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -69,7 +69,7 @@ def test_mask_list(data_masker): # GIVEN a list data type # WHEN mask is called with no fields argument - masked_string = data_masker.mask([1, 2, "string", 3]) + masked_string = data_masker.erase([1, 2, "string", 3]) # THEN the result is the data masked, while maintaining type list assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] @@ -85,7 +85,7 @@ def test_mask_dict(data_masker): } # WHEN mask is called with no fields argument - masked_string = data_masker.mask(data) + masked_string = 
data_masker.erase(data) # THEN the result is the data masked assert masked_string == DATA_MASKING_STRING @@ -101,7 +101,7 @@ def test_mask_dict_with_fields(data_masker): } # WHEN mask is called with a list of fields specified - masked_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) + masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_string == { @@ -124,7 +124,7 @@ def test_mask_json_dict_with_fields(data_masker): ) # WHEN mask is called with a list of fields specified - masked_json_string = data_masker.mask(data, fields=["a.'1'.None", "a..'4'"]) + masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked assert masked_json_string == { @@ -159,7 +159,7 @@ def test_parsing_unsupported_data_type(data_masker): # WHEN attempting to pass in a list of fields with input data that is not a dict with pytest.raises(DataMaskingUnsupportedTypeError): # THEN the result is a TypeError - data_masker.mask(42, ["this.field"]) + data_masker.erase(42, ["this.field"]) def test_parsing_with_empty_field(data_masker): @@ -168,7 +168,7 @@ def test_parsing_with_empty_field(data_masker): # WHEN attempting to pass in a list of fields with input data that is not a dict with pytest.raises(ValueError): # THEN the result is a TypeError - data_masker.mask(42, []) + data_masker.erase(42, []) def test_parsing_nonexistent_fields_with_raise_on_missing_field(): @@ -185,7 +185,7 @@ def test_parsing_nonexistent_fields_with_raise_on_missing_field(): # WHEN attempting to pass in fields that do not exist in the input data with pytest.raises(DataMaskingFieldNotFoundError): # THEN the result is a KeyError - data_masker.mask(data, ["'3'..True"]) + data_masker.erase(data, ["'3'..True"]) def test_parsing_nonexistent_fields_warning_on_missing_field(): @@ -201,7 +201,7 @@ def 
test_parsing_nonexistent_fields_warning_on_missing_field(): # WHEN mask is called with a non-existing field with pytest.warns(UserWarning, match="Field or expression*"): - masked_json_string = data_masker.mask(data, fields=["non-existing"]) + masked_json_string = data_masker.erase(data, fields=["non-existing"]) # THEN the "masked" payload is the same of the original assert masked_json_string == data From af6932a13dfc5d33f33b5e19d9e078ef7fd8781a Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 12:03:44 -0800 Subject: [PATCH 123/151] trying to fix pytest --- docs/utilities/data_masking.md | 20 +++++++++---------- .../src/getting_started_mask_data.py | 2 +- examples/data_masking/tests/lambda_mask.py | 2 +- .../e2e/data_masking/test_e2e_data_masking.py | 2 +- .../data_masking/test_perf_data_masking.py | 2 +- .../data_masking/test_unit_data_masking.py | 2 ++ 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 4a265126ddc..31575098b1d 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -225,7 +225,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["card_number"])` + > Expression: `data_masker.erase(data, fields=["card_number"])` ```json hl_lines="4" --8<-- "examples/data_masking/src/choosing_payload_top_keys.json" @@ -243,7 +243,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["address.postcode"])` + > Expression: `data_masker.erase(data, fields=["address.postcode"])` ```json hl_lines="6" --8<-- "examples/data_masking/src/choosing_payload_nested_key.json" @@ -261,7 +261,7 @@ Here are common scenarios to best visualize how to use `fields`. 
=== "Data" - > Expression: `data_masker.mask(data, fields=["address.postcode", "address.street"])` + > Expression: `data_masker.erase(data, fields=["address.postcode", "address.street"])` ```json hl_lines="6-7" --8<-- "examples/data_masking/src/choosing_payload_multiple_keys.json" @@ -279,7 +279,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["address"])` + > Expression: `data_masker.erase(data, fields=["address"])` ```json hl_lines="6-17" --8<-- "examples/data_masking/src/choosing_payload_all_nested_keys.json" @@ -297,7 +297,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["category..name"])` + > Expression: `data_masker.erase(data, fields=["category..name"])` ```json hl_lines="6" --8<-- "examples/data_masking/src/choosing_payload_complex_nested_keys.json" @@ -315,7 +315,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["address[*].street"])` + > Expression: `data_masker.erase(data, fields=["address[*].street"])` ```json hl_lines="8 12" --8<-- "examples/data_masking/src/choosing_payload_list_all_index.json" @@ -333,7 +333,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["address[-1].street"])` + > Expression: `data_masker.erase(data, fields=["address[-1].street"])` ```json hl_lines="16" --8<-- "examples/data_masking/src/choosing_payload_list_slice.json" @@ -351,7 +351,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.mask(data, fields=["$.address[?(@.postcode > 81846)]"])` + > Expression: `data_masker.erase(data, fields=["$.address[?(@.postcode > 81846)]"])` > `$`: Represents the root of the JSON structure. 
@@ -378,7 +378,7 @@ Note that the return will be a deserialized JSON and your desired fields updated === "Data" - Expression: `data_masker.mask(data, fields=["card_number", "address.postcode"])` + Expression: `data_masker.erase(data, fields=["card_number", "address.postcode"])` ```json --8<-- "examples/data_masking/src/choosing_payload_simple_json.json" @@ -451,7 +451,7 @@ sequenceDiagram participant Lambda participant DataMasking as Data Masking (in memory) Client->>Lambda: Invoke (event) - Lambda->>DataMasking: mask(data) + Lambda->>DataMasking: erase(data) DataMasking->>DataMasking: replaces data with ***** Note over Lambda,DataMasking: No encryption providers involved. DataMasking->>Lambda: data masked diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 37dfd7b4dab..3a676f8ee0a 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -14,6 +14,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Masking fields email, address.street, and company_address") - masked = data_masker.mask(data, fields=["email", "address.street", "company_address"]) # (1)! + masked = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)! 
return masked diff --git a/examples/data_masking/tests/lambda_mask.py b/examples/data_masking/tests/lambda_mask.py index 64833464b37..339a771beab 100644 --- a/examples/data_masking/tests/lambda_mask.py +++ b/examples/data_masking/tests/lambda_mask.py @@ -9,6 +9,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event - masked = data_masker.mask(data, fields=["testkey"]) + masked = data_masker.erase(data, fields=["testkey"]) return masked diff --git a/tests/e2e/data_masking/test_e2e_data_masking.py b/tests/e2e/data_masking/test_e2e_data_masking.py index dfa8504a3c9..9689de13885 100644 --- a/tests/e2e/data_masking/test_e2e_data_masking.py +++ b/tests/e2e/data_masking/test_e2e_data_masking.py @@ -11,7 +11,7 @@ ) from tests.e2e.utils import data_fetcher -# pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) +pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) #??? @pytest.fixture diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py index 9fae80b3a00..65f1d892646 100644 --- a/tests/performance/data_masking/test_perf_data_masking.py +++ b/tests/performance/data_masking/test_perf_data_masking.py @@ -57,7 +57,7 @@ def test_data_masking_init(benchmark): def mask_json_blob(): data_masker = DataMasking() - data_masker.mask(json_blob, json_blob_fields) + data_masker.erase(json_blob, json_blob_fields) @pytest.mark.perf diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 350c9f01eb0..e519706ce26 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -9,6 +9,7 @@ DataMaskingUnsupportedTypeError, ) +pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) #??? 
@pytest.fixture def data_masker() -> DataMasking: @@ -102,6 +103,7 @@ def test_mask_dict_with_fields(data_masker): # WHEN mask is called with a list of fields specified masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) + print('masked_string:', masked_string) # THEN the result is only the specified fields are masked assert masked_string == { From 9116ba65e3451c055408b869fbb2ee5623c6ba7b Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 12:18:09 -0800 Subject: [PATCH 124/151] trying to fix pytest --- tests/e2e/data_masking/test_e2e_data_masking.py | 2 +- tests/functional/data_masking/test_aws_encryption_sdk.py | 6 ++++-- tests/unit/data_masking/test_unit_data_masking.py | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/e2e/data_masking/test_e2e_data_masking.py b/tests/e2e/data_masking/test_e2e_data_masking.py index 9689de13885..6fd67109507 100644 --- a/tests/e2e/data_masking/test_e2e_data_masking.py +++ b/tests/e2e/data_masking/test_e2e_data_masking.py @@ -11,7 +11,7 @@ ) from tests.e2e.utils import data_fetcher -pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) #??? +pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) # ??? 
@pytest.fixture diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index cda6437f67f..f1386465591 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -6,7 +6,6 @@ from typing import Any, Callable import pytest - from aws_encryption_sdk.identifiers import Algorithm from aws_lambda_powertools.utilities.data_masking import DataMasking @@ -323,6 +322,7 @@ def test_encrypt_json_with_tuple_fields(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == data + def test_encrypt_with_encryption_context(data_masker): # GIVEN the data type is a json representation of a dictionary with a list inside data = json.dumps( @@ -379,6 +379,7 @@ def test_encrypt_with_complex_dict(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == data + def test_encrypt_with_slice(data_masker): # GIVEN the data type is a json representation of a dictionary with a list inside data = json.dumps( @@ -454,6 +455,7 @@ def test_encrypt_with_complex_search(data_masker): # THEN the result is only the specified fields are masked assert decrypted_data == data + def test_encrypt_with_provider_options(data_masker): # GIVEN the data type is a json representation of a dictionary with a list inside data = json.dumps( @@ -471,4 +473,4 @@ def test_encrypt_with_provider_options(data_masker): decrypted_data = data_masker.decrypt(encrypted_data) # THEN the result is only the specified fields are masked - assert decrypted_data == data \ No newline at end of file + assert decrypted_data == data diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index e519706ce26..0d29a4c1781 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -9,7 +9,6 @@ 
DataMaskingUnsupportedTypeError, ) -pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) #??? @pytest.fixture def data_masker() -> DataMasking: @@ -103,7 +102,7 @@ def test_mask_dict_with_fields(data_masker): # WHEN mask is called with a list of fields specified masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) - print('masked_string:', masked_string) + print("masked_string:", masked_string) # THEN the result is only the specified fields are masked assert masked_string == { From 5251e0cad30a09f413f865252ec44470ae73bc70 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 12:23:38 -0800 Subject: [PATCH 125/151] Fix linting --- tests/unit/data_masking/test_unit_data_masking.py | 1 - tests/unit/test_shared_functions.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 0d29a4c1781..350c9f01eb0 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -102,7 +102,6 @@ def test_mask_dict_with_fields(data_masker): # WHEN mask is called with a list of fields specified masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) - print("masked_string:", masked_string) # THEN the result is only the specified fields are masked assert masked_string == { diff --git a/tests/unit/test_shared_functions.py b/tests/unit/test_shared_functions.py index b5558be7b53..c8c4bb2afb2 100644 --- a/tests/unit/test_shared_functions.py +++ b/tests/unit/test_shared_functions.py @@ -1,4 +1,3 @@ -import base64 import os import warnings from dataclasses import dataclass From 284d34083a460cf662b1424c8130aeb7e0c8ebb6 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 12:59:07 -0800 Subject: [PATCH 126/151] debug linting --- aws_lambda_powertools/utilities/data_masking/base.py | 1 + 
tests/unit/data_masking/test_unit_data_masking.py | 1 + 2 files changed, 2 insertions(+) diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index 91b60f36a30..cb28b8892e8 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -222,6 +222,7 @@ def _apply_action_to_fields( new_dict = {'a': {'b': {'c': 'transformed_value'}}, 'x': {'y': 'transformed_value'}} ``` """ + logger.debug('IN APPLY ACTION TO FIELDS!!') data_parsed: dict = self._normalize_data_to_parse(fields, data) diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 350c9f01eb0..6d240bced8d 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -101,6 +101,7 @@ def test_mask_dict_with_fields(data_masker): } # WHEN mask is called with a list of fields specified + # masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked From d01b657379da0a64887ac9d6b9b3df5d39ce8e2f Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 13:01:02 -0800 Subject: [PATCH 127/151] debug linting --- tests/unit/data_masking/test_unit_data_masking.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 6d240bced8d..350c9f01eb0 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -101,7 +101,6 @@ def test_mask_dict_with_fields(data_masker): } # WHEN mask is called with a list of fields specified - # masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) masked_string = data_masker.erase(data, fields=["a.'1'.None", 
"a..'4'"]) # THEN the result is only the specified fields are masked From 00f61658041d32156ae59f265cb3c4c37b90bb81 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 14:05:16 -0800 Subject: [PATCH 128/151] Changing mask to erase in docs, comments, etc --- .../utilities/data_masking/base.py | 7 +- .../utilities/data_masking/provider/base.py | 2 +- docs/utilities/data_masking.md | 38 +++++----- .../src/getting_started_mask_data.py | 6 +- examples/data_masking/tests/lambda_mask.py | 4 +- .../data_masking/test_aws_encryption_sdk.py | 68 ++++++++--------- .../data_masking/test_perf_data_masking.py | 4 +- .../data_masking/test_unit_data_masking.py | 76 +++++++++---------- 8 files changed, 102 insertions(+), 103 deletions(-) diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index cb28b8892e8..4dea3a2fdea 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -38,9 +38,9 @@ def lambda_handler(event, context): "sensitive": "password" } - masked = masker.erase(data,fields=["sensitive"]) + erased = masker.erase(data,fields=["sensitive"]) - return masked + return erased ``` """ @@ -180,7 +180,7 @@ def _apply_action_to_fields( ) -> Union[dict, str]: """ This method takes the input data, which can be either a dictionary or a JSON string, - and applies a mask, an encryption, or a decryption to the specified fields. + and erases, encrypts, or decrypts the specified fields. 
Parameters ---------- @@ -222,7 +222,6 @@ def _apply_action_to_fields( new_dict = {'a': {'b': {'c': 'transformed_value'}}, 'x': {'y': 'transformed_value'}} ``` """ - logger.debug('IN APPLY ACTION TO FIELDS!!') data_parsed: dict = self._normalize_data_to_parse(fields, data) diff --git a/aws_lambda_powertools/utilities/data_masking/provider/base.py b/aws_lambda_powertools/utilities/data_masking/provider/base.py index 1f894c9c169..9f5a3974548 100644 --- a/aws_lambda_powertools/utilities/data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/base.py @@ -68,7 +68,7 @@ def erase(self, data, **kwargs) -> Iterable[str]: This method irreversibly erases data. If the data to be erased is of type `str`, `dict`, or `bytes`, - this method will return a masked string, i.e. "*****". + this method will return an erased string, i.e. "*****". If the data to be erased is of an iterable type like `list`, `tuple`, or `set`, this method will return a new object of the same type as the diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 31575098b1d..354b73f2549 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -5,7 +5,7 @@ description: Utility -The data masking utility can encrypt, decrypt, or irreversibly mask sensitive information to protect data confidentiality. +The data masking utility can encrypt, decrypt, or irreversibly erase sensitive information to protect data confidentiality. 
```mermaid stateDiagram-v2 @@ -14,18 +14,18 @@ stateDiagram-v2 DataMasking: DataMasking Operation: Possible operations Input: Sensitive value - Mask: Mask + Erase: Erase Encrypt: Encrypt Decrypt: Decrypt Provider: AWS Encryption SDK provider - Result: Data transformed (masked, encrypted, or decrypted) + Result: Data transformed (erased, encrypted, or decrypted) LambdaFn --> DataMasking DataMasking --> Operation state Operation { [*] --> Input - Input --> Mask: Irreversible + Input --> Erase: Irreversible Input --> Encrypt Input --> Decrypt Encrypt --> Provider @@ -37,13 +37,13 @@ stateDiagram-v2 ## Key features -* Encrypt, decrypt, or irreversibly mask data with ease +* Encrypt, decrypt, or irreversibly erase data with ease * Remove sensitive information in one or more fields within nested data * Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"} for industry and AWS security best practices ## Terminology -**Masking** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, hence why being irreversible. +**Erasing** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, hence why being irreversible. **Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only allowed personnel to decrypt it. @@ -97,13 +97,13 @@ Before you start, you will need a KMS symmetric key to encrypt and decrypt your 1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"} 2. 
[SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"} -### Masking data +### Erasing data -Masking will erase the original data and replace with `*****`. This means you cannot recover masked data, and its type will change to `str`. +Erasing will erase the original data and replace with `*****`. This means you cannot recover erased data, and its type will change to `str`. -=== "getting_started_mask_data.py" +=== "getting_started_erase_data.py" ```python hl_lines="4 8 17" - --8<-- "examples/data_masking/src/getting_started_mask_data.py" + --8<-- "examples/data_masking/src/getting_started_erase_data.py" ``` 1. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be erased with `*****`. @@ -113,9 +113,9 @@ Masking will erase the original data and replace with `*****`. This means you ca --8<-- "examples/data_masking/src/generic_data_input.json" ``` -=== "getting_started_mask_data_output.json" +=== "getting_started_erase_data_output.json" ```json hl_lines="5 7 12" - --8<-- "examples/data_masking/src/getting_started_mask_data_output.json" + --8<-- "examples/data_masking/src/getting_started_erase_data_output.json" ``` ### Encrypting data @@ -208,14 +208,14 @@ For a stronger security posture, you can add metadata to each encryption operati !!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." -You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `mask`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. +You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. -When `fields` is present, `mask` and `encrypt` behave differently: +When `fields` is present, `erase` and `encrypt` behave differently: | Operation | Behavior | Example | Obfuscated | | --------- | ----------------------------------------------------------- | ----------------------- | ------------------------------- | | `encrypt` | Obfuscate entire data and replacing with ciphertext string. | `{"cards": ["a", "b"]}` | `{"cards": "ciphertext"}` | -| `mask` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | +| `erase` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | Here are common scenarios to best visualize how to use `fields`. 
@@ -439,9 +439,9 @@ The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY The following sequence diagrams explain how `DataMasking` behaves under different scenarios. -#### Mask operation +#### Erase operation -Masking operations occur in-memory and we cannot recover the original value. +Erasing operations occur in-memory and we cannot recover the original value.
```mermaid @@ -572,9 +572,9 @@ sequenceDiagram ## Testing your code -### Testing mask operation +### Testing erase operation -Testing your code with a simple mask operation +Testing your code with a simple erase operation === "test_lambda_mask.py" ```python hl_lines="22" diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_mask_data.py index 3a676f8ee0a..a3e9fc7217e 100644 --- a/examples/data_masking/src/getting_started_mask_data.py +++ b/examples/data_masking/src/getting_started_mask_data.py @@ -12,8 +12,8 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data: dict = event.get("body", {}) - logger.info("Masking fields email, address.street, and company_address") + logger.info("Erasing fields email, address.street, and company_address") - masked = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)! + erased = data_masker.erase(data, fields=["email", "address.street", "company_address"]) # (1)! 
- return masked + return erased diff --git a/examples/data_masking/tests/lambda_mask.py b/examples/data_masking/tests/lambda_mask.py index 339a771beab..6b2f461e663 100644 --- a/examples/data_masking/tests/lambda_mask.py +++ b/examples/data_masking/tests/lambda_mask.py @@ -9,6 +9,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event - masked = data_masker.erase(data, fields=["testkey"]) + erased = data_masker.erase(data, fields=["testkey"]) - return masked + return erased diff --git a/tests/functional/data_masking/test_aws_encryption_sdk.py b/tests/functional/data_masking/test_aws_encryption_sdk.py index f1386465591..c1dfd22c6b9 100644 --- a/tests/functional/data_masking/test_aws_encryption_sdk.py +++ b/tests/functional/data_masking/test_aws_encryption_sdk.py @@ -46,67 +46,67 @@ def data_masker(monkeypatch) -> DataMasking: return DataMasking(provider=provider) -def test_mask_int(data_masker): +def test_erase_int(data_masker): # GIVEN an int data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(42) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(42) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_float(data_masker): +def test_erase_float(data_masker): # GIVEN a float data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(4.2) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(4.2) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_bool(data_masker): +def test_erase_bool(data_masker): # GIVEN a bool data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(True) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(True) # THEN the result is 
the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_none(data_masker): +def test_erase_none(data_masker): # GIVEN a None data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(None) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(None) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_str(data_masker): +def test_erase_str(data_masker): # GIVEN a str data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase("this is a string") + # WHEN erase is called with no fields argument + erased_string = data_masker.erase("this is a string") # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_list(data_masker): +def test_erase_list(data_masker): # GIVEN a list data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase([1, 2, "string", 3]) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase([1, 2, "string", 3]) # THEN the result is the data masked, while maintaining type list - assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] + assert erased_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] -def test_mask_dict(data_masker): +def test_erase_dict(data_masker): # GIVEN a dict data type data = { "a": { @@ -115,14 +115,14 @@ def test_mask_dict(data_masker): }, } - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(data) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(data) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == 
DATA_MASKING_STRING -def test_mask_dict_with_fields(data_masker): +def test_erase_dict_with_fields(data_masker): # GIVEN a dict data type data = { "a": { @@ -131,11 +131,11 @@ def test_mask_dict_with_fields(data_masker): }, } - # WHEN mask is called with a list of fields specified - masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) + # WHEN erase is called with a list of fields specified + erased_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked - assert masked_string == { + assert erased_string == { "a": { "1": {"None": DATA_MASKING_STRING, "four": "world"}, "b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}}, @@ -143,7 +143,7 @@ def test_mask_dict_with_fields(data_masker): } -def test_mask_json_dict_with_fields(data_masker): +def test_erase_json_dict_with_fields(data_masker): # GIVEN the data type is a json representation of a dictionary data = json.dumps( { @@ -154,7 +154,7 @@ def test_mask_json_dict_with_fields(data_masker): }, ) - # WHEN mask is called with a list of fields specified + # WHEN erase is called with a list of fields specified masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) # THEN the result is only the specified fields are masked diff --git a/tests/performance/data_masking/test_perf_data_masking.py b/tests/performance/data_masking/test_perf_data_masking.py index 65f1d892646..668da32a6e9 100644 --- a/tests/performance/data_masking/test_perf_data_masking.py +++ b/tests/performance/data_masking/test_perf_data_masking.py @@ -55,7 +55,7 @@ def test_data_masking_init(benchmark): pytest.fail(f"High level imports should be below {DATA_MASKING_INIT_SLA}s: {stat}") -def mask_json_blob(): +def erase_json_blob(): data_masker = DataMasking() data_masker.erase(json_blob, json_blob_fields) @@ -63,7 +63,7 @@ def mask_json_blob(): @pytest.mark.perf @pytest.mark.benchmark(group="core", disable_gc=True, warmup=False) def 
test_data_masking_encrypt_with_json_blob(benchmark): - benchmark.pedantic(mask_json_blob) + benchmark.pedantic(erase_json_blob) stat = benchmark.stats.stats.max if stat > DATA_MASKING_NESTED_ENCRYPT_SLA: pytest.fail(f"High level imports should be below {DATA_MASKING_NESTED_ENCRYPT_SLA}s: {stat}") diff --git a/tests/unit/data_masking/test_unit_data_masking.py b/tests/unit/data_masking/test_unit_data_masking.py index 350c9f01eb0..4fbbc188ceb 100644 --- a/tests/unit/data_masking/test_unit_data_masking.py +++ b/tests/unit/data_masking/test_unit_data_masking.py @@ -15,67 +15,67 @@ def data_masker() -> DataMasking: return DataMasking() -def test_mask_int(data_masker): +def test_erase_int(data_masker): # GIVEN an int data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(42) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(42) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_float(data_masker): +def test_erase_float(data_masker): # GIVEN a float data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(4.2) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(4.2) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_bool(data_masker): +def test_erase_bool(data_masker): # GIVEN a bool data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(True) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(True) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_none(data_masker): +def test_erase_none(data_masker): # GIVEN a None data type - # WHEN mask is called with no fields argument - 
masked_string = data_masker.erase(None) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(None) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_str(data_masker): +def test_erase_str(data_masker): # GIVEN a str data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase("this is a string") + # WHEN erase is called with no fields argument + erased_string = data_masker.erase("this is a string") # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_list(data_masker): +def test_erase_list(data_masker): # GIVEN a list data type - # WHEN mask is called with no fields argument - masked_string = data_masker.erase([1, 2, "string", 3]) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase([1, 2, "string", 3]) # THEN the result is the data masked, while maintaining type list - assert masked_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] + assert erased_string == [DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING, DATA_MASKING_STRING] -def test_mask_dict(data_masker): +def test_erase_dict(data_masker): # GIVEN a dict data type data = { "a": { @@ -84,14 +84,14 @@ def test_mask_dict(data_masker): }, } - # WHEN mask is called with no fields argument - masked_string = data_masker.erase(data) + # WHEN erase is called with no fields argument + erased_string = data_masker.erase(data) # THEN the result is the data masked - assert masked_string == DATA_MASKING_STRING + assert erased_string == DATA_MASKING_STRING -def test_mask_dict_with_fields(data_masker): +def test_erase_dict_with_fields(data_masker): # GIVEN a dict data type data = { "a": { @@ -100,11 +100,11 @@ def test_mask_dict_with_fields(data_masker): }, } - # WHEN mask is called with a 
list of fields specified - masked_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) + # WHEN erase is called with a list of fields specified + erased_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) - # THEN the result is only the specified fields are masked - assert masked_string == { + # THEN the result is only the specified fields are erased + assert erased_string == { "a": { "1": {"None": DATA_MASKING_STRING, "four": "world"}, "b": {"3": {"4": DATA_MASKING_STRING, "e": "world"}}, @@ -112,7 +112,7 @@ def test_mask_dict_with_fields(data_masker): } -def test_mask_json_dict_with_fields(data_masker): +def test_erase_json_dict_with_fields(data_masker): # GIVEN the data type is a json representation of a dictionary data = json.dumps( { @@ -123,10 +123,10 @@ def test_mask_json_dict_with_fields(data_masker): }, ) - # WHEN mask is called with a list of fields specified + # WHEN erase is called with a list of fields specified masked_json_string = data_masker.erase(data, fields=["a.'1'.None", "a..'4'"]) - # THEN the result is only the specified fields are masked + # THEN the result is only the specified fields are erased assert masked_json_string == { "a": { "1": {"None": DATA_MASKING_STRING, "four": "world"}, @@ -199,9 +199,9 @@ def test_parsing_nonexistent_fields_warning_on_missing_field(): }, } - # WHEN mask is called with a non-existing field + # WHEN erase is called with a non-existing field with pytest.warns(UserWarning, match="Field or expression*"): masked_json_string = data_masker.erase(data, fields=["non-existing"]) - # THEN the "masked" payload is the same of the original + # THEN the "erased" payload is the same of the original assert masked_json_string == data From 5949cea2828c3de1f72089eca6a087e705177a96 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 15:00:29 -0800 Subject: [PATCH 129/151] Fix for jsonpath upgrade --- aws_lambda_powertools/utilities/data_masking/base.py | 1 + 1 file changed, 1 insertion(+) 
diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index 4dea3a2fdea..db27a01840d 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -292,6 +292,7 @@ def _call_action( - None: The method does not return any value, as it updates the fields in-place. """ fields[field_name] = action(field_value, provider_options=provider_options, **encryption_context) + return fields[field_name] def _normalize_data_to_parse(self, fields: list, data: str | dict) -> dict: if not fields: From cfa20c83318a180249237b53a56c2727ad2355ca Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 15:47:49 -0800 Subject: [PATCH 130/151] Fixing mypy typing --- .../utilities/data_masking/base.py | 32 +++---------------- .../src/changing_default_algorithm.py | 5 ++- .../src/data_masking_function_example.py | 4 +-- .../src/getting_started_decrypt_data.py | 4 +-- .../src/getting_started_decryption_context.py | 3 +- .../src/getting_started_encrypt_data.py | 6 ++-- .../src/getting_started_encryption_context.py | 7 ++-- 7 files changed, 18 insertions(+), 43 deletions(-) diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index db27a01840d..eb7acff24a0 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -23,7 +23,7 @@ class DataMasking: Please DON'T USE THIS utility in production environments. Keep in mind that when we transition to General Availability (GA), there might be breaking changes introduced. - The DataMasking class orchestrates masking, encrypting, and decrypting + The DataMasking class orchestrates erasing, encrypting, and decrypting for the base provider. 
Example: @@ -56,36 +56,15 @@ def __init__( self.json_deserializer = self.provider.json_deserializer self.raise_on_missing_field = raise_on_missing_field - @overload - def encrypt( - self, - data: dict, - fields: None = None, - provider_options: dict | None = None, - **encryption_context: str, - ) -> dict: - ... - - @overload def encrypt( self, - data: Mapping | Sequence | Number, - fields: None = None, + data: dict | Mapping | Sequence | Number, provider_options: dict | None = None, **encryption_context: str, ) -> str: - ... - - def encrypt( - self, - data: Mapping | Sequence | Number, - fields: list[str] | None = None, - provider_options: dict | None = None, - **encryption_context: str, - ) -> str | Mapping: return self._apply_action( data=data, - fields=fields, + fields=None, action=self.provider.encrypt, provider_options=provider_options or {}, **encryption_context, @@ -94,13 +73,12 @@ def encrypt( def decrypt( self, data, - fields: list[str] | None = None, provider_options: dict | None = None, **encryption_context: str, ) -> Any: return self._apply_action( data=data, - fields=fields, + fields=None, action=self.provider.decrypt, provider_options=provider_options or {}, **encryption_context, @@ -289,7 +267,7 @@ def _call_action( - **encryption_context: Additional keyword arguments collected into a dictionary. Returns: - - None: The method does not return any value, as it updates the fields in-place. 
+ - fields[field_name]: Returns the processed field value """ fields[field_name] = action(field_value, provider_options=provider_options, **encryption_context) return fields[field_name] diff --git a/examples/data_masking/src/changing_default_algorithm.py b/examples/data_masking/src/changing_default_algorithm.py index 5fa9e41a16c..5ba80f5bbbf 100644 --- a/examples/data_masking/src/changing_default_algorithm.py +++ b/examples/data_masking/src/changing_default_algorithm.py @@ -18,16 +18,15 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict: +def lambda_handler(event: dict, context: LambdaContext) -> str: data: dict = event.get("body", {}) - logger.info("Encrypting fields email, address.street, and company_address with a different algorithm") + logger.info("Encrypting whole object with a different algorithm") provider_options = {"algorithm": Algorithm.AES_256_GCM_HKDF_SHA512_COMMIT_KEY} decrypted = data_masker.encrypt( data, - fields=["email", "address.street", "company_address"], provider_options=provider_options, ) diff --git a/examples/data_masking/src/data_masking_function_example.py b/examples/data_masking/src/data_masking_function_example.py index bb9a1ed5f06..e7ed3326890 100644 --- a/examples/data_masking/src/data_masking_function_example.py +++ b/examples/data_masking/src/data_masking_function_example.py @@ -21,6 +21,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event["body"] data_masker = DataMasking(provider=AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN])) - encrypted = data_masker.encrypt(data, fields=["address.street", "job_history.company.company_name"]) - decrypted = data_masker.decrypt(encrypted, fields=["address.street", "job_history.company.company_name"]) + encrypted = data_masker.encrypt(data) + decrypted = data_masker.decrypt(encrypted) return {"Decrypted_json": decrypted} diff --git a/examples/data_masking/src/getting_started_decrypt_data.py 
b/examples/data_masking/src/getting_started_decrypt_data.py index eaab64097c1..1c1a3d92d6d 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -19,8 +19,8 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data: dict = event.get("body", {}) - logger.info("Decrypting fields email, address.street, and company_address") + logger.info("Decrypting whole object") - decrypted = data_masker.decrypt(data, fields=["email", "address.street", "company_address"]) # (3)! + decrypted = data_masker.decrypt(data) # (3)! return decrypted diff --git a/examples/data_masking/src/getting_started_decryption_context.py b/examples/data_masking/src/getting_started_decryption_context.py index fd6540ebac8..f4b0f6d8ac3 100644 --- a/examples/data_masking/src/getting_started_decryption_context.py +++ b/examples/data_masking/src/getting_started_decryption_context.py @@ -19,11 +19,10 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: data = event.get("body", {}) - logger.info("Decrypting email field") + logger.info("Decrypting whole object") decrypted: dict = data_masker.decrypt( data, - fields=["email"], data_classification="confidential", # (1)! 
data_type="customer-data", tenant_id="a06bf973-0734-4b53-9072-39d7ac5b2cba", diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index aeba96a8395..b3709d89182 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -18,11 +18,11 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict: +def lambda_handler(event: dict, context: LambdaContext) -> str: data: dict = event.get("body", {}) - logger.info("Encrypting fields email, address.street, and company_address") + logger.info("Encrypting the whole object") - encrypted = data_masker.encrypt(data, fields=["email", "address.street", "company_address"]) # (2)! + encrypted = data_masker.encrypt(data) # (2)! return encrypted diff --git a/examples/data_masking/src/getting_started_encryption_context.py b/examples/data_masking/src/getting_started_encryption_context.py index 1cf6ce882a4..6fea5dc9f65 100644 --- a/examples/data_masking/src/getting_started_encryption_context.py +++ b/examples/data_masking/src/getting_started_encryption_context.py @@ -16,14 +16,13 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> dict: +def lambda_handler(event: dict, context: LambdaContext) -> str: data = event.get("body", {}) - logger.info("Encrypting email field") + logger.info("Encrypting whole object") - encrypted: dict = data_masker.encrypt( + encrypted: str = data_masker.encrypt( data, - fields=["email"], data_classification="confidential", # (1)! 
data_type="customer-data", tenant_id="a06bf973-0734-4b53-9072-39d7ac5b2cba", From eb66f19949d035af4c562a0269690cc0e3dc695f Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Mon, 29 Jan 2024 16:13:41 -0800 Subject: [PATCH 131/151] test e2e --- tests/e2e/data_masking/test_e2e_data_masking.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/e2e/data_masking/test_e2e_data_masking.py b/tests/e2e/data_masking/test_e2e_data_masking.py index 6fd67109507..a720a265d83 100644 --- a/tests/e2e/data_masking/test_e2e_data_masking.py +++ b/tests/e2e/data_masking/test_e2e_data_masking.py @@ -11,8 +11,6 @@ ) from tests.e2e.utils import data_fetcher -pytest.skip(reason="Data masking tests disabled until we go GA.", allow_module_level=True) # ??? - @pytest.fixture def basic_handler_fn(infrastructure: dict) -> str: From 0f01d50f14248aa181b0611947817f8386473c86 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 30 Jan 2024 16:13:50 +0000 Subject: [PATCH 132/151] Renaming files --- ...getting_started_mask_data.py => getting_started_erase_data.py} | 0 ...sk_data_output.json => getting_started_erase_data_output.json} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename examples/data_masking/src/{getting_started_mask_data.py => getting_started_erase_data.py} (100%) rename examples/data_masking/src/{getting_started_mask_data_output.json => getting_started_erase_data_output.json} (100%) diff --git a/examples/data_masking/src/getting_started_mask_data.py b/examples/data_masking/src/getting_started_erase_data.py similarity index 100% rename from examples/data_masking/src/getting_started_mask_data.py rename to examples/data_masking/src/getting_started_erase_data.py diff --git a/examples/data_masking/src/getting_started_mask_data_output.json b/examples/data_masking/src/getting_started_erase_data_output.json similarity index 100% rename from examples/data_masking/src/getting_started_mask_data_output.json rename to 
examples/data_masking/src/getting_started_erase_data_output.json From 0d96349f215f08e6d9c0632cf86bb2d33cb3c000 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 30 Jan 2024 16:26:25 +0000 Subject: [PATCH 133/151] Fixing examples --- examples/data_masking/src/encrypt_data_output.json | 14 +------------- .../src/getting_started_encrypt_data.py | 2 +- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/examples/data_masking/src/encrypt_data_output.json b/examples/data_masking/src/encrypt_data_output.json index cdd85e08701..eb00438d88f 100644 --- a/examples/data_masking/src/encrypt_data_output.json +++ b/examples/data_masking/src/encrypt_data_output.json @@ -1,13 +1 @@ -{ - "id": 1, - "name": "John Doe", - "age": 30, - "email": "InRoaXMgaXMgYSBzdHJpbmciHsLZGx2na-XzP_TB5Bf2LNU1bLc", - "address": { - "street": "XMgYSB_KDddaDJYMb-JpbmGnagTklwQ-msdaDLP", - "city": "Anytown", - "state": "CA", - "zip": "12345" - }, - "company_address": "B_KDddaDJYMb-93daSFGmnrtepytrejPNVXX98" -} \ No newline at end of file +{"body": 
"AgV4uF5K2YMtNhYrtviTwKNrUHhqQr73l/jNfukkh+qLOC8AXwABABVhd3MtY3J5cHRvLXB1YmxpYy1rZXkAREEvcjEyaFZHY1R5cjJuTDNKbTJ3UFA3R3ZjaytIdi9hekZqbXVUb25Ya3J5SzFBOUlJZDZxZXpSR1NTVnZDUUxoZz09AAEAB2F3cy1rbXMAS2Fybjphd3M6a21zOnVzLWVhc3QtMToyMDA5ODQxMTIzODY6a2V5LzZkODJiMzRlLTM2NjAtNDRlMi04YWJiLTdmMzA1OGJlYTIxMgC4AQIBAHjxYXAO7wQGd+7qxoyvXAajwqboF5FL/9lgYUNJTB8VtAHBP2hwVgw+zypp7GoMNTPAAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMx/B25MTgWwpL7CmuAgEQgDtan3orAOKFUfyNm3v6rFcglb+BVVVDV71fj4aRljhpg1ixsYFaKsoej8NcwRktIiWE+mw9XmTEVb6xFQIAABAA9DeLzlRaRQgTcXMJG0iBu/YTyyDKiROD+bU1Y09X9RBz5LA1nWIENJKq2seAhNSB/////wAAAAEAAAAAAAAAAAAAAAEAAAEBExLJ9wI4n7t+wyPEEP4kjYFBdkmNuLLsVC2Yt8mv9Y1iH2G+/g9SaIcdK57pkoW0ECpBxZVOxCuhmK2s74AJCUdem9McjS1waUKyzYTi9vv2ySNBsABIDwT990rE7jZJ3tEZAqcWZg/eWlxvnksFR/akBWZKsKzFz6lF57+cTgdISCEJRV0E7fcUeCuaMaQGK1Qw2OCmIeHEG5j5iztBkZG2IB2CVND/AbxmDUFHwgjsrJPTzaDYSufcGMoZW1A9X1sLVfqNVKvnOFP5tNY7kPF5eAI9FhGBw8SjTqODXz4k6zuqzy9no8HtXowP265U8NZ5VbVTd/zuVEbZyK5KBqzP1sExW4RhnlpXMoOs9WSuAGcwZQIxANTeEwb9V7CacV2Urt/oCqysUzhoV2AcT2ZjryFqY79Tsg+FRpIx7cBizL4ieRzbhQIwcRasNncO5OZOcmVr0MqHv+gCVznndMgjXJmWwUa7h6skJKmhhMPlN0CsugxtVWnD"} diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index b3709d89182..a6de0954840 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -25,4 +25,4 @@ def lambda_handler(event: dict, context: LambdaContext) -> str: encrypted = data_masker.encrypt(data) # (2)! 
- return encrypted + return {"body": encrypted} From 990e8ad5e40223420733e62e78e850340dd1e7f6 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Tue, 30 Jan 2024 17:42:26 +0000 Subject: [PATCH 134/151] Fixing examples --- examples/data_masking/src/getting_started_encrypt_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index a6de0954840..795b22b9a00 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -18,7 +18,7 @@ @logger.inject_lambda_context -def lambda_handler(event: dict, context: LambdaContext) -> str: +def lambda_handler(event: dict, context: LambdaContext) -> dict: data: dict = event.get("body", {}) logger.info("Encrypting the whole object") From a6bb1e347a9d36db6c4b1de6f645e01ab1eeeea9 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 30 Jan 2024 14:08:47 -0800 Subject: [PATCH 135/151] Used AWS guidelines safe names in examples --- .../utilities/data_masking/base.py | 2 +- .../utilities/data_masking/constants.py | 7 +++-- .../utilities/data_masking/provider/base.py | 2 +- .../provider/kms/aws_encryption_sdk.py | 2 +- docs/utilities/data_masking.md | 26 +++++++++---------- .../src/choosing_payload_all_nested_keys.json | 12 ++++----- ...oosing_payload_all_nested_keys_output.json | 4 +-- .../src/choosing_payload_complex_search.json | 16 ++++++------ ...hoosing_payload_complex_search_output.json | 12 ++++----- .../src/choosing_payload_list_all_index.json | 12 ++++----- ...hoosing_payload_list_all_index_output.json | 8 +++--- .../src/choosing_payload_list_index.json | 12 ++++----- .../choosing_payload_list_index_output.json | 10 +++---- .../src/choosing_payload_list_slice.json | 14 +++++----- .../choosing_payload_list_slice_output.json | 14 +++++----- .../src/choosing_payload_multiple_keys.json | 8 +++--- 
...choosing_payload_multiple_keys_output.json | 4 +-- .../src/choosing_payload_nested_key.json | 6 ++--- .../choosing_payload_nested_key_output.json | 4 +-- .../src/choosing_payload_simple_json.json | 2 +- .../choosing_payload_simple_json_output.json | 2 +- .../src/choosing_payload_top_keys.json | 4 +-- .../src/choosing_payload_top_keys_output.json | 2 +- 23 files changed, 93 insertions(+), 92 deletions(-) diff --git a/aws_lambda_powertools/utilities/data_masking/base.py b/aws_lambda_powertools/utilities/data_masking/base.py index eb7acff24a0..c2557dcef24 100644 --- a/aws_lambda_powertools/utilities/data_masking/base.py +++ b/aws_lambda_powertools/utilities/data_masking/base.py @@ -197,7 +197,7 @@ def _apply_action_to_fields( >>> fields = ['a.b.c', 'a.x.y'] # The function will transform the value at 'a.b.c' (1) and 'a.x.y' (2) # and store the result as: - new_dict = {'a': {'b': {'c': 'transformed_value'}}, 'x': {'y': 'transformed_value'}} + new_dict = {'a': {'b': {'c': '*****'}}, 'x': {'y': '*****'}} ``` """ diff --git a/aws_lambda_powertools/utilities/data_masking/constants.py b/aws_lambda_powertools/utilities/data_masking/constants.py index 70eb0baf354..f35f4291e40 100644 --- a/aws_lambda_powertools/utilities/data_masking/constants.py +++ b/aws_lambda_powertools/utilities/data_masking/constants.py @@ -1,11 +1,14 @@ +# The string that replaces values that have been erased DATA_MASKING_STRING: str = "*****" # The maximum number of entries that can be retained in the local cryptographic materials cache CACHE_CAPACITY: int = 100 # The maximum time (in seconds) that a cache entry may be kept in the cache MAX_CACHE_AGE_SECONDS: float = 300.0 # Maximum number of messages which are allowed to be encrypted under a single cached data key -MAX_MESSAGES_ENCRYPTED: int = 4294967296 # 2 ** 32 +# Values can be [1 - 4294967296] (2 ** 32) +MAX_MESSAGES_ENCRYPTED: int = 4294967296 # Maximum number of bytes which are allowed to be encrypted under a single cached data key 
-MAX_BYTES_ENCRYPTED: int = 9223372036854775807 # 2 ** 63 - 1 +# Values can be [1 - 9223372036854775807] (2 ** 63 - 1) +MAX_BYTES_ENCRYPTED: int = 9223372036854775807 ENCRYPTED_DATA_KEY_CTX_KEY = "aws-crypto-public-key" diff --git a/aws_lambda_powertools/utilities/data_masking/provider/base.py b/aws_lambda_powertools/utilities/data_masking/provider/base.py index 9f5a3974548..3aacba1b7b2 100644 --- a/aws_lambda_powertools/utilities/data_masking/provider/base.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/base.py @@ -37,7 +37,7 @@ def lambda_handler(event, context): "sensitive": "password" } - encrypted = data_masker.encrypt(data, fields=["sensitive"]) + encrypted = data_masker.encrypt(data) return encrypted ``` diff --git a/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py b/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py index ba9aeb4c9a1..bbdbb0bad6f 100644 --- a/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py +++ b/aws_lambda_powertools/utilities/data_masking/provider/kms/aws_encryption_sdk.py @@ -67,7 +67,7 @@ def lambda_handler(event, context): "sensitive": "password" } - encrypted = data_masker.encrypt(data, fields=["sensitive"]) + encrypted = data_masker.encrypt(data) return encrypted diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 354b73f2549..f685b59ae82 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -43,7 +43,7 @@ stateDiagram-v2 ## Terminology -**Erasing** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. It replaces data in-memory, hence why being irreversible. +**Erasing** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. This operation replaces data in-memory, making it a one-way action. 
**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only allowed personnel to decrypt it. @@ -99,7 +99,7 @@ Before you start, you will need a KMS symmetric key to encrypt and decrypt your ### Erasing data -Erasing will erase the original data and replace with `*****`. This means you cannot recover erased data, and its type will change to `str`. +Erasing will remove the original data and replace it with a `*****`. This means you cannot recover erased data, and the data type will change to `str` for all data unless the data to be erased is of an Iterable type (`list`, `tuple`, `set`), in which case the method will return a new object of the same type as the input data but with each element replaced by the string `*****`. === "getting_started_erase_data.py" ```python hl_lines="4 8 17" @@ -134,7 +134,6 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc ``` 1. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will ensure the data key is encrypted with both keys. - 2. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be encrypted. === "generic_data_input.json" ```json hl_lines="7-9 14" @@ -165,7 +164,6 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc 1. Note that KMS key alias or key ID won't work. 2. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will call `Decrypt` API with all master keys when trying to decrypt the data key. - 3. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter. === "encrypt_data_output.json" @@ -208,13 +206,12 @@ For a stronger security posture, you can add metadata to each encryption operati !!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." -You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`, `encrypt`, or `decrypt`. This is useful when you want to keep data structure intact except the confidential fields. +You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. -When `fields` is present, `erase` and `encrypt` behave differently: +When `fields` is present, `erase` behaves differently: | Operation | Behavior | Example | Obfuscated | | --------- | ----------------------------------------------------------- | ----------------------- | ------------------------------- | -| `encrypt` | Obfuscate entire data and replacing with ciphertext string. | `{"cards": ["a", "b"]}` | `{"cards": "ciphertext"}` | | `erase` | Replace data while keeping collections type intact. | `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | Here are common scenarios to best visualize how to use `fields`. @@ -395,7 +392,8 @@ Note that the return will be a deserialized JSON and your desired fields updated ### Data serialization ???+ note "Current limitations" - 1. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. + 1. The `fields` parameter is currently only available to use with the `erase` method, with the potential for it to be added to the `encrypt` and `decrypt` methods in the future. + 2. 
Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. Before we traverse the data structure, we perform two important operations on input data: @@ -418,12 +416,12 @@ For compatibility or performance, you can optionally pass your own JSON serializ You can modify the following values when initializing the `AWSEncryptionSDKProvider` to best accommodate your security and performance thresholds. -| Parameter | Required | Default | Description | -| -------------------------- | -------- | --------------------- | --------------------------------------------------------------------------------------------- | -| **local_cache_capacity** | | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | -| **max_cache_age_seconds** | | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | -| **max_messages_encrypted** | | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | -| **max_bytes_encrypted** | | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | +| Parameter | Default | Description | +| -------------------------- | --------------------- | --------------------------------------------------------------------------------------------- | +| **local_cache_capacity** | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | +| **max_cache_age_seconds** | `300` | The maximum time (in seconds) that a cache entry may be kept in the cache | +| **max_messages_encrypted** | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | +| **max_bytes_encrypted** | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | **Changing the default algorithm** diff --git a/examples/data_masking/src/choosing_payload_all_nested_keys.json b/examples/data_masking/src/choosing_payload_all_nested_keys.json 
index 8f0c3cd1e11..7fad154c03e 100644 --- a/examples/data_masking/src/choosing_payload_all_nested_keys.json +++ b/examples/data_masking/src/choosing_payload_all_nested_keys.json @@ -1,17 +1,17 @@ { - "name": "Leandro", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue", + "postcode": 12345, + "street": "123 Any Street", "country": "United States", "timezone": "America/La_Paz" }, { - "postcode": 94400, - "street": "623 Kraig Mall", + "postcode": 67890, + "street": "100 Main Street", "country": "United States", "timezone": "America/Mazatlan" } diff --git a/examples/data_masking/src/choosing_payload_all_nested_keys_output.json b/examples/data_masking/src/choosing_payload_all_nested_keys_output.json index 214638b2056..a28bfee974e 100644 --- a/examples/data_masking/src/choosing_payload_all_nested_keys_output.json +++ b/examples/data_masking/src/choosing_payload_all_nested_keys_output.json @@ -1,7 +1,7 @@ { - "name": "Leandro", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ "*****", "*****" diff --git a/examples/data_masking/src/choosing_payload_complex_search.json b/examples/data_masking/src/choosing_payload_complex_search.json index ebb371b8686..e8db38a79ad 100644 --- a/examples/data_masking/src/choosing_payload_complex_search.json +++ b/examples/data_masking/src/choosing_payload_complex_search.json @@ -1,19 +1,19 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Drive" }, { - "postcode": 91034, - "street": "14987 Avenue 1" + "postcode": 67890, + "street": "111 Main Street" }, { - "postcode": 78495, - "street": 
"34452 Avenue 10" + "postcode": 11111, + "street": "100 Any Street" } ] } diff --git a/examples/data_masking/src/choosing_payload_complex_search_output.json b/examples/data_masking/src/choosing_payload_complex_search_output.json index e186d0720ed..6198e27c09a 100644 --- a/examples/data_masking/src/choosing_payload_complex_search_output.json +++ b/examples/data_masking/src/choosing_payload_complex_search_output.json @@ -1,19 +1,19 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, + "postcode": 12345, "street": "*****" }, { - "postcode": 91034, + "postcode": 67890, "street": "*****" }, { - "postcode": 78495, - "street": "34452 Avenue 10" + "postcode": 11111, + "street": "100 Any Street" } ] } diff --git a/examples/data_masking/src/choosing_payload_list_all_index.json b/examples/data_masking/src/choosing_payload_list_all_index.json index d1ca2654ef6..670e3c420be 100644 --- a/examples/data_masking/src/choosing_payload_list_all_index.json +++ b/examples/data_masking/src/choosing_payload_list_all_index.json @@ -1,15 +1,15 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Drive" }, { - "postcode": 91034, - "street": "14987 Avenue 1" + "postcode": 67890, + "street": "100 Main Street," } ] } diff --git a/examples/data_masking/src/choosing_payload_list_all_index_output.json b/examples/data_masking/src/choosing_payload_list_all_index_output.json index e00d92728f7..8fb1f1b1c6d 100644 --- a/examples/data_masking/src/choosing_payload_list_all_index_output.json +++ b/examples/data_masking/src/choosing_payload_list_all_index_output.json @@ -1,15 +1,15 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - 
"card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, + "postcode": 12345, "street": "*****" }, { - "postcode": 91034, + "postcode": 67890, "street": "*****" } ] diff --git a/examples/data_masking/src/choosing_payload_list_index.json b/examples/data_masking/src/choosing_payload_list_index.json index d1ca2654ef6..0f543b42f5f 100644 --- a/examples/data_masking/src/choosing_payload_list_index.json +++ b/examples/data_masking/src/choosing_payload_list_index.json @@ -1,15 +1,15 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Street" }, { - "postcode": 91034, - "street": "14987 Avenue 1" + "postcode": 67890, + "street": "100 Main Street" } ] } diff --git a/examples/data_masking/src/choosing_payload_list_index_output.json b/examples/data_masking/src/choosing_payload_list_index_output.json index f780e8d16b9..1481d78f4b6 100644 --- a/examples/data_masking/src/choosing_payload_list_index_output.json +++ b/examples/data_masking/src/choosing_payload_list_index_output.json @@ -1,15 +1,15 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Street" }, { - "postcode": 91034, + "postcode": 67890, "street": "*****" } ] diff --git a/examples/data_masking/src/choosing_payload_list_slice.json b/examples/data_masking/src/choosing_payload_list_slice.json index ebb371b8686..c8a9f7f58af 100644 --- a/examples/data_masking/src/choosing_payload_list_slice.json +++ b/examples/data_masking/src/choosing_payload_list_slice.json @@ -1,19 +1,19 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non 
sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Street" }, { - "postcode": 91034, - "street": "14987 Avenue 1" + "postcode": 67890, + "street": "100 Main Street" }, { "postcode": 78495, - "street": "34452 Avenue 10" + "street": "111 Any Drive" } ] } diff --git a/examples/data_masking/src/choosing_payload_list_slice_output.json b/examples/data_masking/src/choosing_payload_list_slice_output.json index 9a81d19b654..efab8b03400 100644 --- a/examples/data_masking/src/choosing_payload_list_slice_output.json +++ b/examples/data_masking/src/choosing_payload_list_slice_output.json @@ -1,18 +1,18 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": [ { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Street" }, { - "postcode": 91034, - "street": "14987 Avenue 1" + "postcode": 67890, + "street": "100 Main Street" }, { - "postcode": 78495, + "postcode": 11111, "street": "*****" } ] diff --git a/examples/data_masking/src/choosing_payload_multiple_keys.json b/examples/data_masking/src/choosing_payload_multiple_keys.json index 91fcba832fc..640c274868e 100644 --- a/examples/data_masking/src/choosing_payload_multiple_keys.json +++ b/examples/data_masking/src/choosing_payload_multiple_keys.json @@ -1,9 +1,9 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": { - "postcode": 81847, - "street": "38986 Joanne Stravenue" + "postcode": 12345, + "street": "123 Any Street" } } \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_multiple_keys_output.json b/examples/data_masking/src/choosing_payload_multiple_keys_output.json index 
b638d74d80e..74454439767 100644 --- a/examples/data_masking/src/choosing_payload_multiple_keys_output.json +++ b/examples/data_masking/src/choosing_payload_multiple_keys_output.json @@ -1,7 +1,7 @@ { - "name": "Lessa", + "name": "card_number", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": { "postcode": "*****", "street": "*****" diff --git a/examples/data_masking/src/choosing_payload_nested_key.json b/examples/data_masking/src/choosing_payload_nested_key.json index 6c606fdd89f..e3ff995026f 100644 --- a/examples/data_masking/src/choosing_payload_nested_key.json +++ b/examples/data_masking/src/choosing_payload_nested_key.json @@ -1,8 +1,8 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": { - "postcode": 81847 + "postcode": 12345 } } \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_nested_key_output.json b/examples/data_masking/src/choosing_payload_nested_key_output.json index 946535f6094..463f5a943f3 100644 --- a/examples/data_masking/src/choosing_payload_nested_key_output.json +++ b/examples/data_masking/src/choosing_payload_nested_key_output.json @@ -1,7 +1,7 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222", + "card_number": "1111 2222 3333 4444", "address": { "postcode": "*****" } diff --git a/examples/data_masking/src/choosing_payload_simple_json.json b/examples/data_masking/src/choosing_payload_simple_json.json index daff4925f4e..057d43087f0 100644 --- a/examples/data_masking/src/choosing_payload_simple_json.json +++ b/examples/data_masking/src/choosing_payload_simple_json.json @@ -1 +1 @@ -'{"name": "Ruben", "operation": "non sensitive", "card_number": "1000 4444 333 2222", "address": {"postcode": 81847}}' \ No newline at end of file +'{"name": "Carlos", "operation": "non 
sensitive", "card_number": "1111 2222 3333 4444", "address": {"postcode": 12345}}' \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_simple_json_output.json b/examples/data_masking/src/choosing_payload_simple_json_output.json index 52fc8b3cc5a..b8920dc9696 100644 --- a/examples/data_masking/src/choosing_payload_simple_json_output.json +++ b/examples/data_masking/src/choosing_payload_simple_json_output.json @@ -1,5 +1,5 @@ { - "name": "Ruben", + "name": "Carlos", "operation": "non sensitive", "card_number": "*****", "address": { diff --git a/examples/data_masking/src/choosing_payload_top_keys.json b/examples/data_masking/src/choosing_payload_top_keys.json index e7b5821f3d8..dce6ed78780 100644 --- a/examples/data_masking/src/choosing_payload_top_keys.json +++ b/examples/data_masking/src/choosing_payload_top_keys.json @@ -1,5 +1,5 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", - "card_number": "1000 4444 333 2222" + "card_number": "1111 2222 3333 4444" } \ No newline at end of file diff --git a/examples/data_masking/src/choosing_payload_top_keys_output.json b/examples/data_masking/src/choosing_payload_top_keys_output.json index 79fd1407916..c7d877cb804 100644 --- a/examples/data_masking/src/choosing_payload_top_keys_output.json +++ b/examples/data_masking/src/choosing_payload_top_keys_output.json @@ -1,5 +1,5 @@ { - "name": "Lessa", + "name": "Carlos", "operation": "non sensitive", "card_number": "*****" } \ No newline at end of file From 62e1abae1e22aa045ada12e8a2683e5607a0132e Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 30 Jan 2024 15:12:49 -0800 Subject: [PATCH 136/151] Docs cleanup --- docs/utilities/data_masking.md | 18 +++++++++--------- .../choosing_payload_multiple_keys_output.json | 2 +- .../data_masking/src/encrypt_data_output.json | 4 +++- .../src/getting_started_decrypt_data.py | 2 +- .../src/getting_started_encrypt_data.py | 2 +- .../data_masking/tests/test_lambda_mask.py | 2 +- 6 
files changed, 16 insertions(+), 14 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index f685b59ae82..ab178fcbc67 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -38,14 +38,14 @@ stateDiagram-v2 ## Key features * Encrypt, decrypt, or irreversibly erase data with ease -* Remove sensitive information in one or more fields within nested data +* Erase sensitive information in one or more fields within nested data * Seamless integration with [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"} for industry and AWS security best practices ## Terminology **Erasing** replaces sensitive information **irreversibly** with a non-sensitive placeholder _(`*****`)_. This operation replaces data in-memory, making it a one-way action. -**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only allowed personnel to decrypt it. +**Encrypting** transforms plaintext into ciphertext using an encryption algorithm and a cryptographic key. It allows you to encrypt any sensitive data, so only allowed personnel can decrypt it. Learn more about encryption [here](https://aws.amazon.com/blogs/security/importance-of-encryption-and-how-aws-can-help/){target="_blank"}. **Decrypting** transforms ciphertext back into plaintext using a decryption algorithm and the correct decryption key. @@ -179,16 +179,16 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc ### Encryption context for integrity and authenticity -For a stronger security posture, you can add metadata to each encryption operation, and verify them during decryption. This is known as additional authenticated data (AAD). These are non-sensitive data that can help protect authenticity and integrity of your encrypted data.
+For a stronger security posture, you can add metadata to each encryption operation, and verify them during decryption. This is known as additional authenticated data (AAD). These are non-sensitive data that can help protect authenticity and integrity of your encrypted data, and even help to prevent a [confused deputy](https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html) situation. ???+ danger "Important considerations you should know" 1. **Exact match verification on decrypt**. Be careful using random data like `timestamps` as encryption context if you can't provide them on decrypt. 2. **Only `string` values are supported**. We will raise `DataMaskingUnsupportedTypeError` for non-string values. - 3. **Use non-sensitive data only**. When using KMS, encryption context is available as plaintext in AWS CloudTrail. Unless you [intentionally disabled KMS events](https://docs.aws.amazon.com/kms/latest/developerguide/logging-using-cloudtrail.html#filtering-kms-events){target="_blank"}. + 3. **Use non-sensitive data only**. When using KMS, encryption context is available as plaintext in AWS CloudTrail, unless you [intentionally disabled KMS events](https://docs.aws.amazon.com/kms/latest/developerguide/logging-using-cloudtrail.html#filtering-kms-events){target="_blank"}. === "getting_started_encryption_context.py" - ```python hl_lines="27-29" + ```python hl_lines="26-28" --8<-- "examples/data_masking/src/getting_started_encryption_context.py" ``` @@ -348,13 +348,13 @@ Here are common scenarios to best visualize how to use `fields`. === "Data" - > Expression: `data_masker.erase(data, fields=["$.address[?(@.postcode > 81846)]"])` + > Expression: `data_masker.erase(data, fields=["$.address[?(@.postcode > 12000)]"])` > `$`: Represents the root of the JSON structure. > `.address`: Selects the "address" property within the JSON structure. - > `(@.postcode > 81846)`: Specifies the condition that elements should meet. 
It selects elements where the value of the `postcode` property is `greater than 81846`. + > `(@.postcode > 12000)`: Specifies the condition that elements should meet. It selects elements where the value of the `postcode` property is `greater than 12000`. ```json hl_lines="8 12" --8<-- "examples/data_masking/src/choosing_payload_complex_search.json" @@ -406,7 +406,7 @@ For compatibility or performance, you can optionally pass your own JSON serializ === "advanced_custom_serializer.py" - ```python hl_lines="16" + ```python hl_lines="17-18" --8<-- "examples/data_masking/src/advanced_custom_serializer.py" ``` @@ -429,7 +429,7 @@ The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY === "changing_default_algorithm.py" - ```python hl_lines="5 29" + ```python hl_lines="5 26" --8<-- "examples/data_masking/src/changing_default_algorithm.py" ``` diff --git a/examples/data_masking/src/choosing_payload_multiple_keys_output.json b/examples/data_masking/src/choosing_payload_multiple_keys_output.json index 74454439767..fca3391f2f4 100644 --- a/examples/data_masking/src/choosing_payload_multiple_keys_output.json +++ b/examples/data_masking/src/choosing_payload_multiple_keys_output.json @@ -1,5 +1,5 @@ { - "name": "card_number", + "name": "Carlos", "operation": "non sensitive", "card_number": "1111 2222 3333 4444", "address": { diff --git a/examples/data_masking/src/encrypt_data_output.json b/examples/data_masking/src/encrypt_data_output.json index eb00438d88f..06e32c83804 100644 --- a/examples/data_masking/src/encrypt_data_output.json +++ b/examples/data_masking/src/encrypt_data_output.json @@ -1 +1,3 @@ -{"body": 
"AgV4uF5K2YMtNhYrtviTwKNrUHhqQr73l/jNfukkh+qLOC8AXwABABVhd3MtY3J5cHRvLXB1YmxpYy1rZXkAREEvcjEyaFZHY1R5cjJuTDNKbTJ3UFA3R3ZjaytIdi9hekZqbXVUb25Ya3J5SzFBOUlJZDZxZXpSR1NTVnZDUUxoZz09AAEAB2F3cy1rbXMAS2Fybjphd3M6a21zOnVzLWVhc3QtMToyMDA5ODQxMTIzODY6a2V5LzZkODJiMzRlLTM2NjAtNDRlMi04YWJiLTdmMzA1OGJlYTIxMgC4AQIBAHjxYXAO7wQGd+7qxoyvXAajwqboF5FL/9lgYUNJTB8VtAHBP2hwVgw+zypp7GoMNTPAAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMx/B25MTgWwpL7CmuAgEQgDtan3orAOKFUfyNm3v6rFcglb+BVVVDV71fj4aRljhpg1ixsYFaKsoej8NcwRktIiWE+mw9XmTEVb6xFQIAABAA9DeLzlRaRQgTcXMJG0iBu/YTyyDKiROD+bU1Y09X9RBz5LA1nWIENJKq2seAhNSB/////wAAAAEAAAAAAAAAAAAAAAEAAAEBExLJ9wI4n7t+wyPEEP4kjYFBdkmNuLLsVC2Yt8mv9Y1iH2G+/g9SaIcdK57pkoW0ECpBxZVOxCuhmK2s74AJCUdem9McjS1waUKyzYTi9vv2ySNBsABIDwT990rE7jZJ3tEZAqcWZg/eWlxvnksFR/akBWZKsKzFz6lF57+cTgdISCEJRV0E7fcUeCuaMaQGK1Qw2OCmIeHEG5j5iztBkZG2IB2CVND/AbxmDUFHwgjsrJPTzaDYSufcGMoZW1A9X1sLVfqNVKvnOFP5tNY7kPF5eAI9FhGBw8SjTqODXz4k6zuqzy9no8HtXowP265U8NZ5VbVTd/zuVEbZyK5KBqzP1sExW4RhnlpXMoOs9WSuAGcwZQIxANTeEwb9V7CacV2Urt/oCqysUzhoV2AcT2ZjryFqY79Tsg+FRpIx7cBizL4ieRzbhQIwcRasNncO5OZOcmVr0MqHv+gCVznndMgjXJmWwUa7h6skJKmhhMPlN0CsugxtVWnD"} +{ + "body": 
"AgV4uF5K2YMtNhYrtviTwKNrUHhqQr73l/jNfukkh+qLOC8AXwABABVhd3MtY3J5cHRvLXB1YmxpYy1rZXkAREEvcjEyaFZHY1R5cjJuTDNKbTJ3UFA3R3ZjaytIdi9hekZqbXVUb25Ya3J5SzFBOUlJZDZxZXpSR1NTVnZDUUxoZz09AAEAB2F3cy1rbXMAS2Fybjphd3M6a21zOnVzLWVhc3QtMToyMDA5ODQxMTIzODY6a2V5LzZkODJiMzRlLTM2NjAtNDRlMi04YWJiLTdmMzA1OGJlYTIxMgC4AQIBAHjxYXAO7wQGd+7qxoyvXAajwqboF5FL/9lgYUNJTB8VtAHBP2hwVgw+zypp7GoMNTPAAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMx/B25MTgWwpL7CmuAgEQgDtan3orAOKFUfyNm3v6rFcglb+BVVVDV71fj4aRljhpg1ixsYFaKsoej8NcwRktIiWE+mw9XmTEVb6xFQIAABAA9DeLzlRaRQgTcXMJG0iBu/YTyyDKiROD+bU1Y09X9RBz5LA1nWIENJKq2seAhNSB/////wAAAAEAAAAAAAAAAAAAAAEAAAEBExLJ9wI4n7t+wyPEEP4kjYFBdkmNuLLsVC2Yt8mv9Y1iH2G+/g9SaIcdK57pkoW0ECpBxZVOxCuhmK2s74AJCUdem9McjS1waUKyzYTi9vv2ySNBsABIDwT990rE7jZJ3tEZAqcWZg/eWlxvnksFR/akBWZKsKzFz6lF57+cTgdISCEJRV0E7fcUeCuaMaQGK1Qw2OCmIeHEG5j5iztBkZG2IB2CVND/AbxmDUFHwgjsrJPTzaDYSufcGMoZW1A9X1sLVfqNVKvnOFP5tNY7kPF5eAI9FhGBw8SjTqODXz4k6zuqzy9no8HtXowP265U8NZ5VbVTd/zuVEbZyK5KBqzP1sExW4RhnlpXMoOs9WSuAGcwZQIxANTeEwb9V7CacV2Urt/oCqysUzhoV2AcT2ZjryFqY79Tsg+FRpIx7cBizL4ieRzbhQIwcRasNncO5OZOcmVr0MqHv+gCVznndMgjXJmWwUa7h6skJKmhhMPlN0CsugxtVWnD" +} diff --git a/examples/data_masking/src/getting_started_decrypt_data.py b/examples/data_masking/src/getting_started_decrypt_data.py index 1c1a3d92d6d..d8e746a8dfe 100644 --- a/examples/data_masking/src/getting_started_decrypt_data.py +++ b/examples/data_masking/src/getting_started_decrypt_data.py @@ -21,6 +21,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Decrypting whole object") - decrypted = data_masker.decrypt(data) # (3)! 
+ decrypted = data_masker.decrypt(data) return decrypted diff --git a/examples/data_masking/src/getting_started_encrypt_data.py b/examples/data_masking/src/getting_started_encrypt_data.py index 795b22b9a00..579170113dd 100644 --- a/examples/data_masking/src/getting_started_encrypt_data.py +++ b/examples/data_masking/src/getting_started_encrypt_data.py @@ -23,6 +23,6 @@ def lambda_handler(event: dict, context: LambdaContext) -> dict: logger.info("Encrypting the whole object") - encrypted = data_masker.encrypt(data) # (2)! + encrypted = data_masker.encrypt(data) return {"body": encrypted} diff --git a/examples/data_masking/tests/test_lambda_mask.py b/examples/data_masking/tests/test_lambda_mask.py index 47e0f09cfde..596f065b380 100644 --- a/examples/data_masking/tests/test_lambda_mask.py +++ b/examples/data_masking/tests/test_lambda_mask.py @@ -10,7 +10,7 @@ def lambda_context(): class LambdaContext: function_name: str = "test" memory_limit_in_mb: int = 128 - invoked_function_arn: str = "arn:aws:lambda:eu-west-1:809313241:function:test" + invoked_function_arn: str = "arn:aws:lambda:eu-west-1:111111111:function:test" aws_request_id: str = "52fdfc07-2182-154f-163f-5f0f9a621d72" def get_remaining_time_in_millis(self) -> int: From 5ca7f991801f6ccb1343051df16d51bcb10957a4 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Tue, 30 Jan 2024 15:23:44 -0800 Subject: [PATCH 137/151] Revise docs --- docs/utilities/data_masking.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ab178fcbc67..ed596ee11d1 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -206,6 +206,9 @@ For a stronger security posture, you can add metadata to each encryption operati !!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." +???+ note "Current limitations" + 1. 
The `fields` parameter is currently only available to use with the `erase` method, with the potential for it to be added to the `encrypt` and `decrypt` methods in the future. + You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. When `fields` is present, `erase` behaves differently: @@ -392,8 +395,7 @@ Note that the return will be a deserialized JSON and your desired fields updated ### Data serialization ???+ note "Current limitations" - 1. The `fields` parameter is currently only available to use with the `erase` method, with the potential for it to be added to the `encrypt` and `decrypt` methods in the future. - 2. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. + 1. Python classes, `Dataclasses`, and `Pydantic models` are not supported yet. Before we traverse the data structure, we perform two important operations on input data: From f116d3c6f608d58779adcfa8a67b491cdca3b885 Mon Sep 17 00:00:00 2001 From: seshubaws <116689586+seshubaws@users.noreply.github.com> Date: Wed, 31 Jan 2024 08:37:00 -0800 Subject: [PATCH 138/151] Update docs/utilities/data_masking.md Co-authored-by: Heitor Lessa Signed-off-by: seshubaws <116689586+seshubaws@users.noreply.github.com> --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ed596ee11d1..ccd3abf563d 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -209,7 +209,7 @@ For a stronger security posture, you can add metadata to each encryption operati ???+ note "Current limitations" 1. The `fields` parameter is currently only available to use with the `erase` method, with the potential for it to be added to the `encrypt` and `decrypt` methods in the future. 
-You can use the `fields` parameter with dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. +You can use the `fields` parameter with the dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. When `fields` is present, `erase` behaves differently: From 839755aa5b9fcea34601ffe72e8ac2a4aa04dc55 Mon Sep 17 00:00:00 2001 From: seshubaws <116689586+seshubaws@users.noreply.github.com> Date: Wed, 31 Jan 2024 08:38:17 -0800 Subject: [PATCH 139/151] Update docs/utilities/data_masking.md Co-authored-by: Heitor Lessa Signed-off-by: seshubaws <116689586+seshubaws@users.noreply.github.com> --- docs/utilities/data_masking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index ccd3abf563d..e4c558fc402 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -213,7 +213,7 @@ You can use the `fields` parameter with the dot notation `.` to choose one or mo When `fields` is present, `erase` behaves differently: -| Operation | Behavior | Example | Obfuscated | +| Operation | Behavior | Example | Result | | --------- | ----------------------------------------------------------- | ----------------------- | ------------------------------- | | `erase` | Replace data while keeping collections type intact. 
| `{"cards": ["a", "b"]}` | `{"cards": ["*****", "*****"]}` | From 2a38326f07e16703cbb1f9bfdc398474f225cb55 Mon Sep 17 00:00:00 2001 From: seshubaws <116689586+seshubaws@users.noreply.github.com> Date: Wed, 31 Jan 2024 08:42:25 -0800 Subject: [PATCH 140/151] Update docs/utilities/data_masking.md Co-authored-by: Heitor Lessa Signed-off-by: seshubaws <116689586+seshubaws@users.noreply.github.com> --- docs/utilities/data_masking.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index e4c558fc402..897b02847ec 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -221,7 +221,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Top keys only" - You want to obfuscate data in the `card_number` field. + You want to erase data in the `card_number` field. === "Data" @@ -239,7 +239,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Nested key" - You want to obfuscate data in the `postcode` field. + You want to erase data in the `postcode` field. === "Data" @@ -257,7 +257,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Multiple keys" - You want to obfuscate data in both `postcode` and `street` fields. + You want to erase data in both `postcode` and `street` fields. === "Data" @@ -275,7 +275,7 @@ Here are common scenarios to best visualize how to use `fields`. === "All key items" - You want to obfuscate data under `address` field. + You want to erase data under `address` field. === "Data" @@ -293,7 +293,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Complex nested key" - You want to obfuscate data under `name` field. + You want to erase data under `name` field. === "Data" @@ -311,7 +311,7 @@ Here are common scenarios to best visualize how to use `fields`. 
=== "All fields in a list" - You want to obfuscate data under `street` field located at the any index of the address list. + You want to erase data under `street` field located at any index of the address list. === "Data" @@ -329,7 +329,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Slicing a list" - You want to obfuscate data by slicing a list. + You want to erase data by slicing a list. === "Data" @@ -347,7 +347,7 @@ Here are common scenarios to best visualize how to use `fields`. === "Complex expressions" - You want to obfuscate data by finding for a field with conditional expression. + You want to erase data by finding a field with a conditional expression. === "Data" @@ -368,6 +368,7 @@ Here are common scenarios to best visualize how to use `fields`. ```json hl_lines="8 12" --8<-- "examples/data_masking/src/choosing_payload_complex_search_output.json" ``` + For comprehensive guidance on using JSONPath syntax, please refer to the official documentation available at [jsonpath-ng](https://github.com/h2non/jsonpath-ng#jsonpath-syntax){target="_blank" rel="nofollow"} #### JSON From a6a975c56b7ff366b9cf4b4b88dceba0bebecd75 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 31 Jan 2024 11:59:28 -0800 Subject: [PATCH 141/151] Added data masking to features lists, removed unnecessary tabs --- README.md | 1 + docs/index.md | 1 + docs/utilities/data_masking.md | 72 ++++++++++--------- .../src/aws_encryption_provider_example.py | 34 +++++++++ 4 files changed, 74 insertions(+), 34 deletions(-) create mode 100644 examples/data_masking/src/aws_encryption_provider_example.py diff --git a/README.md b/README.md index d230c31906e..d3f0ec30603 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Powertools for AWS Lambda (Python) is a developer toolkit to implement Serverles * **[Event source data classes](https://docs.powertools.aws.dev/lambda/python/latest/utilities/data_classes/)** - Data classes describing the schema of common Lambda
event triggers * **[Parser](https://docs.powertools.aws.dev/lambda/python/latest/utilities/parser/)** - Data parsing and deep validation using Pydantic * **[Idempotency](https://docs.powertools.aws.dev/lambda/python/latest/utilities/idempotency/)** - Convert your Lambda functions into idempotent operations which are safe to retry +* **[Data Masking](https://docs.powertools.aws.dev/lambda/python/latest/utilities/data_masking/)** - Protect confidential data with easy removal or encryption * **[Feature Flags](https://docs.powertools.aws.dev/lambda/python/latest/utilities/feature_flags/)** - A simple rule engine to evaluate when one or multiple features should be enabled depending on the input * **[Streaming](https://docs.powertools.aws.dev/lambda/python/latest/utilities/streaming/)** - Streams datasets larger than the available memory as streaming data. diff --git a/docs/index.md b/docs/index.md index 7f1ca98fb74..b13bbc122d8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -701,6 +701,7 @@ Core utilities such as Tracing, Logging, Metrics, and Event Handler will be avai | [**Event source data classes**](./utilities/data_classes.md){target="_blank"} | Data classes describing the schema of common Lambda event triggers | | [**Parser**](./utilities/parser.md){target="_blank"} | Data parsing and deep validation using Pydantic | | [**Idempotency**](./utilities/idempotency.md){target="_blank"} | Idempotent Lambda handler | +| [**Data Masking**](./utilities/data_masking.md){target="_blank"} | Protect confidential data with easy removal or encryption | | [**Feature Flags**](./utilities/feature_flags.md){target="_blank"} | A simple rule engine to evaluate when one or multiple features should be enabled depending on the input | | [**Streaming**](./utilities/streaming.md){target="_blank"} | Streams datasets larger than the available memory as streaming data. 
| diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 897b02847ec..086898b7e31 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -109,14 +109,14 @@ Erasing will remove the original data and replace it with a `*****`. This means 1. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be erased with `*****`. === "generic_data_input.json" - ```json hl_lines="7 9 14" - --8<-- "examples/data_masking/src/generic_data_input.json" - ``` +```json hl_lines="7 9 14" +--8<-- "examples/data_masking/src/generic_data_input.json" +``` === "getting_started_erase_data_output.json" - ```json hl_lines="5 7 12" - --8<-- "examples/data_masking/src/getting_started_erase_data_output.json" - ``` +```json hl_lines="5 7 12" +--8<-- "examples/data_masking/src/getting_started_erase_data_output.json" +``` ### Encrypting data @@ -128,7 +128,6 @@ To encrypt, you will need an [encryption provider](#providers). Here, we will us Under the hood, we delegate a [number of operations](#encrypt-operation-with-encryption-sdk-kms) to AWS Encryption SDK to authenticate, create a portable encryption message, and actual data encryption. === "getting_started_encrypt_data.py" - ```python hl_lines="6-8 14-15 26" --8<-- "examples/data_masking/src/getting_started_encrypt_data.py" ``` @@ -136,14 +135,14 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc 1. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will ensure the data key is encrypted with both keys. === "generic_data_input.json" - ```json hl_lines="7-9 14" - --8<-- "examples/data_masking/src/generic_data_input.json" - ``` +```json hl_lines="7-9 14" +--8<-- "examples/data_masking/src/generic_data_input.json" +``` === "encrypt_data_output.json" - ```json hl_lines="5-7 12" - --8<-- "examples/data_masking/src/encrypt_data_output.json" - ``` +```json hl_lines="5-7 12" +--8<-- "examples/data_masking/src/encrypt_data_output.json" +``` ### Decrypting data @@ -167,15 +166,15 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc === "encrypt_data_output.json" - ```json hl_lines="5-7 12" - --8<-- "examples/data_masking/src/encrypt_data_output.json" - ``` +```json hl_lines="5-7 12" +--8<-- "examples/data_masking/src/encrypt_data_output.json" +``` === "getting_started_decrypt_data_output.json" - ```json hl_lines="5-7 12-17" - --8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" - ``` +```json hl_lines="5-7 12-17" +--8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" +``` ### Encryption context for integrity and authenticity @@ -204,10 +203,9 @@ For a stronger security posture, you can add metadata to each encryption operati ### Choosing parts of your data -!!! note "We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." - ???+ note "Current limitations" - 1. The `fields` parameter is currently only available to use with the `erase` method, with the potential for it to be added to the `encrypt` and `decrypt` methods in the future. + 1. The `fields` parameter is currently exclusive to the `erase` method, with potential future inclusion into `encrypt` and `decrypt`. + 2. We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." 
You can use the `fields` parameter with the dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. @@ -409,9 +407,9 @@ For compatibility or performance, you can optionally pass your own JSON serializ === "advanced_custom_serializer.py" - ```python hl_lines="17-18" - --8<-- "examples/data_masking/src/advanced_custom_serializer.py" - ``` +```python hl_lines="17-18" +--8<-- "examples/data_masking/src/advanced_custom_serializer.py" +``` ### Providers @@ -419,6 +417,12 @@ For compatibility or performance, you can optionally pass your own JSON serializ You can modify the following values when initializing the `AWSEncryptionSDKProvider` to best accommodate your security and performance thresholds. +=== "aws_encryption_provider_example.py" + +```python hl_lines="14-19" +--8<-- "examples/data_masking/src/aws_encryption_provider_example.py" +``` + | Parameter | Default | Description | | -------------------------- | --------------------- | --------------------------------------------------------------------------------------------- | | **local_cache_capacity** | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | @@ -432,9 +436,9 @@ The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY === "changing_default_algorithm.py" - ```python hl_lines="5 26" - --8<-- "examples/data_masking/src/changing_default_algorithm.py" - ``` +```python hl_lines="5 26" +--8<-- "examples/data_masking/src/changing_default_algorithm.py" +``` ### Data masking request flow @@ -578,11 +582,11 @@ sequenceDiagram Testing your code with a simple erase operation === "test_lambda_mask.py" - ```python hl_lines="22" - --8<-- "examples/data_masking/tests/test_lambda_mask.py" - ``` +```python hl_lines="22" +--8<-- "examples/data_masking/tests/test_lambda_mask.py" +``` === "lambda_mask.py" - ```python hl_lines="3 12" - --8<-- 
"examples/data_masking/tests/lambda_mask.py" - ``` +```python hl_lines="3 12" +--8<-- "examples/data_masking/tests/lambda_mask.py" +``` diff --git a/examples/data_masking/src/aws_encryption_provider_example.py b/examples/data_masking/src/aws_encryption_provider_example.py new file mode 100644 index 00000000000..2ef34a82934 --- /dev/null +++ b/examples/data_masking/src/aws_encryption_provider_example.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +import os + +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import ( + AWSEncryptionSDKProvider, +) +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN = os.getenv("KMS_KEY_ARN", "") + +encryption_provider = AWSEncryptionSDKProvider( + keys=[KMS_KEY_ARN], + local_cache_capacity=200, + max_cache_age_seconds=400, + max_messages_encrypted=200, + max_bytes_encrypted=2000) + +data_masker = DataMasking(provider=encryption_provider) + +logger = Logger() + + +@logger.inject_lambda_context +def lambda_handler(event: dict, context: LambdaContext) -> dict: + data: dict = event.get("body", {}) + + logger.info("Encrypting the whole object") + + encrypted = data_masker.encrypt(data) + + return {"body": encrypted} From 4156b3deec9f4314ec3482f9fa0e802deb826bec Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 31 Jan 2024 12:34:11 -0800 Subject: [PATCH 142/151] Made passing sdk args section more general --- docs/utilities/data_masking.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 086898b7e31..8217ee8631c 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -178,7 +178,7 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc ### Encryption context for integrity and authenticity -For a stronger 
security posture, you can add metadata to each encryption operation, and verify them during decryption. This is known as additional authenticated data (AAD). These are non-sensitive data that can help protect authenticity and integrity of your encrypted data, and even help to prevent a [confused deputy](https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html) situation. +For a stronger security posture, you can add metadata to each encryption operation, and verify them during decryption. This is known as additional authenticated data (AAD). These are non-sensitive data that can help protect authenticity and integrity of your encrypted data, and even help to prevent a [confused deputy](https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html){target="_blank"} situation. ???+ danger "Important considerations you should know" 1. **Exact match verification on decrypt**. Be careful using random data like `timestamps` as encryption context if you can't provide them on decrypt. @@ -430,9 +430,11 @@ You can modify the following values when initializing the `AWSEncryptionSDKProvi | **max_messages_encrypted** | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | | **max_bytes_encrypted** | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | -**Changing the default algorithm** +**Passing additional SDK arguments** -The AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY_ECDSA_P384` algorithm for encrypting your Data Key. If you want, you have the flexibility to customize and choose a different encryption algorithm. +You can pass additional arguments to the `AWSEncryptionSDKProvider` via the `provider_options` parameter. 
To learn more about the different arguments you can give to the SDK, see the [EncryptionSDKClient's documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.html#aws_encryption_sdk.EncryptionSDKClient.encrypt){target="_blank"}. + +For example, the AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY_ECDSA_P384` algorithm for encrypting your Data Key. If you want, you have the flexibility to customize and choose a different encryption algorithm. === "changing_default_algorithm.py" From f0cc727abe1ddd44c9f96633a8d33e4811849ac2 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 31 Jan 2024 15:28:37 -0800 Subject: [PATCH 143/151] Added using multiple keys section --- docs/utilities/data_masking.md | 24 ++++++++++----- .../src/changing_default_algorithm.py | 4 +-- .../data_masking/src/using_multiple_keys.py | 29 +++++++++++++++++++ 3 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 examples/data_masking/src/using_multiple_keys.py diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 8217ee8631c..9e65d0b4c89 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -417,12 +417,6 @@ For compatibility or performance, you can optionally pass your own JSON serializ You can modify the following values when initializing the `AWSEncryptionSDKProvider` to best accommodate your security and performance thresholds. 
-=== "aws_encryption_provider_example.py" - -```python hl_lines="14-19" ---8<-- "examples/data_masking/src/aws_encryption_provider_example.py" -``` - | Parameter | Default | Description | | -------------------------- | --------------------- | --------------------------------------------------------------------------------------------- | | **local_cache_capacity** | `100` | The maximum number of entries that can be retained in the local cryptographic materials cache | @@ -430,6 +424,12 @@ You can modify the following values when initializing the `AWSEncryptionSDKProvi | **max_messages_encrypted** | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | | **max_bytes_encrypted** | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | +=== "aws_encryption_provider_example.py" + +```python hl_lines="14-19" +--8<-- "examples/data_masking/src/aws_encryption_provider_example.py" +``` + **Passing additional SDK arguments** You can pass additional arguments to the `AWSEncryptionSDKProvider` via the `provider_options` parameter. To learn more about the different arguments you can give to the SDK, see the [EncryptionSDKClient's documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.html#aws_encryption_sdk.EncryptionSDKClient.encrypt){target="_blank"}. @@ -438,10 +438,20 @@ For example, the AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA5 === "changing_default_algorithm.py" -```python hl_lines="5 26" +```python hl_lines="5 26 30" --8<-- "examples/data_masking/src/changing_default_algorithm.py" ``` +**Using multiple keys** + +The `AWSEncryptionSDKProvider` allows you to instantiate it with several KMS keys by passing them all in a `list` to the `keys` parameter. This could be beneficial if you own keys in different regions, enabling you to perform cross-regional encryption and decryption. 
+ +=== "using_multiple_keys.py" + +```python hl_lines="15" +--8<-- "examples/data_masking/src/using_multiple_keys.py" +``` + ### Data masking request flow The following sequence diagrams explain how `DataMasking` behaves under different scenarios. diff --git a/examples/data_masking/src/changing_default_algorithm.py b/examples/data_masking/src/changing_default_algorithm.py index 5ba80f5bbbf..27d52905459 100644 --- a/examples/data_masking/src/changing_default_algorithm.py +++ b/examples/data_masking/src/changing_default_algorithm.py @@ -25,9 +25,9 @@ def lambda_handler(event: dict, context: LambdaContext) -> str: provider_options = {"algorithm": Algorithm.AES_256_GCM_HKDF_SHA512_COMMIT_KEY} - decrypted = data_masker.encrypt( + encrypted = data_masker.encrypt( data, provider_options=provider_options, ) - return decrypted + return encrypted diff --git a/examples/data_masking/src/using_multiple_keys.py b/examples/data_masking/src/using_multiple_keys.py new file mode 100644 index 00000000000..18b307b5986 --- /dev/null +++ b/examples/data_masking/src/using_multiple_keys.py @@ -0,0 +1,29 @@ +from _future_ import annotations + +import os + +from aws_lambda_powertools import Logger +from aws_lambda_powertools.utilities.data_masking import DataMasking +from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import ( + AWSEncryptionSDKProvider, +) +from aws_lambda_powertools.utilities.typing import LambdaContext + +KMS_KEY_ARN_1 = os.getenv("KMS_KEY_ARN_1", "") +KMS_KEY_ARN_2 = os.getenv("KMS_KEY_ARN_2", "") + +encryption_provider = AWSEncryptionSDKProvider(keys=[KMS_KEY_ARN_1, KMS_KEY_ARN_2]) +data_masker = DataMasking(provider=encryption_provider) + +logger = Logger() + + +@logger.inject_lambda_context +def lambda_handler(event: dict, context: LambdaContext) -> dict: + data: dict = event.get("body", {}) + + logger.info("Encrypting the whole object") + + encrypted = data_masker.encrypt(data) + + return {"body": encrypted} From 
9d1d1e92b4d04f50451d9c07609d3ea4634246b2 Mon Sep 17 00:00:00 2001 From: Seshu Brahma Date: Wed, 31 Jan 2024 15:32:55 -0800 Subject: [PATCH 144/151] Fix lint --- examples/data_masking/src/using_multiple_keys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/data_masking/src/using_multiple_keys.py b/examples/data_masking/src/using_multiple_keys.py index 18b307b5986..45c49f467d3 100644 --- a/examples/data_masking/src/using_multiple_keys.py +++ b/examples/data_masking/src/using_multiple_keys.py @@ -1,4 +1,4 @@ -from _future_ import annotations +from __future__ import annotations import os From 581145d61ad6b0fe237770c6205d65d37d44f770 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 1 Feb 2024 01:25:41 +0000 Subject: [PATCH 145/151] Polishing few things --- docs/utilities/data_masking.md | 46 +++++++++++-------- .../getting_started_decrypt_data_input.json | 3 ++ 2 files changed, 29 insertions(+), 20 deletions(-) create mode 100644 examples/data_masking/src/getting_started_decrypt_data_input.json diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 9e65d0b4c89..d30bff8f524 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -109,14 +109,14 @@ Erasing will remove the original data and replace it with a `*****`. This means 1. See [working with nested data](#working-with-nested-data) to learn more about the `fields` parameter.

If we omit `fields` parameter, the entire dictionary will be erased with `*****`. === "generic_data_input.json" -```json hl_lines="7 9 14" ---8<-- "examples/data_masking/src/generic_data_input.json" -``` + ```json hl_lines="7 9 14" + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` === "getting_started_erase_data_output.json" -```json hl_lines="5 7 12" ---8<-- "examples/data_masking/src/getting_started_erase_data_output.json" -``` + ```json hl_lines="5 7 12" + --8<-- "examples/data_masking/src/getting_started_erase_data_output.json" + ``` ### Encrypting data @@ -135,14 +135,14 @@ Under the hood, we delegate a [number of operations](#encrypt-operation-with-enc 1. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will ensure the data key is encrypted with both keys. === "generic_data_input.json" -```json hl_lines="7-9 14" ---8<-- "examples/data_masking/src/generic_data_input.json" -``` + ```json + --8<-- "examples/data_masking/src/generic_data_input.json" + ``` === "encrypt_data_output.json" -```json hl_lines="5-7 12" ---8<-- "examples/data_masking/src/encrypt_data_output.json" -``` + ```json + --8<-- "examples/data_masking/src/encrypt_data_output.json" + ``` ### Decrypting data @@ -164,21 +164,23 @@ Under the hood, we delegate a [number of operations](#decrypt-operation-with-enc 1. Note that KMS key alias or key ID won't work. 2. You can use more than one KMS Key for higher availability but increased latency.

Encryption SDK will call `Decrypt` API with all master keys when trying to decrypt the data key. -=== "encrypt_data_output.json" +=== "getting_started_decrypt_data_input.json" -```json hl_lines="5-7 12" ---8<-- "examples/data_masking/src/encrypt_data_output.json" -``` + ```json + --8<-- "examples/data_masking/src/getting_started_decrypt_data_input.json" + ``` === "getting_started_decrypt_data_output.json" -```json hl_lines="5-7 12-17" ---8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" -``` + ```json + --8<-- "examples/data_masking/src/getting_started_decrypt_data_output.json" + ``` ### Encryption context for integrity and authenticity + For a stronger security posture, you can add metadata to each encryption operation, and verify them during decryption. This is known as additional authenticated data (AAD). These are non-sensitive data that can help protect authenticity and integrity of your encrypted data, and even help to prevent a [confused deputy](https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html){target="_blank"} situation. + ???+ danger "Important considerations you should know" 1. **Exact match verification on decrypt**. Be careful using random data like `timestamps` as encryption context if you can't provide them on decrypt. @@ -205,7 +207,7 @@ For a stronger security posture, you can add metadata to each encryption operati ???+ note "Current limitations" 1. The `fields` parameter is currently exclusive to the `erase` method, with potential future inclusion into `encrypt` and `decrypt`. - 2. We support `JSON` data types only - see [data serialization for more details](#data-serialization-and-preservation)." + 2. We support `JSON` data types only - see [data serialization for more details](#data-serialization)." You can use the `fields` parameter with the dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. 
@@ -424,6 +426,8 @@ You can modify the following values when initializing the `AWSEncryptionSDKProvi | **max_messages_encrypted** | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | | **max_bytes_encrypted** | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | +If required, you have the option to customize the default values when initializing the `AWSEncryptionSDKProvider` class. + === "aws_encryption_provider_example.py" ```python hl_lines="14-19" @@ -594,11 +598,13 @@ sequenceDiagram Testing your code with a simple erase operation === "test_lambda_mask.py" + ```python hl_lines="22" --8<-- "examples/data_masking/tests/test_lambda_mask.py" ``` === "lambda_mask.py" + ```python hl_lines="3 12" --8<-- "examples/data_masking/tests/lambda_mask.py" ``` diff --git a/examples/data_masking/src/getting_started_decrypt_data_input.json b/examples/data_masking/src/getting_started_decrypt_data_input.json new file mode 100644 index 00000000000..06e32c83804 --- /dev/null +++ b/examples/data_masking/src/getting_started_decrypt_data_input.json @@ -0,0 +1,3 @@ +{ + "body": 
"AgV4uF5K2YMtNhYrtviTwKNrUHhqQr73l/jNfukkh+qLOC8AXwABABVhd3MtY3J5cHRvLXB1YmxpYy1rZXkAREEvcjEyaFZHY1R5cjJuTDNKbTJ3UFA3R3ZjaytIdi9hekZqbXVUb25Ya3J5SzFBOUlJZDZxZXpSR1NTVnZDUUxoZz09AAEAB2F3cy1rbXMAS2Fybjphd3M6a21zOnVzLWVhc3QtMToyMDA5ODQxMTIzODY6a2V5LzZkODJiMzRlLTM2NjAtNDRlMi04YWJiLTdmMzA1OGJlYTIxMgC4AQIBAHjxYXAO7wQGd+7qxoyvXAajwqboF5FL/9lgYUNJTB8VtAHBP2hwVgw+zypp7GoMNTPAAAAAfjB8BgkqhkiG9w0BBwagbzBtAgEAMGgGCSqGSIb3DQEHATAeBglghkgBZQMEAS4wEQQMx/B25MTgWwpL7CmuAgEQgDtan3orAOKFUfyNm3v6rFcglb+BVVVDV71fj4aRljhpg1ixsYFaKsoej8NcwRktIiWE+mw9XmTEVb6xFQIAABAA9DeLzlRaRQgTcXMJG0iBu/YTyyDKiROD+bU1Y09X9RBz5LA1nWIENJKq2seAhNSB/////wAAAAEAAAAAAAAAAAAAAAEAAAEBExLJ9wI4n7t+wyPEEP4kjYFBdkmNuLLsVC2Yt8mv9Y1iH2G+/g9SaIcdK57pkoW0ECpBxZVOxCuhmK2s74AJCUdem9McjS1waUKyzYTi9vv2ySNBsABIDwT990rE7jZJ3tEZAqcWZg/eWlxvnksFR/akBWZKsKzFz6lF57+cTgdISCEJRV0E7fcUeCuaMaQGK1Qw2OCmIeHEG5j5iztBkZG2IB2CVND/AbxmDUFHwgjsrJPTzaDYSufcGMoZW1A9X1sLVfqNVKvnOFP5tNY7kPF5eAI9FhGBw8SjTqODXz4k6zuqzy9no8HtXowP265U8NZ5VbVTd/zuVEbZyK5KBqzP1sExW4RhnlpXMoOs9WSuAGcwZQIxANTeEwb9V7CacV2Urt/oCqysUzhoV2AcT2ZjryFqY79Tsg+FRpIx7cBizL4ieRzbhQIwcRasNncO5OZOcmVr0MqHv+gCVznndMgjXJmWwUa7h6skJKmhhMPlN0CsugxtVWnD" +} From 8f40a1526a35fcfa3f3322cbe3f8f559a51eda98 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 1 Feb 2024 09:02:34 +0000 Subject: [PATCH 146/151] Addressing Heitor's feedback --- docs/utilities/data_masking.md | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index d30bff8f524..64e45987100 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -206,8 +206,8 @@ For a stronger security posture, you can add metadata to each encryption operati ### Choosing parts of your data ???+ note "Current limitations" - 1. The `fields` parameter is currently exclusive to the `erase` method, with potential future inclusion into `encrypt` and `decrypt`. - 2. 
We support `JSON` data types only - see [data serialization for more details](#data-serialization)." + 1. The `fields` parameter is not yet supported in `encrypt` and `decrypt` operations. + 2. We support `JSON` data types only - see [data serialization for more details](#data-serialization). You can use the `fields` parameter with the dot notation `.` to choose one or more parts of your data to `erase`. This is useful when you want to keep data structure intact except the confidential fields. @@ -407,9 +407,7 @@ When decrypting, we revert the operation to restore the original data structure. For compatibility or performance, you can optionally pass your own JSON serializer and deserializer to replace `json.dumps` and `json.loads` respectively: -=== "advanced_custom_serializer.py" - -```python hl_lines="17-18" +```python hl_lines="17-18" title="advanced_custom_serializer.py" --8<-- "examples/data_masking/src/advanced_custom_serializer.py" ``` @@ -428,9 +426,7 @@ You can modify the following values when initializing the `AWSEncryptionSDKProvi If required, you have the option to customize the default values when initializing the `AWSEncryptionSDKProvider` class. -=== "aws_encryption_provider_example.py" - -```python hl_lines="14-19" +```python hl_lines="14-19" title="aws_encryption_provider_example.py" --8<-- "examples/data_masking/src/aws_encryption_provider_example.py" ``` @@ -440,19 +436,15 @@ You can pass additional arguments to the `AWSEncryptionSDKProvider` via the `pro For example, the AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY_ECDSA_P384` algorithm for encrypting your Data Key. If you want, you have the flexibility to customize and choose a different encryption algorithm. 
-=== "changing_default_algorithm.py" - -```python hl_lines="5 26 30" +```python hl_lines="5 26 30" title="changing_default_algorithm.py" --8<-- "examples/data_masking/src/changing_default_algorithm.py" ``` -**Using multiple keys** - -The `AWSEncryptionSDKProvider` allows you to instantiate it with several KMS keys by passing them all in a `list` to the `keys` parameter. This could be beneficial if you own keys in different regions, enabling you to perform cross-regional encryption and decryption. +##### Using multiple keys -=== "using_multiple_keys.py" +You can use multiple KMS keys from more than one AWS account for higher availability, when instantiating `AWSEncryptionSDKProvider`. -```python hl_lines="15" +```python hl_lines="15" title="using_multiple_keys.py" --8<-- "examples/data_masking/src/using_multiple_keys.py" ``` From 09f89ea0f03b780e4b59635e2c47cecdd6ca2c50 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 1 Feb 2024 09:46:02 +0000 Subject: [PATCH 147/151] Adding workflow --- docs/utilities/data_masking.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 64e45987100..a77dda87db5 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -504,6 +504,39 @@ sequenceDiagram Encrypting operation using envelope encryption.
+#### Encrypt operation with multiple KMS Keys + +When encrypting data with multiple KMS keys, the `aws_encryption_sdk` executes additional encryption calls to encrypt the data with each of the specified keys. + +
+```mermaid +sequenceDiagram + autonumber + participant Client + participant Lambda + participant DataMasking as Data Masking + participant EncryptionProvider as Encryption Provider + Client->>Lambda: Invoke (event) + Lambda->>DataMasking: Init Encryption Provider with master key + Note over Lambda,DataMasking: AWSEncryptionSDKProvider([KMS_KEY]) + Lambda->>DataMasking: encrypt(data) + DataMasking->>EncryptionProvider: Create unique data key + Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API + DataMasking->>DataMasking: Cache new unique data key + DataMasking->>DataMasking: DATA_KEY.encrypt(data) + DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) + DataMasking->>DataMasking: Create encrypted message + alt Using another KMS key? + DataMasking->>EncryptionProvider: Encrypt data + Note over DataMasking,EncryptionProvider: KMS Encrypt API + end + Note over DataMasking: Encrypted message includes encrypted data, data key encrypted, algorithm, and more. + DataMasking->>Lambda: Ciphertext from encrypted message + Lambda-->>Client: Return response +``` +Encrypting operation using envelope encryption. +
+ #### Decrypt operation with Encryption SDK (KMS) We call KMS to decrypt the encrypted data key available in the encrypted message. If successful, we run authentication _(context)_ and integrity checks (_algorithm, data key length, etc_) to confirm its proceedings. From f19651d7597fb71c0e1bc1afa9d51d2cd0c03859 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 1 Feb 2024 09:57:24 +0000 Subject: [PATCH 148/151] Addressing GH hidden conversations --- docs/utilities/data_masking.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index a77dda87db5..1f3002ba4a7 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -424,15 +424,17 @@ You can modify the following values when initializing the `AWSEncryptionSDKProvi | **max_messages_encrypted** | `4294967296` | The maximum number of messages that may be encrypted under a cache entry | | **max_bytes_encrypted** | `9223372036854775807` | The maximum number of bytes that may be encrypted under a cache entry | -If required, you have the option to customize the default values when initializing the `AWSEncryptionSDKProvider` class. +If required, you can customize the default values when initializing the `AWSEncryptionSDKProvider` class. ```python hl_lines="14-19" title="aws_encryption_provider_example.py" --8<-- "examples/data_masking/src/aws_encryption_provider_example.py" ``` -**Passing additional SDK arguments** +##### Passing additional SDK arguments -You can pass additional arguments to the `AWSEncryptionSDKProvider` via the `provider_options` parameter. To learn more about the different arguments you can give to the SDK, see the [EncryptionSDKClient's documentation](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.html#aws_encryption_sdk.EncryptionSDKClient.encrypt){target="_blank"}. +!!! 
note "See the [AWS Encryption SDK docs for more details](https://aws-encryption-sdk-python.readthedocs.io/en/latest/generated/aws_encryption_sdk.html#aws_encryption_sdk.EncryptionSDKClient.encrypt){target="_blank"}" + +As an escape hatch mechanism, you can pass additional arguments to the `AWSEncryptionSDKProvider` via the `provider_options` parameter. For example, the AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA512_COMMIT_KEY_ECDSA_P384` algorithm for encrypting your Data Key. If you want, you have the flexibility to customize and choose a different encryption algorithm. From f66068337f698d7b5cb393b5cd4104426903c39e Mon Sep 17 00:00:00 2001 From: Cavalcante Damascena Date: Thu, 1 Feb 2024 11:14:45 +0000 Subject: [PATCH 149/151] Documentation --- docs/utilities/data_masking.md | 35 +++++++++++++------------ examples/data_masking/sam/template.yaml | 2 ++ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 1f3002ba4a7..4619d6f74d1 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -96,6 +96,7 @@ Before you start, you will need a KMS symmetric key to encrypt and decrypt your 1. [Key policy examples using IAM Roles](https://docs.aws.amazon.com/kms/latest/developerguide/key-policy-default.html#key-policy-default-allow-administrators){target="_blank"} 2. [SAM generated CloudFormation Resources](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-specification-generated-resources-function.html#sam-specification-generated-resources-function-not-role){target="_blank"} + 3. 
Required only when using [multiple keys](#using-multiple-keys) ### Erasing data @@ -411,6 +412,14 @@ For compatibility or performance, you can optionally pass your own JSON serializ --8<-- "examples/data_masking/src/advanced_custom_serializer.py" ``` +### Using multiple keys + +You can use multiple KMS keys from more than one AWS account for higher availability, when instantiating `AWSEncryptionSDKProvider`. + +```python hl_lines="15" title="using_multiple_keys.py" +--8<-- "examples/data_masking/src/using_multiple_keys.py" +``` + ### Providers #### AWS Encryption SDK @@ -442,14 +451,6 @@ For example, the AWS Encryption SDK defaults to using the `AES_256_GCM_HKDF_SHA5 --8<-- "examples/data_masking/src/changing_default_algorithm.py" ``` -##### Using multiple keys - -You can use multiple KMS keys from more than one AWS account for higher availability, when instantiating `AWSEncryptionSDKProvider`. - -```python hl_lines="15" title="using_multiple_keys.py" ---8<-- "examples/data_masking/src/using_multiple_keys.py" -``` - ### Data masking request flow The following sequence diagrams explain how `DataMasking` behaves under different scenarios. @@ -508,7 +509,7 @@ sequenceDiagram #### Encrypt operation with multiple KMS Keys -When encrypting data with multiple KMS keys, the `aws_encryption_sdk` executes additional encryption calls to encrypt the data with each of the specified keys. +When encrypting data with multiple KMS keys, the `aws_encryption_sdk` makes additional API calls to encrypt the data with each of the specified keys.
```mermaid @@ -520,19 +521,19 @@ sequenceDiagram participant EncryptionProvider as Encryption Provider Client->>Lambda: Invoke (event) Lambda->>DataMasking: Init Encryption Provider with master key - Note over Lambda,DataMasking: AWSEncryptionSDKProvider([KMS_KEY]) + Note over Lambda,DataMasking: AWSEncryptionSDKProvider([KEY_1, KEY_2]) Lambda->>DataMasking: encrypt(data) DataMasking->>EncryptionProvider: Create unique data key - Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API + Note over DataMasking,EncryptionProvider: KMS GenerateDataKey API - KEY_1 DataMasking->>DataMasking: Cache new unique data key DataMasking->>DataMasking: DATA_KEY.encrypt(data) - DataMasking->>DataMasking: MASTER_KEY.encrypt(DATA_KEY) - DataMasking->>DataMasking: Create encrypted message - alt Using another KMS key? - DataMasking->>EncryptionProvider: Encrypt data - Note over DataMasking,EncryptionProvider: KMS Encrypt API + DataMasking->>DataMasking: KEY_1.encrypt(DATA_KEY) + loop For every additional KMS Key + DataMasking->>EncryptionProvider: Encrypt DATA_KEY + Note over DataMasking,EncryptionProvider: KMS Encrypt API - KEY_2 end - Note over DataMasking: Encrypted message includes encrypted data, data key encrypted, algorithm, and more. + DataMasking->>DataMasking: Create encrypted message + Note over DataMasking: Encrypted message includes encrypted data, all data keys encrypted, algorithm, and more. DataMasking->>Lambda: Ciphertext from encrypted message Lambda-->>Client: Return response ``` diff --git a/examples/data_masking/sam/template.yaml b/examples/data_masking/sam/template.yaml index 96410cc1425..67d5d923515 100644 --- a/examples/data_masking/sam/template.yaml +++ b/examples/data_masking/sam/template.yaml @@ -62,4 +62,6 @@ Resources: Action: - kms:Decrypt # to decrypt encrypted data key - kms:GenerateDataKey # to create an unique and random data key for encryption + # Encrypt permission is required only when using multiple keys + - kms:Encrypt # (3)! 
Resource: "*" From cfeb833ee5b947a2dd3bd46275b621571684d13b Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 1 Feb 2024 11:43:40 +0000 Subject: [PATCH 150/151] Final changes --- Makefile | 4 ++-- docs/utilities/data_masking.md | 2 +- pyproject.toml | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 80c89f72961..7fa170b28c6 100644 --- a/Makefile +++ b/Makefile @@ -8,13 +8,13 @@ dev: pip install --upgrade pip pre-commit poetry poetry config --local virtualenvs.in-project true @$(MAKE) dev-version-plugin - poetry install --extras "all datamasking-aws-sdk redis" + poetry install --extras "all redis" pre-commit install dev-gitpod: pip install --upgrade pip poetry @$(MAKE) dev-version-plugin - poetry install --extras "all datamasking-aws-sdk redis" + poetry install --extras "all redis" pre-commit install format: diff --git a/docs/utilities/data_masking.md b/docs/utilities/data_masking.md index 4619d6f74d1..5c30edc6bff 100644 --- a/docs/utilities/data_masking.md +++ b/docs/utilities/data_masking.md @@ -75,7 +75,7 @@ graph LR !!! note "This is not necessary if you're installing Powertools for AWS Lambda (Python) via [Lambda Layer/SAR](../index.md#lambda-layer){target="_blank"}" -Add `aws-lambda-powertools[datamasking-aws-sdk]` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_. This will install the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"}. +Add `aws-lambda-powertools[datamasking]` as a dependency in your preferred tool: _e.g._, _requirements.txt_, _pyproject.toml_. This will install the [AWS Encryption SDK](https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/introduction.html){target="_blank"}. AWS Encryption SDK contains non-Python dependencies. 
This means you should use [AWS SAM CLI](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/using-sam-cli-build.html#using-sam-cli-build-options-container){target="_blank"} or [official build container images](https://gallery.ecr.aws/search?searchTerm=sam%2Fbuild-python&popularRegistries=amazon){target="_blank"} when building your application for AWS Lambda. Local development should work as expected. diff --git a/pyproject.toml b/pyproject.toml index cfc7220efa7..cb1f322e9ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ redis = {version = ">=4.4,<6.0", optional = true} typing-extensions = "^4.6.2" datadog-lambda = { version = ">=4.77,<6.0", optional = true } aws-encryption-sdk = { version = "^3.1.1", optional = true } -jsonpath-ng = "^1.6.0" +jsonpath-ng = { version = "^1.6.0", optional = true } [tool.poetry.dev-dependencies] coverage = {extras = ["toml"], version = "^7.2"} @@ -98,11 +98,11 @@ parser = ["pydantic"] validation = ["fastjsonschema"] tracer = ["aws-xray-sdk"] redis = ["redis"] -all = ["pydantic", "aws-xray-sdk", "fastjsonschema"] +all = ["pydantic", "aws-xray-sdk", "fastjsonschema", "aws-encryption-sdk", "jsonpath-ng"] # allow customers to run code locally without emulators (SAM CLI, etc.) 
aws-sdk = ["boto3"] datadog = ["datadog-lambda"] -datamasking-aws-sdk = ["aws-encryption-sdk"] +datamasking = ["aws-encryption-sdk", "jsonpath-ng"] [tool.poetry.group.dev.dependencies] cfn-lint = "0.83.8" From 007fba7f268450a1b64c794df60b1d4cb1fa1091 Mon Sep 17 00:00:00 2001 From: Leandro Damascena Date: Thu, 1 Feb 2024 11:49:08 +0000 Subject: [PATCH 151/151] Lock file --- poetry.lock | 370 ++++++++++++++++++++++++++++------------------------ 1 file changed, 198 insertions(+), 172 deletions(-) diff --git a/poetry.lock b/poetry.lock index 318b983a005..8e7fcad2cd9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -768,58 +768,67 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "41.0.7" +version = "42.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:3c78451b78313fa81607fa1b3f1ae0a5ddd8014c38a02d9db0616133987b9cdf"}, - {file = "cryptography-41.0.7-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:928258ba5d6f8ae644e764d0f996d61a8777559f72dfeb2eea7e2fe0ad6e782d"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a1b41bc97f1ad230a41657d9155113c7521953869ae57ac39ac7f1bb471469a"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:841df4caa01008bad253bce2a6f7b47f86dc9f08df4b433c404def869f590a15"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5429ec739a29df2e29e15d082f1d9ad683701f0ec7709ca479b3ff2708dae65a"}, - {file = "cryptography-41.0.7-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:43f2552a2378b44869fe8827aa19e69512e3245a219104438692385b0ee119d1"}, - {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_aarch64.whl", hash = 
"sha256:af03b32695b24d85a75d40e1ba39ffe7db7ffcb099fe507b39fd41a565f1b157"}, - {file = "cryptography-41.0.7-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:49f0805fc0b2ac8d4882dd52f4a3b935b210935d500b6b805f321addc8177406"}, - {file = "cryptography-41.0.7-cp37-abi3-win32.whl", hash = "sha256:f983596065a18a2183e7f79ab3fd4c475205b839e02cbc0efbbf9666c4b3083d"}, - {file = "cryptography-41.0.7-cp37-abi3-win_amd64.whl", hash = "sha256:90452ba79b8788fa380dfb587cca692976ef4e757b194b093d845e8d99f612f2"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:079b85658ea2f59c4f43b70f8119a52414cdb7be34da5d019a77bf96d473b960"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:b640981bf64a3e978a56167594a0e97db71c89a479da8e175d8bb5be5178c003"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e3114da6d7f95d2dee7d3f4eec16dacff819740bbab931aff8648cb13c5ff5e7"}, - {file = "cryptography-41.0.7-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d5ec85080cce7b0513cfd233914eb8b7bbd0633f1d1703aa28d1dd5a72f678ec"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7a698cb1dac82c35fcf8fe3417a3aaba97de16a01ac914b89a0889d364d2f6be"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:37a138589b12069efb424220bf78eac59ca68b95696fc622b6ccc1c0a197204a"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:68a2dec79deebc5d26d617bfdf6e8aab065a4f34934b22d3b5010df3ba36612c"}, - {file = "cryptography-41.0.7-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:09616eeaef406f99046553b8a40fbf8b1e70795a91885ba4c96a70793de5504a"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:48a0476626da912a44cc078f9893f292f0b3e4c739caf289268168d8f4702a39"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = 
"sha256:c7f3201ec47d5207841402594f1d7950879ef890c0c495052fa62f58283fde1a"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c5ca78485a255e03c32b513f8c2bc39fedb7f5c5f8535545bdc223a03b24f248"}, - {file = "cryptography-41.0.7-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6c391c021ab1f7a82da5d8d0b3cee2f4b2c455ec86c8aebbc84837a631ff309"}, - {file = "cryptography-41.0.7.tar.gz", hash = "sha256:13f93ce9bea8016c253b34afc6bd6a75993e5c40672ed5405a9c832f0d4a00bc"}, -] - -[package.dependencies] -cffi = ">=1.12" + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"}, + {file = 
"cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"}, + {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"}, + {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"}, + {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"}, + {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"}, + {file = 
"cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"}, + {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] nox = ["nox"] -pep8test = ["black", "check-sdist", "mypy", "ruff"] +pep8test = ["check-sdist", "click", "mypy", 
"ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] [[package]] name = "datadog" -version = "0.47.0" +version = "0.48.0" description = "The Datadog Python library" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" files = [ - {file = "datadog-0.47.0-py2.py3-none-any.whl", hash = "sha256:a45ec997ab554208837e8c44d81d0e1456539dc14da5743687250e028bc809b7"}, - {file = "datadog-0.47.0.tar.gz", hash = "sha256:47be3b2c3d709a7f5b709eb126ed4fe6cc7977d618fe5c158dd89c2a9f7d9916"}, + {file = "datadog-0.48.0-py2.py3-none-any.whl", hash = "sha256:c3f819e2dc632a546a5b4e8d45409e996d4fa18c60df7814c82eda548e0cca59"}, + {file = "datadog-0.48.0.tar.gz", hash = "sha256:d4d661358c3e7f801fbfe15118f5ccf08b9bd9b1f45b8b910605965283edad64"}, ] [package.dependencies] @@ -867,71 +876,71 @@ six = "*" [[package]] name = "ddtrace" -version = "2.4.0" +version = "2.5.2" description = "Datadog APM client library" optional = false python-versions = ">=3.7" files = [ - {file = "ddtrace-2.4.0-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:90641de597d3424573aa96263509800bb64018727bf74e29e250e6d21200a4be"}, - {file = "ddtrace-2.4.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:75b7d01af5fb8d279a2edb56d48af0dc221ed43f4e5049387e4a9be529217033"}, - {file = "ddtrace-2.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f104933ffbae735887e10e3e0d9a5d28dd7d42d1fd86141c4fa171c07598b561"}, - {file = "ddtrace-2.4.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d675545d2fd7c5be10fe704a3f151add0ce8b101c976ca0ab452699aac0d8489"}, - {file = "ddtrace-2.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:98b380dabf377a318ebd909423293b02beaa43ffda03ad129a5a93c4a1a4b5c6"}, - {file = "ddtrace-2.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d2f93337c1546404967525388a45174481daa72ecf7d3a1e4c21349e1a2d572c"}, - {file = "ddtrace-2.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0e345e034e8962d76642ab2763f5bdb1bc4424c2ea17d9ca5f82e093160d6ca1"}, - {file = "ddtrace-2.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1aa5e1a7121d08d50795e3f6218f3959cfa55363a3896210410ef354a7573de9"}, - {file = "ddtrace-2.4.0-cp310-cp310-win32.whl", hash = "sha256:d9c69a42919a27cff8d42461b301014d79683c40f60d0cb5f3000e4ff7cb907f"}, - {file = "ddtrace-2.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:962de6a60f42e2cde1823c47a3383bb0d6beaa954d57b12687688935d0ddd3d3"}, - {file = "ddtrace-2.4.0-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ed91c32353c8288fb95de67faa341c5ab9a089c0161ad51fc739f0db2b46866e"}, - {file = "ddtrace-2.4.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:410c9b9241ed2514dc9413887d852140cc7ff396b40ffc412835a14668b9b1a3"}, - {file = "ddtrace-2.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:639b11f780d0ed1a372a2a6b92cc1b9c586a0fea27439557e768d5ebedabbc34"}, - {file = "ddtrace-2.4.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:08861e4acd61198428f0d994db1bc5d2893ec816b9cd78c0c6d1fc963f0dc771"}, - {file = "ddtrace-2.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aad627a4611bff8f527e2c0c0fc51be9d74a563328269f53b871901570ee4ff3"}, - {file = "ddtrace-2.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6ae2f75f2edc068d6c104ceb0e882a6dfad8f702b27384b3dac5290aebbc248"}, - {file = "ddtrace-2.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:82a0832000fedcb95856477bab95c6f151fa28ede3aceafaabe7c08beffaa577"}, - {file = "ddtrace-2.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:f8b1baac10f9cc3803854f802062e02ae5de0d5546f19165c3b6e8840e9b09f4"}, - {file = "ddtrace-2.4.0-cp311-cp311-win32.whl", hash = "sha256:c687fe20b17e2d24de222913dc2383e6b1462641d8ff18d27678dcb72ced82a3"}, - {file = "ddtrace-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:47296b116a97e01fe6bf48a4eea4e825212ee23288ee064964ab87ba608fc038"}, - {file = "ddtrace-2.4.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:6e2b2b4160ea53dd3e4f8bb35af7124a5e8954c8badffa81468c8a62d12acc51"}, - {file = "ddtrace-2.4.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:49ac0d69f98a4ff2175db39481598300fd94f038a027b537d0a66d9dbeca1ed7"}, - {file = "ddtrace-2.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2604e1c91b44d3b6fb15d0337cda1ac2c15aec215f6a44e1bb39d25b47c2633c"}, - {file = "ddtrace-2.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb7d2c846e3d7e8156199855d4db014a71d62daedba84a213416e2a488e834b3"}, - {file = "ddtrace-2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85774e12d5d92152cd1c64f3a8a2f4dbe7f3d39201f8a8ff5e914b9639fe6e17"}, - {file = "ddtrace-2.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:418c0c990c505accc8665bfc056f4297938a54176157bf1f0765f2fae584efec"}, - {file = "ddtrace-2.4.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:183f7c3ddd9a2891bd1b6f5ea3af6d16517775268b3940259820ca3c83292d16"}, - {file = "ddtrace-2.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:eb90e71b70e3ea6c24711cfb5c48c711a2175c315daf07f4f28903aa773a48b7"}, - {file = "ddtrace-2.4.0-cp312-cp312-win32.whl", hash = "sha256:5eab75f1d4170c41de1f9c32e7e39714b2dd11a59d9ff7e94a199b88fa813ecd"}, - {file = "ddtrace-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:d892e0b71f3b6bcf31920b5e7fd699c86aea734bc02eec3c1b22acd8f63057e4"}, - {file = "ddtrace-2.4.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:c07ea7a17a2897d891ee5e95de3b0e4f57184c471e87ffcc7208b3ccd68b9fcc"}, - 
{file = "ddtrace-2.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05b28815e65d6361cd056c877ab051e132a6929b0d353313a499122e6522ea3"}, - {file = "ddtrace-2.4.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:63719bfc8fe5e8510022a3275145d6b2b1c4f955c395698fb792d99d4cda698d"}, - {file = "ddtrace-2.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:190f96eccdd8107cc93db6e79af4b8fc9403418c823d895af898cf635f5cada6"}, - {file = "ddtrace-2.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b0fdb6a2fe0eadd122df4ea3a11690cb88f4f642bd19b1a21d01e9dcfd6eb20c"}, - {file = "ddtrace-2.4.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1b2bf18ee10ea8fe668096a6c70db4161e228edee161b04719506947d7117937"}, - {file = "ddtrace-2.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ca5fa396b8df0d7b55ad9e8d5b19be09c5dedefa388bf7590340ace5ce392e14"}, - {file = "ddtrace-2.4.0-cp37-cp37m-win32.whl", hash = "sha256:c67a4d8767aa269f8dfab79ae39b8170b95de6813bd1cba17dc951f0a1ee462b"}, - {file = "ddtrace-2.4.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1db7931541052622a91c8c6594b274d96efe956d5dbbe09c57a50c0f74640b52"}, - {file = "ddtrace-2.4.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:a8b6ab9f26d2ea50dfa69a282d727c865461f0c1b535f973922072f700cde031"}, - {file = "ddtrace-2.4.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:9ad7aa89988b77b893c3e9898fc48e3cef9471bc2648d6a83cc800b49cad1f1f"}, - {file = "ddtrace-2.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38b95920bcc17289a0e3871830ef19030df763039021a796a1debb7fd4ea347b"}, - {file = "ddtrace-2.4.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9152dcc4b8a98392ce5853b8e160f8d215ddd148337d42861ab3c12635b32b75"}, - {file = "ddtrace-2.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c335be0ab8f4f376f51111219a9d85bcdbd6d75c18a8d5471817645bed1430c0"}, - {file 
= "ddtrace-2.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0c95339694034d4fbf9e1b2a0918f99b3936336e8deb4d513e9cf7a6ae1532f3"}, - {file = "ddtrace-2.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:f8bddc5e84e50663b64fbad2e2c61203484dea06de7759a47f096514d99f5c8f"}, - {file = "ddtrace-2.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af7c4c94959481bc4060c7dfb5f7e70b1929b18089c7ea0329fc3f28707fd8a"}, - {file = "ddtrace-2.4.0-cp38-cp38-win32.whl", hash = "sha256:de3fcca4747340c835e7816009dd363d4e02dc5fc25365b2418dc3d986a6550a"}, - {file = "ddtrace-2.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:2f3dbcff2b305d34ecc63db05d0efeb923846ba07871be6f0a3509a33290fb69"}, - {file = "ddtrace-2.4.0-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:7b43e2e890e868a133afc25f57774bb6bc8ae8841094cba4e8f2b3ee50f9c7ee"}, - {file = "ddtrace-2.4.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:de66ea03ca5b3f02d0f878fc9d486d4d4f654cf66b38d3fdf73bf314fc0e3f5b"}, - {file = "ddtrace-2.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01cba8d20d4754135411e0e3398af02bc29b3c5f3dc85b1ee8cdfb9a0532f793"}, - {file = "ddtrace-2.4.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb324809582b65baa682f045cb2873d686de3aa93cac75718462d0a23f980836"}, - {file = "ddtrace-2.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f475ea4e2086e6a16a48568688918b21043ba391a6f968cb9bc17ec70d51de75"}, - {file = "ddtrace-2.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f1d4a5d9c89db2cc0e4a6eaf10b6d1af449d1ef14060000b23eceee19497705e"}, - {file = "ddtrace-2.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a057db38d52271b6206bac2ab23f2a36cbe547397cba1ce586021df711570559"}, - {file = "ddtrace-2.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:45ee78018276764f7fdaf1cf3b945660cf1ab39e1a03e0c61bf1984a71562204"}, - {file = "ddtrace-2.4.0-cp39-cp39-win32.whl", hash = 
"sha256:4f63dea207c90bb2c2d52ff9de0ee71b27aedb5d8540745e4e0b38a896737de0"}, - {file = "ddtrace-2.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:e3523c71d37fb3135d0817e92b486bcee7829c41e5465ed41b080286d7e2739d"}, - {file = "ddtrace-2.4.0.tar.gz", hash = "sha256:fb1bab23debb3a1fb71e3d6a1ce9818bc5e6ad9b885b901f78f3f28639393ecb"}, + {file = "ddtrace-2.5.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:f918538a6adb33696be653d343ee318b16ea977376d9b7214d14fe97c42e9bd9"}, + {file = "ddtrace-2.5.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:f56735eb636d3ab2f7224f261d3a6bd43f884e9901d68407d485ea65f3dc0f46"}, + {file = "ddtrace-2.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72d21fe6842a8d80c8765dd699153a2475ae2d49e82e10f9668eadb08b454040"}, + {file = "ddtrace-2.5.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a6e48caf63506d7ac3df7caa955b6258de91c1a1f55149506ab8ac36143770b9"}, + {file = "ddtrace-2.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3f26e04ba7521f6885d871fd6266fedc0a7ccf2637b85579c058927404bad7"}, + {file = "ddtrace-2.5.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:15d78b0cd5d2090c063031d76e933b8b24e043d524a6091a751cf57b0fab025f"}, + {file = "ddtrace-2.5.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ee76beaf87695f2204b0c2c2a3664b39f3483b7a8447b28e5e2bcc899861b3eb"}, + {file = "ddtrace-2.5.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8840f0e82d6dca3888bd06e7ab0ca6d39009f3cd3475028d8bc03c939127afc2"}, + {file = "ddtrace-2.5.2-cp310-cp310-win32.whl", hash = "sha256:a34ccab0c8991c5fc5252d5cd6e88852cd7f77c8bf838de84e70b4a3bfacaad4"}, + {file = "ddtrace-2.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:ffa4f5779c7000fe5960156bd15339184355b30a661b0955799cae50da5d03a7"}, + {file = "ddtrace-2.5.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:ea2740a3d61876cb07b271af444e98cdc8b730497cfcddbc3794c7a7441b8d15"}, + {file = 
"ddtrace-2.5.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:62e775ba9d2a2b5f952a6609029e965057bdd852ccd6e53b55c0f82ae83aa542"}, + {file = "ddtrace-2.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30186112f156a564efda5e2018240b55baee7664897ca5fc35c452d032a77185"}, + {file = "ddtrace-2.5.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9dccdc69de364cffc2b892280724c78cb54db151452a0b6d1b4a89b6f060c44"}, + {file = "ddtrace-2.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa2543c2303ab325af7794f2a8a420133cd9222e70bfbce3869da146fc5e2ba"}, + {file = "ddtrace-2.5.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:aa2e64f79ada9f2fd5307cd0eba726d8585e47b0282fb9463aaa4b267265e94a"}, + {file = "ddtrace-2.5.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:37b4d55a5be59530e6e5761a36d727aee812be69c81b00ee0182eb62be6f3b75"}, + {file = "ddtrace-2.5.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d97f990d2322a23e82203cc5a2aa694fb0d42541a44bb120390e6598a63e5f5"}, + {file = "ddtrace-2.5.2-cp311-cp311-win32.whl", hash = "sha256:5d3f1bc3ce87fbcf2256197178179ef681df720ebbc39b0559bda00247744533"}, + {file = "ddtrace-2.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:a50057085b0972e695bb1ef3042f6cd6a1a3b12111fac4985942f2dbbcf8ac2f"}, + {file = "ddtrace-2.5.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:b923b099b9a1e50f01ce8bcd4d11e3255a48c71f3e6314dd9a482baed0a87ed6"}, + {file = "ddtrace-2.5.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:512d3975b1657c706ca9c84373e5fce323f6fc94bfac33c30876ad8d55e0ea71"}, + {file = "ddtrace-2.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c54bc474c70151d5a141061b6c20a1efabdf458e4239c790d45fa12a13b8e7d"}, + {file = "ddtrace-2.5.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b5fb2bbd38dc46ba6a7ea1031c4751b1ca888be5fac8a42049ebc2517707c00d"}, + {file = 
"ddtrace-2.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caa6fb6bcfb3810d8f0882e489e7d2ef4dd3a92b452cfdd8d1fd4703dc496b17"}, + {file = "ddtrace-2.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3f4eed40d978352c7371804ecb68bbe9e55967bb904bd03b0568554e0b6b92cf"}, + {file = "ddtrace-2.5.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:57606af5380888e2e7cc67b7c4fa5e1bc51d29c48f004b4be0cbe1b319fddc75"}, + {file = "ddtrace-2.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ee8d0259a004964a8eddb394aa84a5754435d4270cd2041e6559c9e68fa49141"}, + {file = "ddtrace-2.5.2-cp312-cp312-win32.whl", hash = "sha256:4df564e620ec7e657fcdb0d5bf1231aa1357bf49b736f0d9e9f6df17a23fc569"}, + {file = "ddtrace-2.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:637f16af1c84566bde044798312c67bc5676df949632ab02e740440558f2a598"}, + {file = "ddtrace-2.5.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:d24841a9390f3e169edcaf1ca5ac80599062e66dee43a510decb25e779b6f7b4"}, + {file = "ddtrace-2.5.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49aa4e0210862e829e09569de2e2f34ac17c5e246567c5b6662ec21e2a06d938"}, + {file = "ddtrace-2.5.2-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:985738fe875b11f05dfa2b1f21a619d499344eb740f63e01d6eae1fb29eb949b"}, + {file = "ddtrace-2.5.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8814321822e4afc95ac86fbc476dc20d78dd4b1d510c02606459df4580093d18"}, + {file = "ddtrace-2.5.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ad6c0ae7baff9d00c689834aec0627274d681ed1d2a8ae627348a6191e8d32ec"}, + {file = "ddtrace-2.5.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:aa596f2e80c525a2310e605bfa3fa6ba6790b2ae90c02e47ceee0e62ceae17a6"}, + {file = "ddtrace-2.5.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6bdfae9fa03af334820678196a4895450d0b6bd9f1b5119d42ddbd327a55fcce"}, + {file = "ddtrace-2.5.2-cp37-cp37m-win32.whl", 
hash = "sha256:227bb0391d310e0d5a54505c7ab59f9692a5db91dc492373489bc45726980e1d"}, + {file = "ddtrace-2.5.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6e55c4738b58b4452933204305243e19000f6f283af93bf51b63382100cb8f21"}, + {file = "ddtrace-2.5.2-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:4d9e7a9e26c38ae1e368f5d820e78459ff2d39689f40d4a3db185ddb3686c383"}, + {file = "ddtrace-2.5.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:c361ea11b442b04d8e011528205ed65b926d71d18f38d372270204eabf49b068"}, + {file = "ddtrace-2.5.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aafd86eeea622cd0e8cf6b63632efc67a52a32317d2a376382ef6170d383c9f"}, + {file = "ddtrace-2.5.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3ff039635470ba483ed448baaf6337d85a731b17af62fef06dfa811f761f374f"}, + {file = "ddtrace-2.5.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20f1cb3bea1170410d603f9d557918c24d4d8783659c03817daea6352d9f37f9"}, + {file = "ddtrace-2.5.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7351500241eb24c7d789b371a6860ca2b0e2db1ff9d317089153b562a3a461e1"}, + {file = "ddtrace-2.5.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a2cfc6ee800890556e404b94d13680c83952efa5d3dafa72ef8cb08a8782f874"}, + {file = "ddtrace-2.5.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:96a791f03b62ebdb9f3e635a0e93711149123a8fc1c1c152be0d1cdb5d8e6359"}, + {file = "ddtrace-2.5.2-cp38-cp38-win32.whl", hash = "sha256:6c61e72abec3f2f6b46e53712a32a971de1b6a9be657d5ebeff1334f6146babc"}, + {file = "ddtrace-2.5.2-cp38-cp38-win_amd64.whl", hash = "sha256:b93d8b536f5fc45a72bb2785051dc729f4d581ef2d69ed10bccae6a7487477b2"}, + {file = "ddtrace-2.5.2-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:38cbcb7b4ff1371480b29228d2b8e570e7d7b386a7632b96f9600135ec3eb9db"}, + {file = "ddtrace-2.5.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:a270d128c6067f52a76ecbb658fae3f4d3bd4888baa9e6159ff82b6de14c53be"}, + 
{file = "ddtrace-2.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e59f3958016fcec5eb16abd7979a9ec4d850733e2a03b878b096277fc092784"}, + {file = "ddtrace-2.5.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:066403f0e00a8de09c8187037befe7463d1fab5d8178b62a07c2542792710d14"}, + {file = "ddtrace-2.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cbbcbf24bca8497f1412ec438fbdc94847aef9e86092ffd4f8626bbe6d278d33"}, + {file = "ddtrace-2.5.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d34f8da809e2783770a6c88396b3653fb12a4196e9b5f16b8c10f37bbf2b7b31"}, + {file = "ddtrace-2.5.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9eaca41664dd0c2bd7257fe2e91c7e46718b20591bfaa0b5c01c39b599115f88"}, + {file = "ddtrace-2.5.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8f4b67e02ba5c316711719dcfc15e94f47684e7af1785289d016a29a2c664827"}, + {file = "ddtrace-2.5.2-cp39-cp39-win32.whl", hash = "sha256:9bbd675d73aae6516e02a86cb830778771dafb0e182d5a122270ccd82ee77eed"}, + {file = "ddtrace-2.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:e93f3f5d3d57beb492b04286c758be65495908bd313df6f56865ad7af222e49e"}, + {file = "ddtrace-2.5.2.tar.gz", hash = "sha256:5addeb19eea5ebdc23c493e5635f4c8737795b48ba637117a1895f31b900985f"}, ] [package.dependencies] @@ -943,7 +952,7 @@ bytecode = [ cattrs = "*" ddsketch = ">=2.0.1" envier = "*" -importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +importlib-metadata = {version = "<=6.5.0", markers = "python_version < \"3.8\""} opentelemetry-api = ">=1" protobuf = ">=3" setuptools = {version = "*", markers = "python_version >= \"3.12\""} @@ -1019,13 +1028,13 @@ ssh = ["paramiko (>=2.4.3)"] [[package]] name = "envier" -version = "0.5.0" +version = "0.5.1" description = "Python application configuration via the environment" optional = false python-versions = ">=3.7" files = [ - {file = "envier-0.5.0-py3-none-any.whl", hash = 
"sha256:5fed6099ee5d7ad4cf664f8bb99d1281d4ab5fadeec8f40ba9458610938293be"}, - {file = "envier-0.5.0.tar.gz", hash = "sha256:f35ca8605f0c70c2c0367133af9dc1ef16710021dbd0e28c1b0a83070db06768"}, + {file = "envier-0.5.1-py3-none-any.whl", hash = "sha256:b45ef6051fea33d0c32a64e186bff2cfb446e2242d6781216c9bc9ce708c5909"}, + {file = "envier-0.5.1.tar.gz", hash = "sha256:bd5ccf707447973ea0f4125b7df202ba415ad888bcdcb8df80e0b002ee11ffdb"}, ] [package.extras] @@ -1310,13 +1319,13 @@ files = [ [[package]] name = "importlib-metadata" -version = "6.7.0" +version = "6.5.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.7" files = [ - {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, - {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, + {file = "importlib_metadata-6.5.0-py3-none-any.whl", hash = "sha256:03ba783c3a2c69d751b109fc0c94a62c51f581b3d6acf8ed1331b6d5729321ff"}, + {file = "importlib_metadata-6.5.0.tar.gz", hash = "sha256:7a8bdf1bc3a726297f5cfbc999e6e7ff6b4fa41b26bba4afc580448624460045"}, ] [package.dependencies] @@ -1326,7 +1335,7 @@ zipp = ">=0.5" [package.extras] docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] [[package]] name = "importlib-resources" 
@@ -1454,13 +1463,13 @@ jsonpointer = ">=1.9" [[package]] name = "jsonpath-ng" -version = "1.6.0" +version = "1.6.1" description = "A final implementation of JSONPath for Python that aims to be standard compliant, including arithmetic and binary comparison operators and providing clear AST for metaprogramming." -optional = false +optional = true python-versions = "*" files = [ - {file = "jsonpath-ng-1.6.0.tar.gz", hash = "sha256:5483f8e9d74c39c9abfab554c070ae783c1c8cbadf5df60d561bc705ac68a07e"}, - {file = "jsonpath_ng-1.6.0-py3-none-any.whl", hash = "sha256:6fd04833412c4b3d9299edf369542f5e67095ca84efa17cbb7f06a34958adc9f"}, + {file = "jsonpath-ng-1.6.1.tar.gz", hash = "sha256:086c37ba4917304850bd837aeab806670224d3f038fe2833ff593a672ef0a5fa"}, + {file = "jsonpath_ng-1.6.1-py3-none-any.whl", hash = "sha256:8f22cd8273d7772eea9aaa84d922e0841aa36fdb8a2c6b7f6c3791a16a9bc0be"}, ] [package.dependencies] @@ -1615,61 +1624,71 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "markupsafe" -version = "2.1.3" +version = "2.1.4" description = "Safely add untrusted strings to HTML/XML markup." 
optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, - {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, - {file = 
"MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, - {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, - {file = 
"MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, - {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, - {file = 
"MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, - {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, - {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, - {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, + {file = 
"MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"}, + {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"}, + {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"}, + {file = "MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = 
"sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"}, + {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"}, + {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"}, + {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"}, + {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"}, ] [[package]] @@ -2168,7 +2187,7 @@ testing = ["pytest", "pytest-benchmark"] name = "ply" version = "3.11" description = "Python Lex & Yacc" -optional = false +optional = true python-versions = "*" files = [ {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, @@ -2509,13 +2528,13 @@ six = ">=1.5" [[package]] name = "pytz" -version = "2023.3.post1" +version = "2023.4" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" 
files = [ - {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, - {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, + {file = "pytz-2023.4-py2.py3-none-any.whl", hash = "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a"}, + {file = "pytz-2023.4.tar.gz", hash = "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40"}, ] [[package]] @@ -2562,13 +2581,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3144,6 +3156,20 @@ files = [ [package.dependencies] types-urllib3 = "*" +[[package]] +name = "types-requests" +version = "2.31.0.20231231" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.7" +files = [ + {file = "types-requests-2.31.0.20231231.tar.gz", hash = "sha256:0f8c0c9764773384122813548d9eea92a5c4e1f33ed54556b508968ec5065cee"}, + {file = "types_requests-2.31.0.20231231-py3-none-any.whl", hash = "sha256:2e2230c7bc8dd63fa3153c1c0ae335f8a368447f0582fc332f17d54f88e69027"}, +] + +[package.dependencies] +urllib3 = ">=2" + [[package]] name = "types-urllib3" version = "1.26.25.14" @@ -3390,10 +3416,10 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [extras] -all = ["aws-xray-sdk", "fastjsonschema", "pydantic"] +all = ["aws-encryption-sdk", "aws-xray-sdk", "fastjsonschema", "jsonpath-ng", "pydantic"] aws-sdk = ["boto3"] datadog = ["datadog-lambda"] -datamasking-aws-sdk = ["aws-encryption-sdk"] +datamasking = ["aws-encryption-sdk", "jsonpath-ng"] parser = ["pydantic"] redis = ["redis"] tracer = ["aws-xray-sdk"] @@ -3402,4 +3428,4 @@ validation = ["fastjsonschema"] [metadata] lock-version = "2.0" python-versions = 
"^3.7.4" -content-hash = "f4c66a8fa656902aba0c04cc8b5dc236d7f0ed6f7c3e22507cc89e711b0b62b2" +content-hash = "28c3a405185f635f8e65ea51adfe1cfc589cb469497d800100521f91037ba26a"