Skip to content

Commit 35fb001

Browse files
seshubaws, roger-zhangg, leandrodamascena, Roy Assis, rubenfonseca
authored
feat(data_masking): add new sensitive data masking utility (#2197)
Co-authored-by: Roger Zhang <[email protected]> Co-authored-by: Leandro Damascena <[email protected]> Co-authored-by: Roy Assis <[email protected]> Co-authored-by: Ruben Fonseca <[email protected]> Co-authored-by: Roger Zhang <[email protected]> Co-authored-by: aal80 <[email protected]> Co-authored-by: Seshu Brahma <[email protected]> Co-authored-by: Heitor Lessa <[email protected]>
1 parent e441c0b commit 35fb001

38 files changed

+2435
-160
lines changed

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ target:
77
dev:
88
pip install --upgrade pip pre-commit poetry
99
@$(MAKE) dev-version-plugin
10-
poetry install --extras "all"
10+
poetry install --extras "all datamasking-aws-sdk"
1111
pre-commit install
1212

1313
dev-gitpod:
1414
pip install --upgrade pip poetry
1515
@$(MAKE) dev-version-plugin
16-
poetry install --extras "all"
16+
poetry install --extras "all datamasking-aws-sdk"
1717
pre-commit install
1818

1919
format:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from aws_lambda_powertools.utilities.data_masking.base import DataMasking
2+
3+
__all__ = [
4+
"DataMasking",
5+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import json
2+
from typing import Optional, Union
3+
4+
from aws_lambda_powertools.utilities.data_masking.provider import BaseProvider
5+
6+
7+
class DataMasking:
    """
    A utility class for masking, encrypting, and decrypting sensitive data.

    Every operation is delegated to a provider. When no provider is given, a
    plain ``BaseProvider`` is used.

    Each public method works in one of two modes:

    * ``fields`` is ``None``: the provider action is applied to ``data`` as a whole.
    * ``fields`` is a list: ``data`` must be a dictionary or a JSON string; the
      provider action is applied only to the values found at those dotted paths
      and the resulting dictionary is returned.

    Example:
    ```
    from aws_lambda_powertools.utilities.data_masking.base import DataMasking

    def lambda_handler(event, context):
        masker = DataMasking()

        data = {
            "project": "powertools",
            "sensitive": "xxxxxxxxxx"
        }

        masked = masker.mask(data,fields=["sensitive"])

        return masked

    ```
    """

    def __init__(self, provider: Optional["BaseProvider"] = None):
        self.provider = provider or BaseProvider()

    def encrypt(self, data, fields=None, **provider_options):
        """
        Encrypt ``data``, or only the given ``fields`` of it, using the provider's ``encrypt``.

        Parameters
        ----------
        data : any
            The input data to process.
        fields : Optional[List[any]] = None
            Dotted paths of the fields to encrypt. If 'None', the whole 'data' is encrypted.
        **provider_options:
            Additional keyword arguments forwarded to the provider.

        Returns
        -------
        any
            Whatever the provider returns, or the modified dictionary when 'fields' is given.
        """
        return self._apply_action(data, fields, self.provider.encrypt, **provider_options)

    def decrypt(self, data, fields=None, **provider_options):
        """
        Decrypt ``data``, or only the given ``fields`` of it, using the provider's ``decrypt``.

        Parameters
        ----------
        data : any
            The input data to process.
        fields : Optional[List[any]] = None
            Dotted paths of the fields to decrypt. If 'None', the whole 'data' is decrypted.
        **provider_options:
            Additional keyword arguments forwarded to the provider.

        Returns
        -------
        any
            Whatever the provider returns, or the modified dictionary when 'fields' is given.
        """
        return self._apply_action(data, fields, self.provider.decrypt, **provider_options)

    def mask(self, data, fields=None, **provider_options):
        """
        Mask ``data``, or only the given ``fields`` of it, using the provider's ``mask``.

        Parameters
        ----------
        data : any
            The input data to process.
        fields : Optional[List[any]] = None
            Dotted paths of the fields to mask. If 'None', the whole 'data' is masked.
        **provider_options:
            Additional keyword arguments forwarded to the provider.

        Returns
        -------
        any
            Whatever the provider returns, or the modified dictionary when 'fields' is given.
        """
        return self._apply_action(data, fields, self.provider.mask, **provider_options)

    def _apply_action(self, data, fields, action, **provider_options):
        """
        Helper method to determine whether to apply a given action to the entire input data
        or to specific fields if the 'fields' argument is specified.

        Parameters
        ----------
        data : any
            The input data to process.
        fields : Optional[List[any]] = None
            A list of fields to apply the action to. If 'None', the action is applied to the entire 'data'.
            An empty list is NOT treated as 'None': no field is processed and the data is
            returned as a dictionary without any action applied.
        action : Callable
            The action to apply to the data. It should be a callable that performs an operation on the data
            and returns the modified value.
        **provider_options:
            Additional keyword arguments to pass to the 'action' function.

        Returns
        -------
        any
            The modified data after applying the action.
        """
        if fields is None:
            return action(data, **provider_options)

        return self._apply_action_to_fields(data, fields, action, **provider_options)

    @staticmethod
    def _require_dict(node, path: str) -> None:
        """Raise a descriptive TypeError when a field path walks through something that is not a dictionary."""
        if not isinstance(node, dict):
            # Only the type is reported: the value itself may be the sensitive data.
            raise TypeError(
                f"Cannot resolve field '{path}'. Expected a dictionary while traversing, but got {type(node)}.",
            )

    def _apply_action_to_fields(
        self,
        data: Union[dict, str],
        fields: list,
        action,
        **provider_options,
    ) -> Union[dict, str]:
        """
        This method takes the input data, which can be either a dictionary or a JSON string,
        and applies a mask, an encryption, or a decryption to the specified fields.

        The input is never modified in place: a dictionary is copied through a JSON
        round trip, and a JSON string is parsed into a new dictionary. The parsed
        dictionary is returned in both cases (a JSON string is not re-serialized).

        Parameters
        ----------
        data : Union[dict, str]
            The input data to process. It can be either a dictionary or a JSON string.
        fields : List
            A list of fields to apply the action to. Each field is a string of keys
            joined by dots (e.g. 'a.b.c'). A field that is not a string is converted
            with 'json.dumps' first, so the integer 3 addresses the key '3'.
        action : Callable
            The action to apply to the fields. It should be a callable that takes the current
            value of the field as the first argument and any additional arguments that might be required
            for the action. It performs an operation on the current value using the provided arguments and
            returns the modified value.
        **provider_options:
            Additional keyword arguments to pass to the 'action' function.

        Returns
        -------
        dict
            The modified dictionary after applying the action to the
            specified fields.

        Raises
        -------
        ValueError
            If 'fields' parameter is None.
        TypeError
            If the 'data' parameter is not a traversable type, or if a field path
            goes through a value that is not a dictionary.
        KeyError
            If a key of a field path does not exist in the data.

        Example
        -------
        ```python
        >>> data = {'a': {'b': {'c': 1}}, 'x': {'y': 2}}
        >>> fields = ['a.b.c', 'x.y']
        # The function will transform the value at 'a.b.c' (1) and 'x.y' (2)
        # and store the result as:
        new_dict = {'a': {'b': {'c': 'transformed_value'}}, 'x': {'y': 'transformed_value'}}
        ```
        """

        if fields is None:
            raise ValueError("No fields specified.")

        if isinstance(data, str):
            # Parse JSON string as dictionary
            parsed_data = json.loads(data)
        elif isinstance(data, dict):
            # The JSON round trip turns non-string keys (i.e. ints) into strings so that
            # dotted paths can address them, and gives us a deep copy to modify freely.
            parsed_data = json.loads(json.dumps(data))
        else:
            raise TypeError(
                f"Unsupported data type for 'data' parameter. Expected a traversable type, but got {type(data)}.",
            )

        # For example: 'a.b.c' in ['a.b.c', 'x.y']
        for field in fields:
            # Non-string fields are addressed by their JSON representation
            path = field if isinstance(field, str) else json.dumps(field)

            # 'a.b.c' -> parents ['a', 'b'] and leaf 'c'
            *parent_keys, leaf_key = path.split(".")

            # Walk down from the root to the dictionary that holds the target value
            container = parsed_data
            for key in parent_keys:
                self._require_dict(container, path)
                container = container[key]

            self._require_dict(container, path)

            # Replace the target value with the result of the action
            container[leaf_key] = action(container[leaf_key], **provider_options)

        return parsed_data
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Placeholder that stands in for a masked value (returned by BaseProvider.mask).
DATA_MASKING_STRING: str = "*****"
2+
# NOTE(review): the three limits below are not used anywhere in the visible code;
# presumably they configure data key caching in the encryption provider - confirm there.
CACHE_CAPACITY: int = 100
3+
# NOTE(review): the name suggests a maximum cache entry age, in seconds - confirm.
MAX_CACHE_AGE_SECONDS: float = 300.0
4+
# NOTE(review): the name suggests a cap on messages encrypted per cached key - confirm.
MAX_MESSAGES_ENCRYPTED: int = 200
5+
# NOTE: You can also set max messages/bytes per data key
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from aws_lambda_powertools.utilities.data_masking.provider.base import BaseProvider
2+
3+
__all__ = [
4+
"BaseProvider",
5+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import json
2+
from typing import Any
3+
4+
from aws_lambda_powertools.utilities.data_masking.constants import DATA_MASKING_STRING
5+
6+
7+
class BaseProvider:
    """
    Base class for data masking providers.

    It implements ``mask`` and the default JSON (de)serializers. ``encrypt`` and
    ``decrypt`` are placeholders: a subclass that does not override them can still
    be instantiated, but calling either method raises NotImplementedError.

    Every action accepts ``**provider_options`` because ``DataMasking`` forwards
    the caller's keyword arguments to the provider; the base implementations
    ignore them.
    """

    def __init__(self, json_serializer=None, json_deserializer=None) -> None:
        """
        Parameters
        ----------
        json_serializer : Optional[Callable]
            Callable used to serialize data; defaults to ``default_json_serializer``.
        json_deserializer : Optional[Callable]
            Callable used to deserialize data; defaults to ``default_json_deserializer``.
        """
        self.json_serializer = json_serializer or self.default_json_serializer
        self.json_deserializer = json_deserializer or self.default_json_deserializer

    def default_json_serializer(self, data):
        """Serialize ``data`` to JSON and return it as UTF-8 encoded bytes."""
        return json.dumps(data).encode("utf-8")

    def default_json_deserializer(self, data):
        """Decode UTF-8 bytes and parse them as JSON."""
        return json.loads(data.decode("utf-8"))

    def encrypt(self, data, **provider_options) -> str:
        """
        Encrypt ``data``. Must be implemented by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement encrypt()")

    def decrypt(self, data, **provider_options) -> Any:
        """
        Decrypt ``data``. Must be implemented by subclasses.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError("Subclasses must implement decrypt()")

    def mask(self, data, **provider_options) -> Any:
        """
        Replace ``data`` with the masking placeholder.

        Parameters
        ----------
        data : any
            The value to mask.
        **provider_options:
            Accepted for compatibility with ``DataMasking``; ignored here.

        Returns
        -------
        any
            For a list, tuple, or set: a collection of the same type holding one
            placeholder per original element (a set therefore collapses to a single
            placeholder). For anything else, including str, bytes, and dict: the
            placeholder string itself.
        """
        if isinstance(data, (list, tuple, set)):
            return type(data)([DATA_MASKING_STRING] * len(data))
        return DATA_MASKING_STRING
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from aws_lambda_powertools.utilities.data_masking.provider.kms.aws_encryption_sdk import AwsEncryptionSdkProvider
2+
3+
__all__ = [
4+
"AwsEncryptionSdkProvider",
5+
]

0 commit comments

Comments
 (0)