Skip to content

Commit 5cbf81f

Browse files
committed
fix(serverless): Enhance yaml parsing, better support for file expansions
- Improve yaml parsing wrt file() expansion - Add tests for file() expansion - Add a '__file__' marker attribute to yaml nodes - Utilize the '__file__' marker when generating reports for serverless - Raise CfnParseError on circular inclusions - Added a ``logger.error`` that logs specifics on pyyaml parse errors - Updated the serverless graph builder to cope with file() expansion Fixes #7006
1 parent cfa62cd commit 5cbf81f

File tree

21 files changed

+245
-55
lines changed

21 files changed

+245
-55
lines changed

checkov/cloudformation/cfn_utils.py

+3
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,9 @@ def enrich_resources_with_globals(original_template: dict[str, Any]) -> dict[str
260260

261261
# Iterate over the resources in the template copy
262262
for _resource_name, resource_details in new_template.get('Resources', {}).items():
263+
if _resource_name == '__file__':
264+
continue
265+
263266
resource_type = resource_details.get('Type', '')
264267
if (resource_type not in supported_types_and_globals):
265268
continue

checkov/cloudformation/parser/__init__.py

+11-10
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,15 @@ def parse(
3030

3131
try:
3232
(template, template_lines) = cfn_yaml.load(filename, cfn_yaml.ContentType.CFN)
33-
except IOError as err:
34-
if err.errno == 2:
35-
error = f"Template file not found: {filename} - {err}"
36-
LOGGER.error(error)
37-
elif err.errno == 21:
38-
error = f"Template references a directory, not a file: {filename} - {err}"
39-
LOGGER.error(error)
40-
elif err.errno == 13:
41-
error = f"Permission denied when accessing template file: {filename} - {err}"
42-
LOGGER.error(error)
33+
except FileNotFoundError as e:
34+
error = f'Template file not found: {e.filename}'
35+
LOGGER.error(error)
36+
except IsADirectoryError as e:
37+
error = f'Template references a directory, not a file: {e.filename}'
38+
LOGGER.error(error)
39+
except PermissionError as e:
40+
error = f'Permission denied when accessing {e.filename}'
41+
LOGGER.error(error)
4342
except UnicodeDecodeError as err:
4443
error = f"Cannot read file contents: {filename} - {err}"
4544
LOGGER.error(error)
@@ -81,6 +80,8 @@ def parse(
8180
if isinstance(template, dict):
8281
resources = template.get(TemplateSections.RESOURCES.value, None)
8382
if resources and isinstance(resources, dict):
83+
if '__file__' in resources:
84+
del resources['__file__']
8485
if "__startline__" in resources:
8586
del resources["__startline__"]
8687
if "__endline__" in resources:

checkov/cloudformation/parser/cfn_yaml.py

+65-16
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,19 @@
77
import json
88
import logging
99
import platform
10+
import re
1011
from collections.abc import Hashable
1112
from enum import Enum
1213
from pathlib import Path
13-
from typing import Any, TYPE_CHECKING, NoReturn, Callable
14+
from typing import Any, TYPE_CHECKING, NoReturn, Callable, cast
1415

15-
from yaml import MappingNode
16-
from yaml import ScalarNode
17-
from yaml import SequenceNode
16+
from yaml import MappingNode, ScalarNode, SequenceNode
1817
from yaml.composer import Composer
19-
from yaml.constructor import ConstructorError
20-
from yaml.constructor import SafeConstructor
18+
from yaml.constructor import ConstructorError, SafeConstructor
2119
from yaml.reader import Reader
2220
from yaml.resolver import Resolver
2321
from yaml.scanner import Scanner
22+
from yaml.error import MarkedYAMLError, YAMLError
2423
from charset_normalizer import from_path
2524

2625
from checkov.common.parsers.json.decoder import SimpleDecoder
@@ -98,6 +97,7 @@ def __init__(self, filename: str, content_type: ContentType | None = None) -> No
9897
NodeConstructor.construct_yaml_null_error,
9998
)
10099
self.filename = filename
100+
self.files_loaded: dict[Path, bool] = {}
101101

102102
# To support lazy loading, the original constructors first yield
103103
# an empty object, then fill them in when iterated. Due to
@@ -142,15 +142,52 @@ def construct_yaml_str(self, node: ScalarNode) -> StrNode:
142142
assert isinstance(obj, str) # nosec
143143
return StrNode(obj, node.start_mark, node.end_mark)
144144

145+
def mark_with_filename(self, root: Node | None, filename: str) -> None:
146+
if not root:
147+
return
148+
149+
setattr(root, 'filename', filename) # noqa: B010
150+
if isinstance(root, SequenceNode):
151+
for v in root.value:
152+
self.mark_with_filename(v, filename)
153+
if isinstance(root, MappingNode):
154+
for k, v in root.value:
155+
self.mark_with_filename(k, filename)
156+
self.mark_with_filename(v, filename)
157+
145158
def construct_yaml_seq(self, node: SequenceNode) -> ListNode:
159+
# Handle serverless file() expansions on SequenceNode
160+
if isinstance(node.value, list) and len(node.value) > 0:
161+
for i, v in enumerate(node.value):
162+
if not isinstance(v, ScalarNode) or not isinstance(node.value[i].value, str):
163+
continue
164+
165+
m = re.match(r'\$\{file\((.+\.ya?ml)\)\}$', v.value)
166+
if m is None:
167+
continue
168+
169+
path = (Path(self.filename).parent / m[1]).resolve()
170+
if path in self.files_loaded:
171+
raise CfnParseError(
172+
filename=node.filename if hasattr(node, 'filename') else self.filename,
173+
message=f'Circular include of {m[1]}',
174+
line_number=node.start_mark.line,
175+
column_number=node.start_mark.column
176+
)
177+
else:
178+
self.files_loaded[path] = True
179+
content = read_file_with_any_encoding(file_path=path)
180+
node.value[i] = MarkedLoader(content, m[1], None).get_single_node()
181+
self.mark_with_filename(node.value[i], m[1])
182+
146183
obj, = SafeConstructor.construct_yaml_seq(self, node) # type:ignore[no-untyped-call]
147184
assert isinstance(obj, list) # nosec
148185
return ListNode(obj, node.start_mark, node.end_mark) # nosec
149186

150187
def construct_yaml_null_error(self, node: Node) -> NoReturn:
151188
"""Throw a null error"""
152189
raise CfnParseError(
153-
filename=self.filename,
190+
filename=node.filename if hasattr(node, 'filename') else self.filename,
154191
message=f"Null value at line {node.start_mark.line + 1} column {node.start_mark.column + 1}",
155192
line_number=node.start_mark.line,
156193
column_number=node.start_mark.column,
@@ -179,7 +216,7 @@ def __init__(self, stream: str, filename: str, content_type: ContentType | None
179216
def construct_mapping(self, node: MappingNode, deep: bool = False) -> dict[Hashable, Any]:
180217
mapping = super(MarkedLoader, self).construct_mapping(node, deep=deep)
181218
# Add 1 so line numbering starts at 1
182-
# mapping['__line__'] = node.start_mark.line + 1
219+
mapping['__file__'] = node.filename if hasattr(node, 'filename') else self.filename
183220
mapping['__startline__'] = node.start_mark.line + 1
184221
mapping['__endline__'] = node.end_mark.line + 1
185222
return mapping
@@ -199,7 +236,7 @@ def multi_constructor(loader: MarkedLoader, tag_suffix: str, node: ScalarNode) -
199236
constructor = construct_getatt
200237
elif tag_suffix == "Ref" and (isinstance(node.value, list) or isinstance(node.value, dict)):
201238
raise CfnParseError(
202-
filename="",
239+
filename=node.filename if hasattr(node, 'filename') else loader.filename,
203240
message='Invalid !Ref: {}'.format(node.value),
204241
line_number=0,
205242
column_number=0)
@@ -232,18 +269,30 @@ def loads(yaml_string: str, fname: str, content_type: ContentType | None = None)
232269
"""
233270
Load the given YAML string
234271
"""
272+
if len(yaml_string) == 0:
273+
return {}
274+
235275
loader = MarkedLoader(yaml_string, fname, content_type)
236276
loader.add_multi_constructor('!', multi_constructor) # type:ignore[no-untyped-call]
237277

238-
template: "DictNode | dict[str, Any]" = loader.get_single_data()
239-
# Convert an empty file to an empty dict
240-
if template is None:
241-
template = {}
242-
243-
return template
278+
try:
279+
return cast(DictNode | dict[str, Any], loader.get_single_data())
280+
except MarkedYAMLError as e:
281+
logging.error(f'YAML error parsing {fname}: {e}')
282+
if e.problem and e.problem_mark:
283+
raise CfnParseError(
284+
filename=fname,
285+
message=e.problem,
286+
line_number=e.problem_mark.line,
287+
column_number=e.problem_mark.column) from e
288+
else:
289+
raise CfnParseError(filename=fname, message=str(e), line_number=0, column_number=0) from e
290+
except YAMLError as e:
291+
logging.error(f'YAML error parsing {fname}: {e}')
292+
raise CfnParseError(filename=fname, message=str(e), line_number=0, column_number=0) from e
244293

245294

246-
def load(filename: str | Path, content_type: ContentType) -> tuple[dict[str, Any], list[tuple[int, str]]]:
295+
def load(filename: str | Path, content_type: ContentType | None) -> tuple[dict[str, Any], list[tuple[int, str]]]:
247296
"""
248297
Load the given YAML file
249298
"""

checkov/common/util/consts.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
RESOLVED_MODULE_ENTRY_NAME = "__resolved__"
55
START_LINE = '__startline__'
66
END_LINE = '__endline__'
7-
LINE_FIELD_NAMES = {START_LINE, END_LINE}
7+
FILE = '__file__'
8+
LINE_FIELD_NAMES = {START_LINE, END_LINE, FILE}
89
TRUE_AFTER_UNKNOWN = 'true_after_unknown'
910

1011
DEV_API_GET_HEADERS = {

checkov/serverless/graph_builder/local_graph.py

+13
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,20 @@ def _create_vertex(self, file_path: str, definition: dict[str, Any] | None,
4343
element_type: ServerlessElements) -> None:
4444
if not definition:
4545
return
46+
4647
resources = definition.get(element_type)
48+
49+
# resources -> Resources
50+
if element_type == ServerlessElements.RESOURCES and resources is None:
51+
resources = definition.get('Resources')
52+
53+
if isinstance(resources, list) and len(resources) > 0 and \
54+
isinstance(resources[0], dict) and resources[0]['__file__'] != file_path:
55+
for r in resources:
56+
if isinstance(r, dict):
57+
self._create_vertex(file_path, {element_type: r}, element_type)
58+
return
59+
4760
if not resources:
4861
return
4962

checkov/serverless/parsers/context_parser.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
from __future__ import annotations
22

3+
from pathlib import Path
34
from typing import Any
45

56
from checkov.serverless.parsers.parser import FUNCTIONS_TOKEN, PROVIDER_TOKEN, IAM_ROLE_STATEMENTS_TOKEN, \
67
ENVIRONMENT_TOKEN, STACK_TAGS_TOKEN, TAGS_TOKEN
78
from checkov.cloudformation.context_parser import ContextParser as CfnContextParser, STARTLINE, ENDLINE
9+
from checkov.common.util.file_utils import read_file_with_any_encoding
810

911

1012
class ContextParser(object):
@@ -30,6 +32,11 @@ def __init__(self, sls_file: str, sls_template: dict[str, Any], sls_template_lin
3032
self.functions_conf = sls_template.get(FUNCTIONS_TOKEN) or {}
3133
self.provider_type = self._infer_provider_type()
3234

35+
def file(self, content: dict[str, Any]) -> str:
36+
if isinstance(content, dict):
37+
return str(content.get('__file__', self.sls_file))
38+
return self.sls_file
39+
3340
def extract_code_lines(
3441
self, content: dict[str, Any]
3542
) -> tuple[list[int], list[tuple[int, str]]] | tuple[None, None]:
@@ -40,7 +47,14 @@ def extract_code_lines(
4047

4148
entity_lines_range = [start_line, end_line - 1]
4249

43-
entity_code_lines = self.sls_template_lines[start_line - 1: end_line - 1]
50+
fname = self.file(content)
51+
lines = self.sls_template_lines
52+
if fname != self.sls_file:
53+
lines = []
54+
text = read_file_with_any_encoding(Path(self.sls_file).parent / fname)
55+
for i, ln in enumerate(text.splitlines(True)):
56+
lines.append((i + 1, ln))
57+
entity_code_lines = lines[start_line - 1: end_line - 1]
4458
return entity_lines_range, entity_code_lines
4559
return None, None
4660

checkov/serverless/parsers/parser.py

+12-19
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import re
1212

1313
import yaml
14-
from yaml import YAMLError
1514

1615
from checkov.cloudformation.parser import cfn_yaml
1716
from checkov.cloudformation.context_parser import ContextParser
@@ -39,37 +38,31 @@
3938
def parse(filename: str) -> tuple[dict[str, Any], list[tuple[int, str]]] | None:
4039
template = None
4140
template_lines = None
41+
4242
try:
4343
(template, template_lines) = cfn_yaml.load(filename, cfn_yaml.ContentType.SLS)
4444
if not template or not is_checked_sls_template(template):
4545
return None
46-
except IOError as e:
47-
if e.errno == 2:
48-
logger.error('Template file not found: %s', filename)
49-
return None
50-
elif e.errno == 21:
51-
logger.error('Template references a directory, not a file: %s',
52-
filename)
53-
return None
54-
elif e.errno == 13:
55-
logger.error('Permission denied when accessing template file: %s',
56-
filename)
57-
return None
46+
except FileNotFoundError as e:
47+
logger.error(f'Template file not found: {e.filename}')
48+
return None
49+
except IsADirectoryError as e:
50+
logger.error(f'Template references a directory, not a file: {e.filename}')
51+
return None
52+
except PermissionError as e:
53+
logger.error(f'Permission denied when accessing {e.filename}')
54+
return None
5855
except UnicodeDecodeError:
5956
logger.error('Cannot read file contents: %s', filename)
6057
return None
61-
except CfnParseError:
62-
logger.warning(f"Failed to parse file {filename} because it isn't a valid template")
63-
return None
64-
except YAMLError:
65-
logger.warning(f"Failed to parse file {filename} as a yaml")
58+
except CfnParseError as e:
59+
logger.warning(f"Failed to parse file {e.filename} because it isn't valid yaml")
6660
return None
6761

6862
if template is None or template_lines is None:
6963
return None
7064

7165
process_variables(template, filename)
72-
7366
return template, template_lines
7467

7568

checkov/serverless/runner.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def run(
117117

118118
logging.info("Creating Serverless graph")
119119
local_graph = self.graph_manager.build_graph_from_definitions(definitions=self.definitions)
120-
logging.info("Successfully created Serverless graph")
120+
logging.info(f'Successfully created Serverless graph ({len(local_graph.vertices)} vertices)')
121121

122122
self.graph_manager.save_graph(local_graph)
123123
self.definitions, self.breadcrumbs = convert_graph_vertices_to_definitions(
@@ -204,6 +204,8 @@ def single_item_sections_checks(self,
204204
entity = EntityDetails(sls_context_parser.provider_type, item_content)
205205
results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
206206
tags = get_resource_tags(entity, registry)
207+
fname = Path(sls_context_parser.file(item_content)).resolve()
208+
207209
if results:
208210
for check, check_result in results.items():
209211
censored_code_lines = omit_secret_value_from_checks(
@@ -218,7 +220,7 @@ def single_item_sections_checks(self,
218220
check_name=check.name,
219221
check_result=check_result,
220222
code_block=censored_code_lines,
221-
file_path=self.extract_file_path_from_abs_path(Path(sls_file)),
223+
file_path=self.extract_file_path_from_abs_path(fname),
222224
file_line_range=entity_lines_range or [0, 0],
223225
resource=token,
224226
evaluations=variable_evaluations,
@@ -265,6 +267,7 @@ def multi_item_sections_checks(self,
265267
entity = EntityDetails(sls_context_parser.provider_type, item_content)
266268
results = registry.scan(sls_file, entity, skipped_checks, runner_filter)
267269
tags = get_resource_tags(entity, registry)
270+
fname = Path(sls_context_parser.file(item_content)).resolve()
268271
if results:
269272
for check, check_result in results.items():
270273
censored_code_lines = omit_secret_value_from_checks(
@@ -276,7 +279,7 @@ def multi_item_sections_checks(self,
276279
)
277280
record = Record(check_id=check.id, check_name=check.name, check_result=check_result,
278281
code_block=censored_code_lines,
279-
file_path=self.extract_file_path_from_abs_path(Path(sls_file)),
282+
file_path=self.extract_file_path_from_abs_path(fname),
280283
file_line_range=entity_lines_range,
281284
resource=item_name, evaluations=variable_evaluations,
282285
check_class=check.__class__.__module__,
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
service: api-services
3+
provider:
4+
name: aws
5+
stage: ${sls:stage}
6+
runtime: nodejs20.x
7+
region: 'us-east-1'
8+
iamManagedPolicies:
9+
- 'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
10+
11+
resources:
12+
- ${file(./cfn_file_resources.yaml)}
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
service: api-services
3+
provider:
4+
name: aws
5+
stage: ${sls:stage}
6+
runtime: nodejs20.x
7+
region: 'us-east-1'
8+
iamManagedPolicies:
9+
- 'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
10+
11+
resources:
12+
- ${file(./cfn_file_circular.yaml)}
13+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
Resources:
3+
MyBucket:
4+
Type: AWS::S3::Bucket
5+
Properties:
6+
BucketName: my-bucket
7+
AccessControl: PublicRead
8+

0 commit comments

Comments
 (0)