Skip to content

Commit d71a2f5

Browse files
authored
Chore/dataframe formatter handle nulls (#245)
* Refactor DataFrameOutputFormatter for clarity and convert nulls to empty strings in table/CSV outputs
* Change Exception to ValueError
* Update test to expect the correct exception
* Name test correctly
1 parent 3fdc7ce commit d71a2f5

File tree

2 files changed

+97
-78
lines changed

2 files changed

+97
-78
lines changed

src/code42cli/output_formats.py

Lines changed: 42 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
import json
44

55
import click
6-
from pandas import DataFrame
76

87
from code42cli.logger.formatters import CEF_TEMPLATE
98
from code42cli.logger.formatters import map_event_to_cef
@@ -81,41 +80,51 @@ def _requires_list_output(self):
8180

8281
class DataFrameOutputFormatter:
8382
def __init__(self, output_format):
84-
output_format = output_format.upper() if output_format else OutputFormat.TABLE
85-
self.output_format = output_format
86-
self._format_func = DataFrame.to_string
87-
self._output_args = {"index": False}
83+
self.output_format = (
84+
output_format.upper() if output_format else OutputFormat.TABLE
85+
)
8886

89-
if output_format == OutputFormat.CSV:
90-
self._format_func = DataFrame.to_csv
91-
elif output_format == OutputFormat.RAW:
92-
self._format_func = DataFrame.to_json
93-
self._output_args.update(
94-
{
95-
"orient": "records",
96-
"lines": False,
97-
"index": True,
98-
"default_handler": str,
99-
}
100-
)
101-
elif output_format == OutputFormat.JSON:
102-
self._format_func = DataFrame.to_json
103-
self._output_args.update(
104-
{
105-
"orient": "records",
106-
"lines": True,
107-
"index": True,
108-
"default_handler": str,
109-
}
110-
)
87+
def get_formatted_output(self, df, **kwargs):
88+
if self.output_format == OutputFormat.JSON:
89+
defaults = {
90+
"orient": "records",
91+
"lines": True,
92+
"index": True,
93+
"default_handler": str,
94+
}
95+
defaults.update(kwargs)
96+
return df.to_json(**defaults)
97+
98+
elif self.output_format == OutputFormat.RAW:
99+
defaults = {
100+
"orient": "records",
101+
"lines": False,
102+
"index": True,
103+
"default_handler": str,
104+
}
105+
defaults.update(kwargs)
106+
return df.to_json(**defaults)
107+
108+
elif self.output_format == OutputFormat.CSV:
109+
defaults = {"index": False}
110+
defaults.update(kwargs)
111+
df = df.fillna("")
112+
return df.to_csv(**defaults)
113+
114+
elif self.output_format == OutputFormat.TABLE:
115+
defaults = {"index": False}
116+
defaults.update(kwargs)
117+
df = df.fillna("")
118+
return df.to_string(**defaults)
111119

112-
def _format_output(self, output, *args, **kwargs):
113-
self._output_args.update(kwargs)
114-
return self._format_func(output, *args, **self._output_args)
120+
else:
121+
raise ValueError(
122+
f"DataFrameOutputFormatter received an invalid format: {self.output_format}"
123+
)
115124

116-
def echo_formatted_dataframe(self, output, *args, **kwargs):
117-
str_output = self._format_output(output, *args, **kwargs)
118-
if len(output) <= 10:
125+
def echo_formatted_dataframe(self, df, **kwargs):
126+
str_output = self.get_formatted_output(df, **kwargs)
127+
if len(df) <= 10:
119128
click.echo(str_output)
120129
else:
121130
click.echo_via_pager(str_output)

tests/test_output_formats.py

Lines changed: 55 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,17 @@
22
from collections import OrderedDict
33

44
import pytest
5+
from numpy import NaN
56
from pandas import DataFrame
67

78
import code42cli.output_formats as output_formats_module
89
from code42cli.maps import FILE_EVENT_TO_SIGNATURE_ID_MAP
10+
from code42cli.output_formats import DataFrameOutputFormatter
911
from code42cli.output_formats import FileEventsOutputFormat
1012
from code42cli.output_formats import FileEventsOutputFormatter
13+
from code42cli.output_formats import OutputFormat
1114
from code42cli.output_formats import to_cef
1215

13-
1416
TEST_DATA = [
1517
{
1618
"type$": "RULE_METADATA",
@@ -771,53 +773,61 @@ def test_security_data_output_format_has_expected_options():
771773

772774

773775
class TestDataFrameOutputFormatter:
774-
def test_init_sets_format_func_to_formatted_json_function_when_json_format_option_is_passed(
775-
self, mock_dataframe_to_json
776-
):
777-
output_format = output_formats_module.OutputFormat.RAW
778-
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
779-
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
780-
mock_dataframe_to_json.assert_called_once_with(
781-
TEST_DATAFRAME,
782-
orient="records",
783-
lines=False,
784-
index=True,
785-
default_handler=str,
776+
test_df = DataFrame(
777+
[
778+
{"string_column": "string1", "int_column": 42, "null_column": None},
779+
{"string_column": "string2", "int_column": 43, "null_column": NaN},
780+
]
781+
)
782+
783+
def test_format_when_none_passed_defaults_to_table(self):
784+
formatter = DataFrameOutputFormatter(output_format=None)
785+
assert formatter.output_format == OutputFormat.TABLE
786+
787+
def test_format_when_unknown_format_raises_value_error(self):
788+
with pytest.raises(ValueError):
789+
formatter = DataFrameOutputFormatter("NOT_A_FORMAT")
790+
formatter.get_formatted_output(self.test_df)
791+
792+
def test_json_formatter_converts_to_expected_string(self):
793+
formatter = DataFrameOutputFormatter(OutputFormat.JSON)
794+
output = formatter.get_formatted_output(self.test_df)
795+
assert (
796+
output
797+
== '{"string_column":"string1","int_column":42,"null_column":null}\n{"string_column":"string2","int_column":43,"null_column":null}'
786798
)
787799

788-
def test_init_sets_format_func_to_json_function_when_raw_json_format_option_is_passed(
789-
self, mock_dataframe_to_json
790-
):
791-
output_format = output_formats_module.OutputFormat.JSON
792-
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
793-
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
794-
mock_dataframe_to_json.assert_called_once_with(
795-
TEST_DATAFRAME,
796-
orient="records",
797-
lines=True,
798-
index=True,
799-
default_handler=str,
800+
def test_raw_formatter_converts_to_expected_string(self):
801+
formatter = DataFrameOutputFormatter(OutputFormat.RAW)
802+
output = formatter.get_formatted_output(self.test_df)
803+
assert (
804+
output
805+
== '[{"string_column":"string1","int_column":42,"null_column":null},{"string_column":"string2","int_column":43,"null_column":null}]'
800806
)
801807

802-
def test_init_sets_format_func_to_table_function_when_table_format_option_is_passed(
803-
self, mock_dataframe_to_string
804-
):
805-
output_format = output_formats_module.OutputFormat.TABLE
806-
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
807-
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
808-
mock_dataframe_to_string.assert_called_once_with(TEST_DATAFRAME, index=False)
808+
def test_csv_formatter_converts_to_expected_string(self):
809+
formatter = DataFrameOutputFormatter(OutputFormat.CSV)
810+
output = formatter.get_formatted_output(self.test_df)
811+
assert (
812+
output == "string_column,int_column,null_column\nstring1,42,\nstring2,43,\n"
813+
)
809814

810-
def test_init_sets_format_func_to_csv_function_when_csv_format_option_is_passed(
811-
self, mock_dataframe_to_csv
812-
):
813-
output_format = output_formats_module.OutputFormat.CSV
814-
formatter = output_formats_module.DataFrameOutputFormatter(output_format)
815-
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
816-
mock_dataframe_to_csv.assert_called_once_with(TEST_DATAFRAME, index=False)
815+
def test_table_formatter_converts_to_expected_string(self):
816+
formatter = DataFrameOutputFormatter(OutputFormat.TABLE)
817+
output = formatter.get_formatted_output(self.test_df)
818+
assert output == (
819+
"string_column int_column null_column\n"
820+
" string1 42 \n"
821+
" string2 43 "
822+
)
817823

818-
def test_init_sets_format_func_to_table_function_when_no_format_option_is_passed(
819-
self, mock_dataframe_to_string
820-
):
821-
formatter = output_formats_module.DataFrameOutputFormatter(None)
822-
formatter.echo_formatted_dataframe(TEST_DATAFRAME)
823-
mock_dataframe_to_string.assert_called_once_with(TEST_DATAFRAME, index=False)
824+
def test_echo_formatted_dataframe_uses_pager_when_gt_10_rows(self, mocker):
825+
mock_echo = mocker.patch("click.echo")
826+
mock_pager = mocker.patch("click.echo_via_pager")
827+
formatter = DataFrameOutputFormatter(OutputFormat.TABLE)
828+
big_df = DataFrame([{"column": val} for val in range(11)])
829+
small_df = DataFrame([{"column": val} for val in range(5)])
830+
formatter.echo_formatted_dataframe(big_df)
831+
formatter.echo_formatted_dataframe(small_df)
832+
assert mock_echo.call_count == 1
833+
assert mock_pager.call_count == 1

0 commit comments

Comments
 (0)