From b4d22420e9009ae4a26776dda6b61fb7601d67c3 Mon Sep 17 00:00:00 2001
From: Dmitrii Golovanov
Date: Fri, 10 Jan 2025 14:12:44 +0100
Subject: [PATCH] twister: harness: recording: Allow multiple patterns

Extend the Twister Harness 'recording' feature to allow multiple regular
expression patterns to extract different types of records from test
output. Add a 'merge' recording mode to collect all extracted data fields
into a single record object of the test instance.
The CSV export now takes all field names occurring in the collected
records, sorts them alphabetically, and uses them as columns instead of
using only the first record's fields. This addresses the situation where
records have different sets of fields.
Adjust the Twister documentation and test suite to the above changes.

Signed-off-by: Dmitrii Golovanov
---
 doc/develop/test/twister.rst                  |  29 ++++-
 scripts/pylib/twister/twisterlib/harness.py   |  30 +++--
 .../pylib/twister/twisterlib/testinstance.py  |   5 +-
 scripts/schemas/twister/testsuite-schema.yaml |   7 +-
 scripts/tests/twister/test_harness.py         | 115 +++++++++++++++---
 5 files changed, 153 insertions(+), 33 deletions(-)

diff --git a/doc/develop/test/twister.rst b/doc/develop/test/twister.rst
index e45d8718844..bcf66f66fa2 100644
--- a/doc/develop/test/twister.rst
+++ b/doc/develop/test/twister.rst
@@ -626,26 +626,43 @@ harness_config:
     Check the regular expression strings in orderly or randomly fashion
 
   record: (optional)
-    regex: (required)
-      The regular expression with named subgroups to match data fields
-      at the test's output lines where the test provides some custom data
+    regex: (required)
+      Regular expressions with named subgroups to match data fields found
+      in the test instance's output lines where it provides some custom data
       for further analysis. These records will be written into the build
       directory ``recording.csv`` file as well as ``recording`` property
       of the test suite object in ``twister.json``.
 
+      With several regular expressions given, each of them will be applied
+      to each output line, producing several different records from the
+      same output line, different records from different lines,
+      or similar records from different lines.
+
+      The CSV file will have as many columns as there are fields detected
+      in all records; missing values are filled with empty strings.
+
       For example, to extract three data fields ``metric``, ``cycles``,
       ``nanoseconds``:
 
       .. code-block:: yaml

        record:
-          regex: "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+          regex:
+            - "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+
+    merge: (default False)
+      Keep only one record in a test instance, with all the data fields
+      extracted by the regular expressions. Fields with the same name
+      are put into lists, ordered by their appearance in the recordings.
+      Such multi-value fields may have a different number of values
+      depending on the regex rules and the test's output.
 
     as_json: (optional)
-      Data fields, extracted by the regular expression into named subgroups,
+      Data fields, extracted by the regular expressions into named subgroups,
       which will be additionally parsed as JSON encoded strings and written
       into ``twister.json`` as nested ``recording`` object properties.
-      The corresponding ``recording.csv`` columns will contain strings as-is.
+      The corresponding ``recording.csv`` columns will contain JSON strings
+      as-is.
       Using this option, a test log can convey layered data structures
       passed from the test image for further analysis with summary results,

diff --git a/scripts/pylib/twister/twisterlib/harness.py b/scripts/pylib/twister/twisterlib/harness.py
index 8ce3808f736..2ef4d50f612 100644
--- a/scripts/pylib/twister/twisterlib/harness.py
+++ b/scripts/pylib/twister/twisterlib/harness.py
@@ -53,7 +53,8 @@ class Harness:
         self.capture_coverage = False
         self.next_pattern = 0
         self.record = None
-        self.record_pattern = None
+        self.record_patterns = []
+        self.record_merge = False
         self.record_as_json = None
         self.recording = []
         self.ztest = False
@@ -99,7 +100,8 @@ class Harness:
             self.ordered = config.get('ordered', True)
             self.record = config.get('record', {})
             if self.record:
-                self.record_pattern = re.compile(self.record.get("regex", ""))
+                self.record_patterns = [re.compile(p) for p in self.record.get("regex", [])]
+                self.record_merge = self.record.get("merge", False)
                 self.record_as_json = self.record.get("as_json")
 
     def build(self):
@@ -125,17 +127,27 @@ class Harness:
                 record[k] = { 'ERROR': { 'msg': str(parse_error), 'doc': record[k] } }
         return record
 
-    def parse_record(self, line) -> re.Match:
-        match = None
-        if self.record_pattern:
-            match = self.record_pattern.search(line)
+    def parse_record(self, line) -> int:
+        match_cnt = 0
+        for record_pattern in self.record_patterns:
+            match = record_pattern.search(line)
             if match:
+                match_cnt += 1
                 rec = self.translate_record(
                     { k:v.strip() for k,v in match.groupdict(default="").items() }
                 )
-                self.recording.append(rec)
-        return match
-    #
+                if self.record_merge and len(self.recording) > 0:
+                    for k,v in rec.items():
+                        if k in self.recording[0]:
+                            if isinstance(self.recording[0][k], list):
+                                self.recording[0][k].append(v)
+                            else:
+                                self.recording[0][k] = [self.recording[0][k], v]
+                        else:
+                            self.recording[0][k] = v
+                else:
+                    self.recording.append(rec)
+        return match_cnt
 
     def process_test(self, line):
diff --git a/scripts/pylib/twister/twisterlib/testinstance.py b/scripts/pylib/twister/twisterlib/testinstance.py
index 8cf0f2e0963..f3199a13271 100644
--- a/scripts/pylib/twister/twisterlib/testinstance.py
+++ b/scripts/pylib/twister/twisterlib/testinstance.py
@@ -105,9 +105,12 @@ class TestInstance:
         self.recording.extend(recording)
 
         filename = os.path.join(self.build_dir, fname_csv)
+        fieldnames = set()
+        for r in self.recording:
+            fieldnames.update(r)
         with open(filename, 'w') as csvfile:
             cw = csv.DictWriter(csvfile,
-                                fieldnames = self.recording[0].keys(),
+                                fieldnames = sorted(list(fieldnames)),
                                 lineterminator = os.linesep,
                                 quoting = csv.QUOTE_NONNUMERIC)
             cw.writeheader()
diff --git a/scripts/schemas/twister/testsuite-schema.yaml b/scripts/schemas/twister/testsuite-schema.yaml
index 348a517fb72..b05e54f708d 100644
--- a/scripts/schemas/twister/testsuite-schema.yaml
+++ b/scripts/schemas/twister/testsuite-schema.yaml
@@ -151,8 +151,13 @@ schema;scenario-schema:
       required: false
       mapping:
         "regex":
-          type: str
+          type: seq
           required: true
+          sequence:
+            - type: str
+        "merge":
+          type: bool
+          required: false
         "as_json":
           type: seq
           required: false
diff --git a/scripts/tests/twister/test_harness.py b/scripts/tests/twister/test_harness.py
index 7ff006d9278..bc529932eef 100644
--- a/scripts/tests/twister/test_harness.py
+++ b/scripts/tests/twister/test_harness.py
@@ -61,26 +61,77 @@ def process_logs(harness, logs):
 
 
 TEST_DATA_RECORDING = [
-    ([""], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:STOP"], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], None),
+    ([""], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:STOP"], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], None, None),
     (
         ["START:bar:baz:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}],
         None,
+        None,
+    ),
+    (
+        ["START:bar:END"],
+        ["^(START:(?P<foo>[a-z]+):END)|(START:(?P<boo>[0-9]+):END)"],
+        [{"foo": "bar", "boo": ""}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar"}, {"boo": "baz"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<boo>[0-9]+):END"],
+        [{"foo": "bar"}, {"boo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": "bar"}, {"foo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": ["bar", "123"]}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar", "boo": "baz"}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<foo>.*):END"],
+        [{"foo": ["bar", "baz"]}],
+        None,
+        True,
     ),
     (
         ["START:bar:baz:END", "START:may:jun:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}, {"foo": "may", "boo": "jun"}],
         None,
+        None,
     ),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], []),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], ["boo"]),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], [], None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], ["boo"], None),
     (
         ["START:bad_json:END"],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [
             {
                 "foo": {
@@ -92,37 +143,66 @@ TEST_DATA_RECORDING = [
                 }
             }
         ],
         ["foo"],
+        None,
     ),
-    (["START::END"], "^START:(?P<foo>.*):END", [{"foo": {}}], ["foo"]),
+    (["START::END"], ["^START:(?P<foo>.*):END"], [{"foo": {}}], ["foo"], None),
     (
         ['START: {"one":1, "two":2} :END'],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [{"foo": {"one": 1, "two": 2}}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:oops:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": "oops"}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": {"oops": 0}}],
         ["foo", "boo"],
+        None,
+    ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<boo>.*):END"
+        ],
+        [{"foo": {"one": 1, "two": 2}}, {"boo": {"oops": 0}}],
+        ["foo", "boo"],
+        None,
+    ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<foo>.*):END"
+        ],
+        [{"foo": [{"one": 1, "two": 2}, {"oops": 0}]}],
+        ["foo"],
+        True,
     ),
 ]
 
 
 @pytest.mark.parametrize(
-    "lines, pattern, expected_records, as_json",
+    "lines, patterns, expected_records, as_json, merge",
     TEST_DATA_RECORDING,
     ids=[
         "empty",
         "no match",
         "match 1 field",
         "match 2 fields",
+        "2 or-ed groups one miss",
+        "one line, two patterns, match 2 fields -> 2 records",
+        "two lines, two patterns -> 2 records",
+        "two lines, two patterns same field -> 2 same records",
+        "two lines, two patterns same field merge -> 1 record 2 values",
+        "one line, two patterns, match 2 fields, merge -> 1 record",
+        "one line, two patterns, match 1 field, merge -> 1 record list",
         "match 2 records",
         "as_json empty",
         "as_json no such field",
@@ -131,13 +211,16 @@ TEST_DATA_RECORDING = [
         "simple json",
         "plain field and json field",
         "two json fields",
+        "two json fields in two patterns -> 2 records",
+        "two json fields in two patterns merge -> 1 record 2 items",
     ],
 )
-def test_harness_parse_record(lines, pattern, expected_records, as_json):
+def test_harness_parse_record(lines, patterns, expected_records, as_json, merge):
     harness = Harness()
-    harness.record = {"regex": pattern}
-    harness.record_pattern = re.compile(pattern)
+    harness.record = {"regex": patterns}
+    harness.record_patterns = [re.compile(p) for p in patterns]
+    harness.record_merge = merge
     harness.record_as_json = as_json
     if as_json is not None:
         harness.record["as_json"] = as_json
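
For reference, a minimal usage sketch of the new multi-pattern and merge behaviour,
not part of the patch. It assumes the Harness class is importable as
twisterlib.harness.Harness, as in the unit tests above; the output lines and the
group names 'metric' and 'value' are made up for illustration only.

    # Illustrative sketch: exercise multiple 'regex' patterns with 'merge'
    # enabled, mirroring the unit test setup above. Import path, output
    # lines, and group names are assumptions, not part of the patch.
    import re

    from twisterlib.harness import Harness

    harness = Harness()
    patterns = [
        r"^RECORD:(?P<metric>[a-z_]+):",
        r"^RECORD:[a-z_]+:(?P<value>[0-9]+)$",
    ]
    harness.record = {"regex": patterns, "merge": True}
    harness.record_patterns = [re.compile(p) for p in patterns]
    harness.record_merge = True

    # Every pattern is applied to every line; with merge enabled all extracted
    # fields are collected into a single record and repeated fields become lists.
    for line in ["RECORD:boot_time:120", "RECORD:idle_time:45"]:
        harness.parse_record(line)

    # Expected: [{'metric': ['boot_time', 'idle_time'], 'value': ['120', '45']}]
    print(harness.recording)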