From b4d22420e9009ae4a26776dda6b61fb7601d67c3 Mon Sep 17 00:00:00 2001
From: Dmitrii Golovanov
Date: Fri, 10 Jan 2025 14:12:44 +0100
Subject: [PATCH] twister: harness: recording: Allow multiple patterns

Extend the Twister Harness 'recording' feature to allow multiple regular
expression patterns to extract different types of records from test
output. Add a 'merge' recording mode to collect all extracted data fields
into a single record object of the test instance.
The CSV export now takes all field names occurring in the collected
records, sorts them alphabetically, and uses them as columns instead of
using only the first record's fields. This addresses the situation where
records have different sets of fields.
Adjust the Twister documentation and test suite to the above changes.

Signed-off-by: Dmitrii Golovanov
---
 doc/develop/test/twister.rst                  |  29 ++++-
 scripts/pylib/twister/twisterlib/harness.py   |  30 +++--
 .../pylib/twister/twisterlib/testinstance.py  |   5 +-
 scripts/schemas/twister/testsuite-schema.yaml |   7 +-
 scripts/tests/twister/test_harness.py         | 115 +++++++++++++++---
 5 files changed, 153 insertions(+), 33 deletions(-)

diff --git a/doc/develop/test/twister.rst b/doc/develop/test/twister.rst
index e45d8718844..bcf66f66fa2 100644
--- a/doc/develop/test/twister.rst
+++ b/doc/develop/test/twister.rst
@@ -626,26 +626,43 @@ harness_config:
     Check the regular expression strings in orderly or randomly fashion
 
   record: (optional)
-    regex: (required)
-      The regular expression with named subgroups to match data fields
-      at the test's output lines where the test provides some custom data
+    regex: (required)
+      Regular expressions with named subgroups to match data fields found
+      in the test instance's output lines where it provides some custom data
       for further analysis. These records will be written into the build
       directory ``recording.csv`` file as well as ``recording`` property
       of the test suite object in ``twister.json``.
 
+      With several regular expressions given, each of them will be applied
+      to each output line, producing several different records from the
+      same output line, different records from different lines,
+      or similar records from different lines.
+
+      The CSV file will have as many columns as there are fields detected
+      in all records; missing values are filled with empty strings.
+
       For example, to extract three data fields ``metric``, ``cycles``,
       ``nanoseconds``:
 
       .. code-block:: yaml

        record:
-          regex: "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+          regex:
+            - "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+
+    merge: (default False)
+      Keep only one record in a test instance, with all the data fields
+      extracted by the regular expressions. Fields with the same name
+      are put into lists, ordered by their appearance in the recordings.
+      Such multi-value fields may have a different number of values
+      depending on the regex rules and the test's output.
 
     as_json: (optional)
-      Data fields, extracted by the regular expression into named subgroups,
+      Data fields, extracted by the regular expressions into named subgroups,
       which will be additionally parsed as JSON encoded strings and written
       into ``twister.json`` as nested ``recording`` object properties.
-      The corresponding ``recording.csv`` columns will contain strings as-is.
+      The corresponding ``recording.csv`` columns will contain JSON strings
+      as-is.
       Using this option, a test log can convey layered data structures
       passed from the test image for further analysis with summary results,

diff --git a/scripts/pylib/twister/twisterlib/harness.py b/scripts/pylib/twister/twisterlib/harness.py
index 8ce3808f736..2ef4d50f612 100644
--- a/scripts/pylib/twister/twisterlib/harness.py
+++ b/scripts/pylib/twister/twisterlib/harness.py
@@ -53,7 +53,8 @@ class Harness:
         self.capture_coverage = False
         self.next_pattern = 0
         self.record = None
-        self.record_pattern = None
+        self.record_patterns = []
+        self.record_merge = False
         self.record_as_json = None
         self.recording = []
         self.ztest = False
@@ -99,7 +100,8 @@ class Harness:
             self.ordered = config.get('ordered', True)
             self.record = config.get('record', {})
             if self.record:
-                self.record_pattern = re.compile(self.record.get("regex", ""))
+                self.record_patterns = [re.compile(p) for p in self.record.get("regex", [])]
+                self.record_merge = self.record.get("merge", False)
                 self.record_as_json = self.record.get("as_json")
 
     def build(self):
@@ -125,17 +127,27 @@ class Harness:
                 record[k] = { 'ERROR': { 'msg': str(parse_error), 'doc': record[k] } }
         return record
 
-    def parse_record(self, line) -> re.Match:
-        match = None
-        if self.record_pattern:
-            match = self.record_pattern.search(line)
+    def parse_record(self, line) -> int:
+        match_cnt = 0
+        for record_pattern in self.record_patterns:
+            match = record_pattern.search(line)
             if match:
+                match_cnt += 1
                 rec = self.translate_record(
                     { k:v.strip() for k,v in match.groupdict(default="").items() }
                 )
-                self.recording.append(rec)
-        return match
-    #
+                if self.record_merge and len(self.recording) > 0:
+                    for k,v in rec.items():
+                        if k in self.recording[0]:
+                            if isinstance(self.recording[0][k], list):
+                                self.recording[0][k].append(v)
+                            else:
+                                self.recording[0][k] = [self.recording[0][k], v]
+                        else:
+                            self.recording[0][k] = v
+                else:
+                    self.recording.append(rec)
+        return match_cnt
 
     def process_test(self, line):
diff --git a/scripts/pylib/twister/twisterlib/testinstance.py b/scripts/pylib/twister/twisterlib/testinstance.py
index 8cf0f2e0963..f3199a13271 100644
--- a/scripts/pylib/twister/twisterlib/testinstance.py
+++ b/scripts/pylib/twister/twisterlib/testinstance.py
@@ -105,9 +105,12 @@ class TestInstance:
         self.recording.extend(recording)
 
         filename = os.path.join(self.build_dir, fname_csv)
+        fieldnames = set()
+        for r in self.recording:
+            fieldnames.update(r)
         with open(filename, 'w') as csvfile:
             cw = csv.DictWriter(csvfile,
-                                fieldnames = self.recording[0].keys(),
+                                fieldnames = sorted(list(fieldnames)),
                                 lineterminator = os.linesep,
                                 quoting = csv.QUOTE_NONNUMERIC)
             cw.writeheader()
diff --git a/scripts/schemas/twister/testsuite-schema.yaml b/scripts/schemas/twister/testsuite-schema.yaml
index 348a517fb72..b05e54f708d 100644
--- a/scripts/schemas/twister/testsuite-schema.yaml
+++ b/scripts/schemas/twister/testsuite-schema.yaml
@@ -151,8 +151,13 @@ schema;scenario-schema:
       required: false
       mapping:
         "regex":
-          type: str
+          type: seq
           required: true
+          sequence:
+            - type: str
+        "merge":
+          type: bool
+          required: false
         "as_json":
           type: seq
           required: false
diff --git a/scripts/tests/twister/test_harness.py b/scripts/tests/twister/test_harness.py
index 7ff006d9278..bc529932eef 100644
--- a/scripts/tests/twister/test_harness.py
+++ b/scripts/tests/twister/test_harness.py
@@ -61,26 +61,77 @@ def process_logs(harness, logs):
 
 
 TEST_DATA_RECORDING = [
-    ([""], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:STOP"], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], None),
+    ([""], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:STOP"], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], None, None),
     (
         ["START:bar:baz:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}],
         None,
+        None,
+    ),
+    (
+        ["START:bar:END"],
+        ["^(START:(?P<foo>[a-z]+):END)|(START:(?P<boo>[0-9]+):END)"],
+        [{"foo": "bar", "boo": ""}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar"}, {"boo": "baz"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<boo>[0-9]+):END"],
+        [{"foo": "bar"}, {"boo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": "bar"}, {"foo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": ["bar", "123"]}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar", "boo": "baz"}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<foo>.*):END"],
+        [{"foo": ["bar", "baz"]}],
+        None,
+        True,
     ),
     (
         ["START:bar:baz:END", "START:may:jun:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}, {"foo": "may", "boo": "jun"}],
         None,
+        None,
     ),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], []),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], ["boo"]),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], [], None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], ["boo"], None),
     (
         ["START:bad_json:END"],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [
             {
                 "foo": {
@@ -92,37 +143,66 @@ TEST_DATA_RECORDING = [
                 }
             }
         ],
         ["foo"],
+        None,
     ),
-    (["START::END"], "^START:(?P<foo>.*):END", [{"foo": {}}], ["foo"]),
+    (["START::END"], ["^START:(?P<foo>.*):END"], [{"foo": {}}], ["foo"], None),
     (
         ['START: {"one":1, "two":2} :END'],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [{"foo": {"one": 1, "two": 2}}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:oops:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": "oops"}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": {"oops": 0}}],
         ["foo", "boo"],
+        None,
+    ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<boo>.*):END"
+        ],
+        [{"foo": {"one": 1, "two": 2}}, {"boo": {"oops": 0}}],
+        ["foo", "boo"],
+        None,
+    ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<foo>.*):END"
+        ],
+        [{"foo": [{"one": 1, "two": 2}, {"oops": 0}]}],
+        ["foo"],
+        True,
     ),
 ]
 
 
 @pytest.mark.parametrize(
-    "lines, pattern, expected_records, as_json",
+    "lines, patterns, expected_records, as_json, merge",
     TEST_DATA_RECORDING,
     ids=[
         "empty",
         "no match",
         "match 1 field",
         "match 2 fields",
+        "2 or-ed groups one miss",
+        "one line, two patterns, match 2 fields -> 2 records",
+        "two lines, two patterns -> 2 records",
+        "two lines, two patterns same field -> 2 same records",
+        "two lines, two patterns same field merge -> 1 record 2 values",
+        "one line, two patterns, match 2 fields, merge -> 1 record",
+        "one line, two patterns, match 1 field, merge -> 1 record list",
         "match 2 records",
         "as_json empty",
         "as_json no such field",
@@ -131,13 +211,16 @@ TEST_DATA_RECORDING = [
         "simple json",
         "plain field and json field",
         "two json fields",
+        "two json fields in two patterns -> 2 records",
+        "two json fields in two patterns merge -> 1 record 2 items",
     ],
 )
-def test_harness_parse_record(lines, pattern, expected_records, as_json):
+def test_harness_parse_record(lines, patterns, expected_records, as_json, merge):
     harness = Harness()
-    harness.record = {"regex": pattern}
-    harness.record_pattern = re.compile(pattern)
+    harness.record = {"regex": patterns}
+    harness.record_patterns = [re.compile(p) for p in patterns]
+    harness.record_merge = merge
     harness.record_as_json = as_json
     if as_json is not None:
         harness.record["as_json"] = as_json
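
For reference, a minimal usage sketch of the new multi-pattern and merge behaviour,
not part of the patch. It assumes the Harness class is importable as
twisterlib.harness.Harness, as in the unit tests above; the output lines and the
group names 'metric' and 'value' are made up for illustration only.

    # Illustrative sketch: exercise multiple 'regex' patterns with 'merge'
    # enabled, mirroring the unit test setup above. Import path, output
    # lines, and group names are assumptions, not part of the patch.
    import re

    from twisterlib.harness import Harness

    harness = Harness()
    patterns = [
        r"^RECORD:(?P<metric>[a-z_]+):",
        r"^RECORD:[a-z_]+:(?P<value>[0-9]+)$",
    ]
    harness.record = {"regex": patterns, "merge": True}
    harness.record_patterns = [re.compile(p) for p in patterns]
    harness.record_merge = True

    # Every pattern is applied to every line; with merge enabled all extracted
    # fields are collected into a single record and repeated fields become lists.
    for line in ["RECORD:boot_time:120", "RECORD:idle_time:45"]:
        harness.parse_record(line)

    # Expected: [{'metric': ['boot_time', 'idle_time'], 'value': ['120', '45']}]
    print(harness.recording)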