twister: harness: recording: Allow multiple patterns

Extend the Twister Harness 'recording' feature to allow multiple
regular expression patterns to extract different types of records
from test output.

Add a 'merge' recording mode that collects all extracted data fields
into a single record object per test instance.

The CSV export now takes all field names occurring in the collected
records, sorts them alphabetically, and uses them as columns, instead
of using only the first record's fields. This addresses the situation
where records have different sets of fields.

Adjust the Twister documentation and test suite for the above changes.

Signed-off-by: Dmitrii Golovanov <dmitrii.golovanov@intel.com>
Dmitrii Golovanov 2025-01-10 14:12:44 +01:00 committed by Benjamin Cabé
parent 9c5ed3b18f
commit b4d22420e9
5 changed files with 153 additions and 33 deletions


@@ -626,26 +626,43 @@ harness_config: <harness configuration options>
       Check the regular expression strings in orderly or randomly fashion
   record: <recording options> (optional)
-    regex: <regular expression> (required)
-      The regular expression with named subgroups to match data fields
-      at the test's output lines where the test provides some custom data
+    regex: <list of regular expressions> (required)
+      Regular expressions with named subgroups to match data fields found
+      in the test instance's output lines where it provides some custom data
       for further analysis. These records will be written into the build
       directory ``recording.csv`` file as well as ``recording`` property
       of the test suite object in ``twister.json``.
+      When several regular expressions are given, each of them is applied
+      to each output line, producing either several different records from
+      the same output line, or different records from different lines,
+      or similar records from different lines.
+      The .CSV file will have as many columns as there are fields detected
+      in all records; missing values are filled with empty strings.
       For example, to extract three data fields ``metric``, ``cycles``,
       ``nanoseconds``:

       .. code-block:: yaml

          record:
-           regex: "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+           regex:
+             - "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+
+    merge: <True|False> (default False)
+      Keep only one record per test instance, holding all the data fields
+      extracted by the regular expressions. Fields with the same name are
+      put into lists, ordered by their appearance in the recording. Such
+      multi-value fields may end up with different numbers of values,
+      depending on the regex rules and the test's output.
     as_json: <list of regex subgroup names> (optional)
-      Data fields, extracted by the regular expression into named subgroups,
+      Data fields, extracted by the regular expressions into named subgroups,
       which will be additionally parsed as JSON encoded strings and written
       into ``twister.json`` as nested ``recording`` object properties.
-      The corresponding ``recording.csv`` columns will contain strings as-is.
+      The corresponding ``recording.csv`` columns will contain the JSON
+      strings as-is.
       Using this option, a test log can convey layered data structures
       passed from the test image for further analysis with summary results,

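A minimal standalone sketch of the documented behavior (the extract_records
helper is hypothetical, not part of Twister; the real logic is in the
Harness.parse_record() change below). With merge enabled, same-named fields
collapse into one record, ordered by appearance:

    import re

    def extract_records(lines, patterns, merge=False):
        """Hypothetical helper mirroring the documented 'record' semantics."""
        recording = []
        compiled = [re.compile(p) for p in patterns]
        for line in lines:
            for pattern in compiled:
                match = pattern.search(line)
                if not match:
                    continue
                rec = {k: v.strip() for k, v in match.groupdict(default="").items()}
                if merge and recording:
                    # Same-named fields accumulate into lists, in order of appearance.
                    for k, v in rec.items():
                        if k in recording[0]:
                            if isinstance(recording[0][k], list):
                                recording[0][k].append(v)
                            else:
                                recording[0][k] = [recording[0][k], v]
                        else:
                            recording[0][k] = v
                else:
                    recording.append(rec)
        return recording

    lines = ["metric1:100 cycles, 42 ns", "metric2:200 cycles, 84 ns"]
    patterns = [r"(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"]
    print(extract_records(lines, patterns))              # two separate records
    print(extract_records(lines, patterns, merge=True))  # one record with list values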

@@ -53,7 +53,8 @@ class Harness:
         self.capture_coverage = False
         self.next_pattern = 0
         self.record = None
-        self.record_pattern = None
+        self.record_patterns = []
+        self.record_merge = False
         self.record_as_json = None
         self.recording = []
         self.ztest = False
@@ -99,7 +100,8 @@ class Harness:
         self.ordered = config.get('ordered', True)
         self.record = config.get('record', {})
         if self.record:
-            self.record_pattern = re.compile(self.record.get("regex", ""))
+            self.record_patterns = [re.compile(p) for p in self.record.get("regex", [])]
+            self.record_merge = self.record.get("merge", False)
             self.record_as_json = self.record.get("as_json")

     def build(self):
@@ -125,17 +127,27 @@ class Harness:
                     record[k] = { 'ERROR': { 'msg': str(parse_error), 'doc': record[k] } }
         return record

-    def parse_record(self, line) -> re.Match:
-        match = None
-        if self.record_pattern:
-            match = self.record_pattern.search(line)
+    def parse_record(self, line) -> int:
+        match_cnt = 0
+        for record_pattern in self.record_patterns:
+            match = record_pattern.search(line)
             if match:
+                match_cnt += 1
                 rec = self.translate_record(
                     { k:v.strip() for k,v in match.groupdict(default="").items() }
                 )
-                self.recording.append(rec)
-        return match
+                if self.record_merge and len(self.recording) > 0:
+                    for k,v in rec.items():
+                        if k in self.recording[0]:
+                            if isinstance(self.recording[0][k], list):
+                                self.recording[0][k].append(v)
+                            else:
+                                self.recording[0][k] = [self.recording[0][k], v]
+                        else:
+                            self.recording[0][k] = v
+                else:
+                    self.recording.append(rec)
+        return match_cnt

     def process_test(self, line):

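Note the return-type change: parse_record() now returns the number of pattern
matches for the line instead of a match object, since one line can produce
several records. A hedged usage sketch (the import path is an assumption based
on Twister's layout, not shown in this diff):

    import re
    from twisterlib.harness import Harness  # assumes scripts/pylib/twister on sys.path

    harness = Harness()
    harness.record_patterns = [
        re.compile(r"^START:(?P<foo>[a-z]+):END"),
        re.compile(r"^START:(?P<boo>[0-9]+):END"),
    ]

    # Every pattern is tried against every line; the return value counts matches.
    assert harness.parse_record("START:bar:END") == 1
    assert harness.parse_record("START:123:END") == 1
    assert harness.parse_record("no record here") == 0
    print(harness.recording)  # [{'foo': 'bar'}, {'boo': '123'}]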

@@ -105,9 +105,12 @@ class TestInstance:
         self.recording.extend(recording)

         filename = os.path.join(self.build_dir, fname_csv)
+        fieldnames = set()
+        for r in self.recording:
+            fieldnames.update(r)
         with open(filename, 'w') as csvfile:
             cw = csv.DictWriter(csvfile,
-                                fieldnames = self.recording[0].keys(),
+                                fieldnames = sorted(list(fieldnames)),
                                 lineterminator = os.linesep,
                                 quoting = csv.QUOTE_NONNUMERIC)
             cw.writeheader()

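The export now derives columns from the union of field names across all
records, sorted alphabetically; csv.DictWriter fills fields missing from a
record with empty strings (its restval default). A small standalone sketch of
the same approach outside Twister:

    import csv
    import io
    import os

    recording = [{"foo": "bar"}, {"boo": "123"}]  # records with different fields

    fieldnames = set()
    for r in recording:
        fieldnames.update(r)  # union of keys across all records

    out = io.StringIO()
    cw = csv.DictWriter(out,
                        fieldnames=sorted(fieldnames),
                        lineterminator=os.linesep,
                        quoting=csv.QUOTE_NONNUMERIC)
    cw.writeheader()
    cw.writerows(recording)
    print(out.getvalue())
    # "boo","foo"
    # "","bar"
    # "123",""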

@@ -151,8 +151,13 @@ schema;scenario-schema:
         required: false
         mapping:
           "regex":
-            type: str
+            type: seq
             required: true
+            sequence:
+              - type: str
+          "merge":
+            type: bool
+            required: false
           "as_json":
             type: seq
             required: false

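Tying the schema to configure() above: a hedged sketch of a parsed record
configuration and the compilation step the harness performs (the second
pattern and its field names are made up for illustration):

    import re

    # Hypothetical harness_config fragment as configure() receives it
    # after YAML parsing; key names follow the schema in this commit.
    config = {
        "record": {
            "regex": [
                "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns",
                "(?P<metric>.*) took (?P<microseconds>[0-9]+) us",  # hypothetical
            ],
            "merge": False,
            "as_json": ["metric"],
        }
    }

    record = config.get("record", {})
    record_patterns = [re.compile(p) for p in record.get("regex", [])]
    record_merge = record.get("merge", False)
    record_as_json = record.get("as_json")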

@@ -61,26 +61,77 @@ def process_logs(harness, logs):
 TEST_DATA_RECORDING = [
-    ([""], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:STOP"], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], None),
+    ([""], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:STOP"], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], None, None),
     (
         ["START:bar:baz:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}],
         None,
+        None,
+    ),
+    (
+        ["START:bar:END"],
+        ["^(START:(?P<foo>[a-z]+):END)|(START:(?P<boo>[0-9]+):END)"],
+        [{"foo": "bar", "boo": ""}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar"}, {"boo": "baz"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<boo>[0-9]+):END"],
+        [{"foo": "bar"}, {"boo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": "bar"}, {"foo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": ["bar", "123"]}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar", "boo": "baz"}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<foo>.*):END"],
+        [{"foo": ["bar", "baz"]}],
+        None,
+        True,
     ),
     (
         ["START:bar:baz:END", "START:may:jun:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}, {"foo": "may", "boo": "jun"}],
         None,
+        None,
     ),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], []),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], ["boo"]),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], [], None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], ["boo"], None),
     (
         ["START:bad_json:END"],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [
             {
                 "foo": {
@@ -92,37 +143,66 @@ TEST_DATA_RECORDING = [
             }
         ],
         ["foo"],
+        None,
     ),
-    (["START::END"], "^START:(?P<foo>.*):END", [{"foo": {}}], ["foo"]),
+    (["START::END"], ["^START:(?P<foo>.*):END"], [{"foo": {}}], ["foo"], None),
     (
         ['START: {"one":1, "two":2} :END'],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [{"foo": {"one": 1, "two": 2}}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:oops:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": "oops"}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": {"oops": 0}}],
         ["foo", "boo"],
+        None,
     ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<boo>.*):END"
+        ],
+        [{"foo": {"one": 1, "two": 2}}, {"boo": {"oops": 0}}],
+        ["foo", "boo"],
+        None,
+    ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<foo>.*):END"
+        ],
+        [{"foo": [{"one": 1, "two": 2}, {"oops": 0}]}],
+        ["foo"],
+        True,
+    ),
 ]

 @pytest.mark.parametrize(
-    "lines, pattern, expected_records, as_json",
+    "lines, patterns, expected_records, as_json, merge",
     TEST_DATA_RECORDING,
     ids=[
         "empty",
         "no match",
         "match 1 field",
         "match 2 fields",
+        "2 or-ed groups one miss",
+        "one line, two patterns, match 2 fields -> 2 records",
+        "two lines, two patterns -> 2 records",
+        "two lines, two patterns same field -> 2 same records",
+        "two lines, two patterns same field merge -> 1 record 2 values",
+        "one line, two patterns, match 2 fields, merge -> 1 record",
+        "one line, two patterns, match 1 field, merge -> 1 record list",
         "match 2 records",
         "as_json empty",
         "as_json no such field",
@@ -131,13 +211,16 @@ TEST_DATA_RECORDING = [
         "simple json",
         "plain field and json field",
         "two json fields",
+        "two json fields in two patterns -> 2 records",
+        "two json fields in two patterns merge -> 1 record 2 items",
     ],
 )
-def test_harness_parse_record(lines, pattern, expected_records, as_json):
+def test_harness_parse_record(lines, patterns, expected_records, as_json, merge):
     harness = Harness()
-    harness.record = {"regex": pattern}
-    harness.record_pattern = re.compile(pattern)
+    harness.record = {"regex": patterns}
+    harness.record_patterns = [re.compile(p) for p in patterns]
+    harness.record_merge = merge
     harness.record_as_json = as_json
     if as_json is not None:
         harness.record["as_json"] = as_json