twister: recording: Allow JSON data fields

Extend Twister Harness recording feature to allow selected data fields,
extracted from the log by a regular expression, to be parsed into JSON
objects and eventually reported in `twister.json` as `recording` list
property of the test suite.

With this extension, log records can convey layered data structures
passed from a test image as summary results, traces, statistics, etc.

This extension also allows flexible recording structure: a test image
can output different types of data records encapsulated into a fixed
set of fields, so `recording.csv` file columns are respected, whereas
some of the columns keep strings with json-encoded semi-structured data.

Signed-off-by: Dmitrii Golovanov <dmitrii.golovanov@intel.com>
This commit is contained in:
Dmitrii Golovanov 2024-05-29 16:57:35 +02:00 committed by Anas Nashif
parent e9687c7e5c
commit 35e313f9e5
4 changed files with 91 additions and 12 deletions

View file

@ -504,16 +504,55 @@ harness_config: <harness configuration options>
The regular expression with named subgroups to match data fields
at the test's output lines where the test provides some custom data
for further analysis. These records will be written into the build
directory ``recording.csv`` file as well as ``recording`` property
of the test suite object in ``twister.json``.
For example, to extract three data fields ``metric``, ``cycles``,
``nanoseconds``:
.. code-block:: yaml
record:
regex: "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
as_json: <list of regex subgroup names> (optional)
Data fields, extracted by the regular expression into named subgroups,
which will be additionally parsed as JSON encoded strings and written
into ``twister.json`` as nested ``recording`` object properties.
The corresponding ``recording.csv`` columns will contain strings as-is.
Using this option, a test log can convey layered data structures
passed from the test image for further analysis with summary results,
traces, statistics, etc.
For example, this configuration:
.. code-block:: yaml
record:
regex: "RECORD:(?P<type>.*):DATA:(?P<metrics>.*)"
as_json: [metrics]
when matched to a test log string:
.. code-block:: none
RECORD:jitter_drift:DATA:{"rollovers":0, "mean_us":1000.0}
will be reported in ``twister.json`` as:
.. code-block:: json
"recording":[
{
"type":"jitter_drift",
"metrics":{
"rollovers":0,
"mean_us":1000.0
}
}
]
fixture: <expression>
Specify a test case dependency on an external device(e.g., sensor),
and identify setups that fulfill this dependency. It depends on

View file

@ -13,6 +13,7 @@ import logging
import threading
import time
import shutil
import json
from twisterlib.error import ConfigurationError
from twisterlib.environment import ZEPHYR_BASE, PYTEST_PLUGIN_INSTALLED
@ -57,6 +58,7 @@ class Harness:
self.next_pattern = 0
self.record = None
self.record_pattern = None
self.record_as_json = None
self.recording = []
self.ztest = False
self.detected_suite_names = []
@ -82,6 +84,7 @@ class Harness:
self.record = config.get('record', {})
if self.record:
self.record_pattern = re.compile(self.record.get("regex", ""))
self.record_as_json = self.record.get("as_json")
def build(self):
    """Hook for harness subclasses to prepare build artifacts; default is a no-op."""
    pass
@ -92,12 +95,27 @@ class Harness:
"""
return self.id
def translate_record(self, record: dict) -> dict:
    """Parse configured fields of a recording entry as JSON.

    For each subgroup name listed in ``self.record_as_json``, the field's
    string value in ``record`` is replaced by the parsed JSON object
    (an empty string becomes ``{}``). On a parse error the field is
    replaced with an ``{'ERROR': {'msg': ..., 'doc': ...}}`` stub and a
    warning is logged, so the Harness is not failed by a bad recording.

    :param record: one recording entry extracted by the record regex,
                   mapping subgroup names to string values.
    :return: the same dict, mutated in place.
    """
    if self.record_as_json:
        for field in self.record_as_json:
            # Silently skip fields the regex did not capture.
            if field not in record:
                continue
            try:
                record[field] = json.loads(record[field]) if record[field] else {}
            except json.JSONDecodeError as parse_error:
                logger.warning(f"HARNESS:{self.__class__.__name__}: recording JSON failed:"
                               f" {parse_error} for '{field}':'{record[field]}'")
                # Don't set the Harness state to failed for recordings.
                record[field] = { 'ERROR': { 'msg': str(parse_error), 'doc': record[field] } }
    return record
def parse_record(self, line) -> re.Match:
    """Match one output line against the configured recording regex.

    On a match, the named subgroup values are stripped, passed through
    ``translate_record()`` (optional JSON parsing of configured fields),
    and the resulting dict is appended to ``self.recording``.

    :param line: one line of the test's log output.
    :return: the ``re.Match`` object, or None when there is no pattern
             configured or the line does not match.
    """
    match = None
    if self.record_pattern:
        match = self.record_pattern.search(line)
        if match:
            rec = self.translate_record(
                {k: v.strip() for k, v in match.groupdict(default="").items()}
            )
            self.recording.append(rec)
    return match
#

View file

@ -130,6 +130,11 @@ schema;scenario-schema:
"regex":
type: str
required: true
"as_json":
type: seq
required: false
sequence:
- type: str
"bsim_exe_name":
type: str
required: false

View file

@ -45,23 +45,40 @@ def process_logs(harness, logs):
# Each entry: (log lines, record regex, expected recording entries, as_json field list).
# as_json=None means the 'as_json' option is not configured at all.
TEST_DATA_RECORDING = [
    ([''], "^START:(?P<foo>.*):END", [], None),
    (['START:bar:STOP'], "^START:(?P<foo>.*):END", [], None),
    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}], None),
    (['START:bar:baz:END'], "^START:(?P<foo>.*):(?P<boo>.*):END", [{'foo':'bar', 'boo':'baz'}], None),
    (['START:bar:baz:END','START:may:jun:END'], "^START:(?P<foo>.*):(?P<boo>.*):END",
     [{'foo':'bar', 'boo':'baz'}, {'foo':'may', 'boo':'jun'}], None),
    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}], []),
    (['START:bar:END'], "^START:(?P<foo>.*):END", [{'foo':'bar'}], ['boo']),
    (['START:bad_json:END'], "^START:(?P<foo>.*):END",
     [{'foo':{'ERROR':{'msg':'Expecting value: line 1 column 1 (char 0)', 'doc':'bad_json'}}}], ['foo']),
    (['START::END'], "^START:(?P<foo>.*):END", [{'foo':{}}], ['foo']),
    (['START: {"one":1, "two":2} :END'], "^START:(?P<foo>.*):END", [{'foo':{'one':1, 'two':2}}], ['foo']),
    (['START: {"one":1, "two":2} :STOP:oops:END'], "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
     [{'foo':{'one':1, 'two':2},'boo':'oops'}], ['foo']),
    (['START: {"one":1, "two":2} :STOP:{"oops":0}:END'], "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
     [{'foo':{'one':1, 'two':2},'boo':{'oops':0}}], ['foo','boo']),
]
@pytest.mark.parametrize(
"lines, pattern, expected_records",
"lines, pattern, expected_records, as_json",
TEST_DATA_RECORDING,
ids=["empty", "no match", "match 1 field", "match 2 fields", "match 2 records"]
ids=["empty", "no match", "match 1 field", "match 2 fields", "match 2 records",
"as_json empty", "as_json no such field", "error parsing json", "empty json value", "simple json",
"plain field and json field", "two json fields"
]
)
def test_harness_parse_record(lines, pattern, expected_records):
def test_harness_parse_record(lines, pattern, expected_records, as_json):
harness = Harness()
harness.record = { 'regex': pattern }
harness.record_pattern = re.compile(pattern)
harness.record_as_json = as_json
if as_json is not None:
harness.record['as_json'] = as_json
assert not harness.recording
for line in lines: