twister: harness: recording: Allow multiple patterns

Extend the Twister Harness 'recording' feature to allow multiple
regular expression patterns to extract different types of records
from test output.

Add a 'merge' recording mode that collects all extracted data fields
into a single record object per test instance.

The CSV export now takes all field names occurring in the collected
records, sorts them alphabetically, and uses them as columns, instead
of using only the first record's fields. This addresses the situation
where records have different sets of fields.

Adjust the Twister documentation and test suite for the above changes.

Signed-off-by: Dmitrii Golovanov <dmitrii.golovanov@intel.com>
Dmitrii Golovanov 2025-01-10 14:12:44 +01:00 committed by Benjamin Cabé
parent 9c5ed3b18f
commit b4d22420e9
5 changed files with 153 additions and 33 deletions


@@ -626,26 +626,43 @@ harness_config: <harness configuration options>
       Check the regular expression strings in orderly or randomly fashion
   record: <recording options> (optional)
-    regex: <regular expression> (required)
-      The regular expression with named subgroups to match data fields
-      at the test's output lines where the test provides some custom data
+    regex: <list of regular expressions> (required)
+      Regular expressions with named subgroups to match data fields found
+      in the test instance's output lines where it provides some custom data
       for further analysis. These records will be written into the build
       directory ``recording.csv`` file as well as ``recording`` property
       of the test suite object in ``twister.json``.
+      When several regular expressions are given, each of them is applied
+      to each output line, producing either several different records from
+      the same output line, or different records from different lines,
+      or similar records from different lines.
+      The .CSV file will have as many columns as there are fields detected
+      in all records; missing values are filled with empty strings.
       For example, to extract three data fields ``metric``, ``cycles``,
       ``nanoseconds``:

       .. code-block:: yaml

          record:
-           regex: "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+           regex:
+             - "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"
+
+    merge: <True|False> (default False)
+      Keep only one record per test instance, holding all the data fields
+      extracted by the regular expressions. Fields with the same name are
+      put into lists, ordered by their appearance in the recording. Such
+      multi-value fields may end up with different numbers of values,
+      depending on the regex rules and the test's output.
     as_json: <list of regex subgroup names> (optional)
-      Data fields, extracted by the regular expression into named subgroups,
+      Data fields, extracted by the regular expressions into named subgroups,
       which will be additionally parsed as JSON encoded strings and written
       into ``twister.json`` as nested ``recording`` object properties.
-      The corresponding ``recording.csv`` columns will contain strings as-is.
+      The corresponding ``recording.csv`` columns will contain the JSON
+      strings as-is.
       Using this option, a test log can convey layered data structures
       passed from the test image for further analysis with summary results,

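A minimal standalone sketch of the documented behavior (the extract_records
helper is hypothetical, not part of Twister; the real logic is in the
Harness.parse_record() change below). With merge enabled, same-named fields
collapse into one record, ordered by appearance:

    import re

    def extract_records(lines, patterns, merge=False):
        """Hypothetical helper mirroring the documented 'record' semantics."""
        recording = []
        compiled = [re.compile(p) for p in patterns]
        for line in lines:
            for pattern in compiled:
                match = pattern.search(line)
                if not match:
                    continue
                rec = {k: v.strip() for k, v in match.groupdict(default="").items()}
                if merge and recording:
                    # Same-named fields accumulate into lists, in order of appearance.
                    for k, v in rec.items():
                        if k in recording[0]:
                            if isinstance(recording[0][k], list):
                                recording[0][k].append(v)
                            else:
                                recording[0][k] = [recording[0][k], v]
                        else:
                            recording[0][k] = v
                else:
                    recording.append(rec)
        return recording

    lines = ["metric1:100 cycles, 42 ns", "metric2:200 cycles, 84 ns"]
    patterns = [r"(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns"]
    print(extract_records(lines, patterns))              # two separate records
    print(extract_records(lines, patterns, merge=True))  # one record with list values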

@@ -53,7 +53,8 @@ class Harness:
         self.capture_coverage = False
         self.next_pattern = 0
         self.record = None
-        self.record_pattern = None
+        self.record_patterns = []
+        self.record_merge = False
         self.record_as_json = None
         self.recording = []
         self.ztest = False
@@ -99,7 +100,8 @@ class Harness:
         self.ordered = config.get('ordered', True)
         self.record = config.get('record', {})
         if self.record:
-            self.record_pattern = re.compile(self.record.get("regex", ""))
+            self.record_patterns = [re.compile(p) for p in self.record.get("regex", [])]
+            self.record_merge = self.record.get("merge", False)
             self.record_as_json = self.record.get("as_json")

     def build(self):
@@ -125,17 +127,27 @@ class Harness:
                     record[k] = { 'ERROR': { 'msg': str(parse_error), 'doc': record[k] } }
         return record

-    def parse_record(self, line) -> re.Match:
-        match = None
-        if self.record_pattern:
-            match = self.record_pattern.search(line)
+    def parse_record(self, line) -> int:
+        match_cnt = 0
+        for record_pattern in self.record_patterns:
+            match = record_pattern.search(line)
             if match:
+                match_cnt += 1
                 rec = self.translate_record(
                     { k:v.strip() for k,v in match.groupdict(default="").items() }
                 )
-                self.recording.append(rec)
-        return match
+                if self.record_merge and len(self.recording) > 0:
+                    for k,v in rec.items():
+                        if k in self.recording[0]:
+                            if isinstance(self.recording[0][k], list):
+                                self.recording[0][k].append(v)
+                            else:
+                                self.recording[0][k] = [self.recording[0][k], v]
+                        else:
+                            self.recording[0][k] = v
+                else:
+                    self.recording.append(rec)
+        return match_cnt

     def process_test(self, line):

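Note the return-type change: parse_record() now returns the number of pattern
matches for the line instead of a match object, since one line can produce
several records. A hedged usage sketch (the import path is an assumption based
on Twister's layout, not shown in this diff):

    import re
    from twisterlib.harness import Harness  # assumes scripts/pylib/twister on sys.path

    harness = Harness()
    harness.record_patterns = [
        re.compile(r"^START:(?P<foo>[a-z]+):END"),
        re.compile(r"^START:(?P<boo>[0-9]+):END"),
    ]

    # Every pattern is tried against every line; the return value counts matches.
    assert harness.parse_record("START:bar:END") == 1
    assert harness.parse_record("START:123:END") == 1
    assert harness.parse_record("no record here") == 0
    print(harness.recording)  # [{'foo': 'bar'}, {'boo': '123'}]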

@@ -105,9 +105,12 @@ class TestInstance:
         self.recording.extend(recording)

         filename = os.path.join(self.build_dir, fname_csv)
+        fieldnames = set()
+        for r in self.recording:
+            fieldnames.update(r)
         with open(filename, 'w') as csvfile:
             cw = csv.DictWriter(csvfile,
-                                fieldnames = self.recording[0].keys(),
+                                fieldnames = sorted(list(fieldnames)),
                                 lineterminator = os.linesep,
                                 quoting = csv.QUOTE_NONNUMERIC)
             cw.writeheader()

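The export now derives columns from the union of field names across all
records, sorted alphabetically; csv.DictWriter fills fields missing from a
record with empty strings (its restval default). A small standalone sketch of
the same approach outside Twister:

    import csv
    import io
    import os

    recording = [{"foo": "bar"}, {"boo": "123"}]  # records with different fields

    fieldnames = set()
    for r in recording:
        fieldnames.update(r)  # union of keys across all records

    out = io.StringIO()
    cw = csv.DictWriter(out,
                        fieldnames=sorted(fieldnames),
                        lineterminator=os.linesep,
                        quoting=csv.QUOTE_NONNUMERIC)
    cw.writeheader()
    cw.writerows(recording)
    print(out.getvalue())
    # "boo","foo"
    # "","bar"
    # "123",""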

@@ -151,8 +151,13 @@ schema;scenario-schema:
         required: false
         mapping:
           "regex":
-            type: str
+            type: seq
             required: true
+            sequence:
+              - type: str
+          "merge":
+            type: bool
+            required: false
           "as_json":
             type: seq
             required: false

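Tying the schema to configure() above: a hedged sketch of a parsed record
configuration and the compilation step the harness performs (the second
pattern and its field names are made up for illustration):

    import re

    # Hypothetical harness_config fragment as configure() receives it
    # after YAML parsing; key names follow the schema in this commit.
    config = {
        "record": {
            "regex": [
                "(?P<metric>.*):(?P<cycles>.*) cycles, (?P<nanoseconds>.*) ns",
                "(?P<metric>.*) took (?P<microseconds>[0-9]+) us",  # hypothetical
            ],
            "merge": False,
            "as_json": ["metric"],
        }
    }

    record = config.get("record", {})
    record_patterns = [re.compile(p) for p in record.get("regex", [])]
    record_merge = record.get("merge", False)
    record_as_json = record.get("as_json")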

@@ -61,26 +61,77 @@ def process_logs(harness, logs):
 TEST_DATA_RECORDING = [
-    ([""], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:STOP"], "^START:(?P<foo>.*):END", [], None),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], None),
+    ([""], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:STOP"], ["^START:(?P<foo>.*):END"], [], None, None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], None, None),
     (
         ["START:bar:baz:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}],
         None,
+        None,
+    ),
+    (
+        ["START:bar:END"],
+        ["^(START:(?P<foo>[a-z]+):END)|(START:(?P<boo>[0-9]+):END)"],
+        [{"foo": "bar", "boo": ""}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar"}, {"boo": "baz"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<boo>[0-9]+):END"],
+        [{"foo": "bar"}, {"boo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": "bar"}, {"foo": "123"}],
+        None,
+        None,
+    ),
+    (
+        ["START:bar:END", "START:123:END"],
+        ["^START:(?P<foo>[a-z]+):END", "^START:(?P<foo>[0-9]+):END"],
+        [{"foo": ["bar", "123"]}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<boo>.*):END"],
+        [{"foo": "bar", "boo": "baz"}],
+        None,
+        True,
+    ),
+    (
+        ["START:bar:baz:END"],
+        ["^START:(?P<foo>.*):baz:END", "^START:bar:(?P<foo>.*):END"],
+        [{"foo": ["bar", "baz"]}],
+        None,
+        True,
     ),
     (
         ["START:bar:baz:END", "START:may:jun:END"],
-        "^START:(?P<foo>.*):(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):(?P<boo>.*):END"],
         [{"foo": "bar", "boo": "baz"}, {"foo": "may", "boo": "jun"}],
         None,
+        None,
     ),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], []),
-    (["START:bar:END"], "^START:(?P<foo>.*):END", [{"foo": "bar"}], ["boo"]),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], [], None),
+    (["START:bar:END"], ["^START:(?P<foo>.*):END"], [{"foo": "bar"}], ["boo"], None),
     (
         ["START:bad_json:END"],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [
             {
                 "foo": {
@@ -92,37 +143,66 @@ TEST_DATA_RECORDING = [
             }
         ],
         ["foo"],
+        None,
     ),
-    (["START::END"], "^START:(?P<foo>.*):END", [{"foo": {}}], ["foo"]),
+    (["START::END"], ["^START:(?P<foo>.*):END"], [{"foo": {}}], ["foo"], None),
     (
         ['START: {"one":1, "two":2} :END'],
-        "^START:(?P<foo>.*):END",
+        ["^START:(?P<foo>.*):END"],
         [{"foo": {"one": 1, "two": 2}}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:oops:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": "oops"}],
         ["foo"],
+        None,
     ),
     (
         ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
-        "^START:(?P<foo>.*):STOP:(?P<boo>.*):END",
+        ["^START:(?P<foo>.*):STOP:(?P<boo>.*):END"],
         [{"foo": {"one": 1, "two": 2}, "boo": {"oops": 0}}],
         ["foo", "boo"],
+        None,
     ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<boo>.*):END"
+        ],
+        [{"foo": {"one": 1, "two": 2}}, {"boo": {"oops": 0}}],
+        ["foo", "boo"],
+        None,
+    ),
+    (
+        ['START: {"one":1, "two":2} :STOP:{"oops":0}:END'],
+        ["^START:(?P<foo>.*):STOP:.*:END",
+         "^START:.*:STOP:(?P<foo>.*):END"
+        ],
+        [{"foo": [{"one": 1, "two": 2}, {"oops": 0}]}],
+        ["foo"],
+        True,
+    ),
 ]

 @pytest.mark.parametrize(
-    "lines, pattern, expected_records, as_json",
+    "lines, patterns, expected_records, as_json, merge",
     TEST_DATA_RECORDING,
     ids=[
         "empty",
         "no match",
         "match 1 field",
         "match 2 fields",
+        "2 or-ed groups one miss",
+        "one line, two patterns, match 2 fields -> 2 records",
+        "two lines, two patterns -> 2 records",
+        "two lines, two patterns same field -> 2 same records",
+        "two lines, two patterns same field merge -> 1 record 2 values",
+        "one line, two patterns, match 2 fields, merge -> 1 record",
+        "one line, two patterns, match 1 field, merge -> 1 record list",
         "match 2 records",
         "as_json empty",
         "as_json no such field",
@@ -131,13 +211,16 @@ TEST_DATA_RECORDING = [
         "simple json",
         "plain field and json field",
         "two json fields",
+        "two json fields in two patterns -> 2 records",
+        "two json fields in two patterns merge -> 1 record 2 items",
     ],
 )
-def test_harness_parse_record(lines, pattern, expected_records, as_json):
+def test_harness_parse_record(lines, patterns, expected_records, as_json, merge):
     harness = Harness()
-    harness.record = {"regex": pattern}
-    harness.record_pattern = re.compile(pattern)
+    harness.record = {"regex": patterns}
+    harness.record_patterns = [re.compile(p) for p in patterns]
+    harness.record_merge = merge
     harness.record_as_json = as_json
     if as_json is not None:
         harness.record["as_json"] = as_json