From d2fd50f573cf0c07eceb07e831f0dbbecf817c16 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Fri, 15 Aug 2025 11:03:54 -0500 Subject: [PATCH 01/12] added collector, needs utest --- .../inband/journal/journal_collector.py | 113 ++++++++++++++++++ .../plugins/inband/journal/journal_plugin.py | 37 ++++++ .../plugins/inband/journal/journaldata.py | 32 +++++ 3 files changed, 182 insertions(+) create mode 100644 nodescraper/plugins/inband/journal/journal_collector.py create mode 100644 nodescraper/plugins/inband/journal/journal_plugin.py create mode 100644 nodescraper/plugins/inband/journal/journaldata.py diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py new file mode 100644 index 0000000..1ed5b27 --- /dev/null +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -0,0 +1,113 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from nodescraper.base import InBandDataCollector +from nodescraper.connection.inband import TextFileArtifact +from nodescraper.enums import EventCategory, EventPriority, OSFamily +from nodescraper.models import TaskResult + +from .journaldata import JournalData + + +class JournalCollector(InBandDataCollector[JournalData, None]): + """Read journal log via journalctl.""" + + SUPPORTED_OS_FAMILY = {OSFamily.LINUX} + DATA_MODEL = JournalData + + CMD = "ls -1 /var/log/journal/*/system* 2>/dev/null || true" + + def _shell_quote(self, s: str) -> str: + return "'" + s.replace("'", "'\"'\"'") + "'" + + def _flat_name(self, path: str) -> str: + return "journalctl__" + path.lstrip("/").replace("/", "__") + ".jsonl" + + def _read_with_journalctl(self, path: str): + qp = self._shell_quote(path) + cmd = f"journalctl --no-pager --system --all --file={qp} --output=json" + res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) + + if res.exit_code == 0 and res.stdout: + text = ( + res.stdout.decode("utf-8", "replace") + if isinstance(res.stdout, (bytes, bytearray)) + else res.stdout + ) + fname = self._flat_name(path) + self.result.artifacts.append(TextFileArtifact(filename=fname, contents=text)) + self.logger.info("Collected journal: %s", path) + return fname + + return None + + def _get_journals(self): + list_res = self._run_sut_cmd(self.CMD, sudo=True) + paths = [p.strip() for p in (list_res.stdout or "").splitlines() if p.strip()] + + if not paths: + self._log_event( + category=EventCategory.OS, + description="No /var/log/journal files found (including rotations).", + data={"list_exit_code": list_res.exit_code}, + priority=EventPriority.WARNING, + ) + return [] + + collected, failed = [], [] + for p in paths: + self.logger.debug("Reading journal file: %s", p) + fname = self._read_with_journalctl(p) + if fname: + collected.append(fname) + else: + failed.append(fname) + + if collected: + self._log_event( + category=EventCategory.OS, + description="Collected journal logs.", + data={"collected": collected}, + priority=EventPriority.INFO, + ) + self.result.message = self.result.message or "journalctl logs collected" + + if failed: + self._log_event( + category=EventCategory.OS, + description="Some journal files could not be read with journalctl.", + data={"failed": failed}, + priority=EventPriority.WARNING, + ) + + return collected + + def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: + collected = self._get_journals() + if collected: + jd = JournalData(journal_logs=collected) + self.result.message = self.result.message or "Journal data collected" + return self.result, jd + return self.result, None diff --git a/nodescraper/plugins/inband/journal/journal_plugin.py b/nodescraper/plugins/inband/journal/journal_plugin.py new file mode 100644 index 0000000..72ccca5 --- /dev/null +++ b/nodescraper/plugins/inband/journal/journal_plugin.py @@ -0,0 +1,37 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from nodescraper.base import InBandDataPlugin + +from .journal_collector import JournalCollector +from .journaldata import JournalData + + +class JournalPlugin(InBandDataPlugin[JournalData, None, None]): + """Plugin for collection of journal data""" + + DATA_MODEL = JournalData + + COLLECTOR = JournalCollector diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py new file mode 100644 index 0000000..4792cc0 --- /dev/null +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -0,0 +1,32 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from nodescraper.models import DataModel + + +class JournalData(DataModel): + """Data model for journal logs""" + + journal_logs: list[str] = None From 5da215b65d210107665f45a5a2bd3ee2a7327de8 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 19 Aug 2025 13:19:38 -0500 Subject: [PATCH 02/12] typo fix --- nodescraper/plugins/inband/journal/journal_collector.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index 1ed5b27..b3d8048 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -43,14 +43,14 @@ def _shell_quote(self, s: str) -> str: return "'" + s.replace("'", "'\"'\"'") + "'" def _flat_name(self, path: str) -> str: - return "journalctl__" + path.lstrip("/").replace("/", "__") + ".jsonl" + return "journalctl__" + path.lstrip("/").replace("/", "__") + ".json" def _read_with_journalctl(self, path: str): qp = self._shell_quote(path) cmd = f"journalctl --no-pager --system --all --file={qp} --output=json" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) - if res.exit_code == 0 and res.stdout: + if res.exit_code == 0: text = ( res.stdout.decode("utf-8", "replace") if isinstance(res.stdout, (bytes, bytearray)) @@ -107,7 +107,7 @@ def _get_journals(self): def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: collected = self._get_journals() if collected: - jd = JournalData(journal_logs=collected) + data = JournalData(journal_logs=collected) self.result.message = self.result.message or "Journal data collected" - return self.result, jd + return self.result, data return self.result, None From ed246f105ec7ee5794a0ae09893813e6b777d9ea Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 19 Aug 2025 13:19:56 -0500 Subject: [PATCH 03/12] typo fix --- test/unit/plugin/test_journal_collector.py | 192 +++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 test/unit/plugin/test_journal_collector.py diff --git a/test/unit/plugin/test_journal_collector.py b/test/unit/plugin/test_journal_collector.py new file mode 100644 index 0000000..680a320 --- /dev/null +++ b/test/unit/plugin/test_journal_collector.py @@ -0,0 +1,192 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +import types + +from nodescraper.enums.systeminteraction import SystemInteractionLevel +from nodescraper.plugins.inband.journal.journal_collector import JournalCollector +from nodescraper.plugins.inband.journal.journaldata import JournalData + + +class DummyRes: + def __init__(self, command="", stdout="", exit_code=0, stderr=""): + self.command = command + self.stdout = stdout + self.exit_code = exit_code + self.stderr = stderr + + +def get_collector(monkeypatch, run_map, system_info, conn_mock): + c = JournalCollector( + system_info=system_info, + system_interaction_level=SystemInteractionLevel.INTERACTIVE, + connection=conn_mock, + ) + c.result = types.SimpleNamespace(artifacts=[], message=None) + c._events = [] + + def _log_event(**kw): + c._events.append(kw) + + def _run_sut_cmd(cmd, *args, **kwargs): + return run_map(cmd, *args, **kwargs) + + monkeypatch.setattr(c, "_log_event", _log_event, raising=True) + monkeypatch.setattr(c, "_run_sut_cmd", _run_sut_cmd, raising=True) + + return c + + +def test_get_journals_happy_path(monkeypatch, system_info, conn_mock): + paths = [ + "/var/log/journal/m1/system.journal", + "/var/log/journal/m1/system@0000000000000001-0000000000000002.journal", + "/var/log/journal/m2/system.journal", + ] + ls_out = "\n".join(paths) + "\n" + + def run_map(cmd, **kwargs): + if cmd.startswith("ls -1 /var/log/journal"): + return DummyRes(command=cmd, stdout=ls_out, exit_code=0) + + if cmd.startswith("journalctl ") and "--file=" in cmd: + if paths[0] in cmd: + return DummyRes(cmd, stdout='{"MESSAGE":"a"}\n', exit_code=0) + if paths[1] in cmd: + return DummyRes(cmd, stdout=b'{"MESSAGE":"b"}\n', exit_code=0) + if paths[2] in cmd: + return DummyRes(cmd, stdout='{"MESSAGE":"c"}\n', exit_code=0) + + return DummyRes(command=cmd, stdout="", exit_code=1, stderr="unexpected") + + c = get_collector(monkeypatch, run_map, system_info, conn_mock) + + collected = c._get_journals() + assert len(collected) == 3 + + expected_names = { + "journalctl__var__log__journal__m1__system.journal.json", + "journalctl__var__log__journal__m1__system@0000000000000001-0000000000000002.journal.json", + "journalctl__var__log__journal__m2__system.journal.json", + } + names = {a.filename for a in c.result.artifacts} + assert names == expected_names + + contents = {a.filename: a.contents for a in c.result.artifacts} + assert ( + contents["journalctl__var__log__journal__m1__system.journal.json"].strip() + == '{"MESSAGE":"a"}' + ) + assert ( + contents[ + "journalctl__var__log__journal__m1__system@0000000000000001-0000000000000002.journal.json" + ].strip() + == '{"MESSAGE":"b"}' + ) + assert ( + contents["journalctl__var__log__journal__m2__system.journal.json"].strip() + == '{"MESSAGE":"c"}' + ) + + assert any( + evt.get("description") == "Collected journal logs." + and getattr(evt.get("priority"), "name", str(evt.get("priority"))) == "INFO" + for evt in c._events + ) + assert c.result.message == "journalctl logs collected" + + +def test_get_journals_no_files(monkeypatch, system_info, conn_mock): + def run_map(cmd, **kwargs): + if cmd.startswith("ls -1 /var/log/journal"): + return DummyRes(command=cmd, stdout="", exit_code=0) + return DummyRes(command=cmd, stdout="", exit_code=1) + + c = get_collector(monkeypatch, run_map, system_info, conn_mock) + + collected = c._get_journals() + assert collected == [] + assert c.result.artifacts == [] + + assert any( + evt.get("description", "").startswith("No /var/log/journal files found") + and getattr(evt.get("priority"), "name", str(evt.get("priority"))) == "WARNING" + for evt in c._events + ) + + +def test_get_journals_partial_failure(monkeypatch, system_info, conn_mock): + ok_path = "/var/log/journal/m1/system.journal" + bad_path = "/var/log/journal/m1/system@bad.journal" + ls_out = ok_path + "\n" + bad_path + "\n" + + def run_map(cmd, **kwargs): + if cmd.startswith("ls -1 /var/log/journal"): + return DummyRes(command=cmd, stdout=ls_out, exit_code=0) + + if cmd.startswith("journalctl ") and "--file=" in cmd: + if ok_path in cmd: + return DummyRes(cmd, stdout='{"MESSAGE":"ok"}\n', exit_code=0) + if bad_path in cmd: + return DummyRes(cmd, stdout="", exit_code=1, stderr="cannot read") + + return DummyRes(command=cmd, stdout="", exit_code=1) + + c = get_collector(monkeypatch, run_map, system_info, conn_mock) + + collected = c._get_journals() + assert collected == ["journalctl__var__log__journal__m1__system.journal.json"] + assert [a.filename for a in c.result.artifacts] == [ + "journalctl__var__log__journal__m1__system.journal.json" + ] + + assert any( + evt.get("description") == "Some journal files could not be read with journalctl." + and getattr(evt.get("priority"), "name", str(evt.get("priority"))) == "WARNING" + for evt in c._events + ) + + +def test_collect_data_integration(monkeypatch, system_info, conn_mock): + dummy_path = "/var/log/journal/m1/system.journal" + ls_out = dummy_path + "\n" + + def run_map(cmd, **kwargs): + if cmd.startswith("ls -1 /var/log/journal"): + return DummyRes(command=cmd, stdout=ls_out, exit_code=0) + if cmd.startswith("journalctl ") and "--file=" in cmd and dummy_path in cmd: + return DummyRes(command=cmd, stdout='{"MESSAGE":"hello"}\n', exit_code=0) + return DummyRes(command=cmd, stdout="", exit_code=1) + + c = get_collector(monkeypatch, run_map, system_info, conn_mock) + + result, data = c.collect_data() + assert isinstance(data, JournalData) + + expected_name = "journalctl__var__log__journal__m1__system.journal.json" + assert data.journal_logs == [expected_name] + assert c.result.message == "journalctl logs collected" + + assert [a.filename for a in c.result.artifacts] == [expected_name] From 96c1e800f0e4db4e55ce6e9d58624a52efe56903 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 19 Aug 2025 13:35:54 -0500 Subject: [PATCH 04/12] added init file --- .../plugins/inband/journal/__init__.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 nodescraper/plugins/inband/journal/__init__.py diff --git a/nodescraper/plugins/inband/journal/__init__.py b/nodescraper/plugins/inband/journal/__init__.py new file mode 100644 index 0000000..946a821 --- /dev/null +++ b/nodescraper/plugins/inband/journal/__init__.py @@ -0,0 +1,28 @@ +############################################################################### +# +# MIT License +# +# Copyright (c) 2025 Advanced Micro Devices, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# +############################################################################### +from .journal_plugin import JournalPlugin + +__all__ = ["JournalPlugin"] From 2920d4c2e71c3fdecefca8d1c1bdaed8b0a91663 Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Tue, 19 Aug 2025 14:43:51 -0500 Subject: [PATCH 05/12] added docstring --- .../inband/journal/journal_collector.py | 39 ++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index b3d8048..1ed8d79 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -40,12 +40,36 @@ class JournalCollector(InBandDataCollector[JournalData, None]): CMD = "ls -1 /var/log/journal/*/system* 2>/dev/null || true" def _shell_quote(self, s: str) -> str: + """single-quote fix. + + Args: + s (str): path + + Returns: + str: escaped path + """ return "'" + s.replace("'", "'\"'\"'") + "'" def _flat_name(self, path: str) -> str: + """Flatten path name + + Args: + path (str): path + + Returns: + str: flattened path name + """ return "journalctl__" + path.lstrip("/").replace("/", "__") + ".json" def _read_with_journalctl(self, path: str): + """Read journal logs using journalctl + + Args: + path (str): path for log to read + + Returns: + str|None: name of local journal log filed, or None if log was not read + """ qp = self._shell_quote(path) cmd = f"journalctl --no-pager --system --all --file={qp} --output=json" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) @@ -63,7 +87,12 @@ def _read_with_journalctl(self, path: str): return None - def _get_journals(self): + def _get_journals(self) -> list[str]: + """Read journal log files on remote system + + Returns: + list[str]: List of names of read logs + """ list_res = self._run_sut_cmd(self.CMD, sudo=True) paths = [p.strip() for p in (list_res.stdout or "").splitlines() if p.strip()] @@ -105,6 +134,14 @@ def _get_journals(self): return collected def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: + """Collect journal lofs + + Args: + args (_type_, optional): Collection args. Defaults to None. + + Returns: + tuple[TaskResult, JournalData | None]: Tuple of results and data model or none. + """ collected = self._get_journals() if collected: data = JournalData(journal_logs=collected) From ee5fba3dc07390aaf358cbbe0fc37d0c211a417f Mon Sep 17 00:00:00 2001 From: Alexandra Bara Date: Thu, 28 Aug 2025 11:20:01 -0500 Subject: [PATCH 06/12] fixed mypy --- nodescraper/plugins/inband/journal/journaldata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py index 4792cc0..932084b 100644 --- a/nodescraper/plugins/inband/journal/journaldata.py +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -29,4 +29,4 @@ class JournalData(DataModel): """Data model for journal logs""" - journal_logs: list[str] = None + journal_logs: list[str] = [] From bca73c090a191f89303365384a0fd9352f7a5ce6 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Mon, 8 Sep 2025 15:36:51 -0500 Subject: [PATCH 07/12] reading journal logs with journalctl only --- .../inband/journal/journal_collector.py | 99 ++------------ .../plugins/inband/journal/journaldata.py | 2 +- test/unit/plugin/test_journal_collector.py | 125 +----------------- 3 files changed, 11 insertions(+), 215 deletions(-) diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index 1ed8d79..5c6aedb 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -24,8 +24,7 @@ # ############################################################################### from nodescraper.base import InBandDataCollector -from nodescraper.connection.inband import TextFileArtifact -from nodescraper.enums import EventCategory, EventPriority, OSFamily +from nodescraper.enums import OSFamily from nodescraper.models import TaskResult from .journaldata import JournalData @@ -37,102 +36,20 @@ class JournalCollector(InBandDataCollector[JournalData, None]): SUPPORTED_OS_FAMILY = {OSFamily.LINUX} DATA_MODEL = JournalData - CMD = "ls -1 /var/log/journal/*/system* 2>/dev/null || true" - - def _shell_quote(self, s: str) -> str: - """single-quote fix. - - Args: - s (str): path - - Returns: - str: escaped path - """ - return "'" + s.replace("'", "'\"'\"'") + "'" - - def _flat_name(self, path: str) -> str: - """Flatten path name - - Args: - path (str): path - - Returns: - str: flattened path name - """ - return "journalctl__" + path.lstrip("/").replace("/", "__") + ".json" - - def _read_with_journalctl(self, path: str): + def _read_with_journalctl(self): """Read journal logs using journalctl - Args: - path (str): path for log to read - Returns: - str|None: name of local journal log filed, or None if log was not read + str|None: system journal read """ - qp = self._shell_quote(path) - cmd = f"journalctl --no-pager --system --all --file={qp} --output=json" + cmd = "journalctl --no-pager --system --all -o short-iso --output=json" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) if res.exit_code == 0: - text = ( - res.stdout.decode("utf-8", "replace") - if isinstance(res.stdout, (bytes, bytearray)) - else res.stdout - ) - fname = self._flat_name(path) - self.result.artifacts.append(TextFileArtifact(filename=fname, contents=text)) - self.logger.info("Collected journal: %s", path) - return fname + return res.stdout return None - def _get_journals(self) -> list[str]: - """Read journal log files on remote system - - Returns: - list[str]: List of names of read logs - """ - list_res = self._run_sut_cmd(self.CMD, sudo=True) - paths = [p.strip() for p in (list_res.stdout or "").splitlines() if p.strip()] - - if not paths: - self._log_event( - category=EventCategory.OS, - description="No /var/log/journal files found (including rotations).", - data={"list_exit_code": list_res.exit_code}, - priority=EventPriority.WARNING, - ) - return [] - - collected, failed = [], [] - for p in paths: - self.logger.debug("Reading journal file: %s", p) - fname = self._read_with_journalctl(p) - if fname: - collected.append(fname) - else: - failed.append(fname) - - if collected: - self._log_event( - category=EventCategory.OS, - description="Collected journal logs.", - data={"collected": collected}, - priority=EventPriority.INFO, - ) - self.result.message = self.result.message or "journalctl logs collected" - - if failed: - self._log_event( - category=EventCategory.OS, - description="Some journal files could not be read with journalctl.", - data={"failed": failed}, - priority=EventPriority.WARNING, - ) - - return collected - def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: """Collect journal lofs @@ -142,9 +59,9 @@ def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: Returns: tuple[TaskResult, JournalData | None]: Tuple of results and data model or none. """ - collected = self._get_journals() - if collected: - data = JournalData(journal_logs=collected) + journal_log = self._read_with_journalctl() + if journal_log: + data = JournalData(journal_log=journal_log) self.result.message = self.result.message or "Journal data collected" return self.result, data return self.result, None diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py index 932084b..cb9691b 100644 --- a/nodescraper/plugins/inband/journal/journaldata.py +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -29,4 +29,4 @@ class JournalData(DataModel): """Data model for journal logs""" - journal_logs: list[str] = [] + journal_log: str diff --git a/test/unit/plugin/test_journal_collector.py b/test/unit/plugin/test_journal_collector.py index 680a320..6e6ede0 100644 --- a/test/unit/plugin/test_journal_collector.py +++ b/test/unit/plugin/test_journal_collector.py @@ -59,134 +59,13 @@ def _run_sut_cmd(cmd, *args, **kwargs): return c -def test_get_journals_happy_path(monkeypatch, system_info, conn_mock): - paths = [ - "/var/log/journal/m1/system.journal", - "/var/log/journal/m1/system@0000000000000001-0000000000000002.journal", - "/var/log/journal/m2/system.journal", - ] - ls_out = "\n".join(paths) + "\n" - - def run_map(cmd, **kwargs): - if cmd.startswith("ls -1 /var/log/journal"): - return DummyRes(command=cmd, stdout=ls_out, exit_code=0) - - if cmd.startswith("journalctl ") and "--file=" in cmd: - if paths[0] in cmd: - return DummyRes(cmd, stdout='{"MESSAGE":"a"}\n', exit_code=0) - if paths[1] in cmd: - return DummyRes(cmd, stdout=b'{"MESSAGE":"b"}\n', exit_code=0) - if paths[2] in cmd: - return DummyRes(cmd, stdout='{"MESSAGE":"c"}\n', exit_code=0) - - return DummyRes(command=cmd, stdout="", exit_code=1, stderr="unexpected") - - c = get_collector(monkeypatch, run_map, system_info, conn_mock) - - collected = c._get_journals() - assert len(collected) == 3 - - expected_names = { - "journalctl__var__log__journal__m1__system.journal.json", - "journalctl__var__log__journal__m1__system@0000000000000001-0000000000000002.journal.json", - "journalctl__var__log__journal__m2__system.journal.json", - } - names = {a.filename for a in c.result.artifacts} - assert names == expected_names - - contents = {a.filename: a.contents for a in c.result.artifacts} - assert ( - contents["journalctl__var__log__journal__m1__system.journal.json"].strip() - == '{"MESSAGE":"a"}' - ) - assert ( - contents[ - "journalctl__var__log__journal__m1__system@0000000000000001-0000000000000002.journal.json" - ].strip() - == '{"MESSAGE":"b"}' - ) - assert ( - contents["journalctl__var__log__journal__m2__system.journal.json"].strip() - == '{"MESSAGE":"c"}' - ) - - assert any( - evt.get("description") == "Collected journal logs." - and getattr(evt.get("priority"), "name", str(evt.get("priority"))) == "INFO" - for evt in c._events - ) - assert c.result.message == "journalctl logs collected" - - -def test_get_journals_no_files(monkeypatch, system_info, conn_mock): - def run_map(cmd, **kwargs): - if cmd.startswith("ls -1 /var/log/journal"): - return DummyRes(command=cmd, stdout="", exit_code=0) - return DummyRes(command=cmd, stdout="", exit_code=1) - - c = get_collector(monkeypatch, run_map, system_info, conn_mock) - - collected = c._get_journals() - assert collected == [] - assert c.result.artifacts == [] - - assert any( - evt.get("description", "").startswith("No /var/log/journal files found") - and getattr(evt.get("priority"), "name", str(evt.get("priority"))) == "WARNING" - for evt in c._events - ) - - -def test_get_journals_partial_failure(monkeypatch, system_info, conn_mock): - ok_path = "/var/log/journal/m1/system.journal" - bad_path = "/var/log/journal/m1/system@bad.journal" - ls_out = ok_path + "\n" + bad_path + "\n" - - def run_map(cmd, **kwargs): - if cmd.startswith("ls -1 /var/log/journal"): - return DummyRes(command=cmd, stdout=ls_out, exit_code=0) - - if cmd.startswith("journalctl ") and "--file=" in cmd: - if ok_path in cmd: - return DummyRes(cmd, stdout='{"MESSAGE":"ok"}\n', exit_code=0) - if bad_path in cmd: - return DummyRes(cmd, stdout="", exit_code=1, stderr="cannot read") - - return DummyRes(command=cmd, stdout="", exit_code=1) - - c = get_collector(monkeypatch, run_map, system_info, conn_mock) - - collected = c._get_journals() - assert collected == ["journalctl__var__log__journal__m1__system.journal.json"] - assert [a.filename for a in c.result.artifacts] == [ - "journalctl__var__log__journal__m1__system.journal.json" - ] - - assert any( - evt.get("description") == "Some journal files could not be read with journalctl." - and getattr(evt.get("priority"), "name", str(evt.get("priority"))) == "WARNING" - for evt in c._events - ) - - def test_collect_data_integration(monkeypatch, system_info, conn_mock): - dummy_path = "/var/log/journal/m1/system.journal" - ls_out = dummy_path + "\n" - def run_map(cmd, **kwargs): - if cmd.startswith("ls -1 /var/log/journal"): - return DummyRes(command=cmd, stdout=ls_out, exit_code=0) - if cmd.startswith("journalctl ") and "--file=" in cmd and dummy_path in cmd: - return DummyRes(command=cmd, stdout='{"MESSAGE":"hello"}\n', exit_code=0) - return DummyRes(command=cmd, stdout="", exit_code=1) + return DummyRes(command=cmd, stdout='{"MESSAGE":"hello"}\n', exit_code=0) c = get_collector(monkeypatch, run_map, system_info, conn_mock) result, data = c.collect_data() assert isinstance(data, JournalData) - expected_name = "journalctl__var__log__journal__m1__system.journal.json" - assert data.journal_logs == [expected_name] - assert c.result.message == "journalctl logs collected" - - assert [a.filename for a in c.result.artifacts] == [expected_name] + assert data.journal_log == '{"MESSAGE":"hello"}\n' From 28e43196e6b1ff575a8362a099e3aec4487229ab Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Mon, 8 Sep 2025 15:48:19 -0500 Subject: [PATCH 08/12] updated failure case --- .../plugins/inband/journal/journal_collector.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index 5c6aedb..e124954 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -24,7 +24,7 @@ # ############################################################################### from nodescraper.base import InBandDataCollector -from nodescraper.enums import OSFamily +from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily from nodescraper.models import TaskResult from .journaldata import JournalData @@ -47,6 +47,16 @@ def _read_with_journalctl(self): if res.exit_code == 0: return res.stdout + else: + self._log_event( + category=EventCategory.OS, + description="Error reading journalctl", + data={"command": res.command, "exit_code": res.exit_code}, + priority=EventPriority.ERROR, + console_log=True, + ) + self.result.message = "Could not read journalctl data" + self.result.status = ExecutionStatus.ERROR return None From c0f1e9ab1ca7e304dd760f81f48f8f39316a6263 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Tue, 9 Sep 2025 16:18:02 -0500 Subject: [PATCH 09/12] added log_model call for journal plugin + updated journaldata --- .../inband/journal/journal_collector.py | 23 +++++++++++++++---- .../plugins/inband/journal/journaldata.py | 17 +++++++++++++- test/unit/plugin/test_journal_collector.py | 2 +- 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index e124954..1c69fb8 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -23,6 +23,9 @@ # SOFTWARE. # ############################################################################### +import io +import json + from nodescraper.base import InBandDataCollector from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily from nodescraper.models import TaskResult @@ -42,12 +45,10 @@ def _read_with_journalctl(self): Returns: str|None: system journal read """ - cmd = "journalctl --no-pager --system --all -o short-iso --output=json" + cmd = "journalctl --no-pager --system --all --output=json" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) - if res.exit_code == 0: - return res.stdout - else: + if res.exit_code != 0: self._log_event( category=EventCategory.OS, description="Error reading journalctl", @@ -57,8 +58,20 @@ def _read_with_journalctl(self): ) self.result.message = "Could not read journalctl data" self.result.status = ExecutionStatus.ERROR + return None + + raw = res.stdout + text = ( + raw.decode("utf-8", errors="surrogateescape") + if isinstance(raw, (bytes, bytearray)) + else raw + ) + + lines = [ln for ln in (line.strip() for line in text.splitlines()) if ln.startswith("{")] + array_like = "[" + ",".join(lines) + "]" + entries: list[dict] = json.load(io.StringIO(array_like)) - return None + return entries def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: """Collect journal lofs diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py index cb9691b..0b5d0cb 100644 --- a/nodescraper/plugins/inband/journal/journaldata.py +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -23,10 +23,25 @@ # SOFTWARE. # ############################################################################### +import json +import os + from nodescraper.models import DataModel class JournalData(DataModel): """Data model for journal logs""" - journal_log: str + journal_log: list[dict] + + def log_model(self, log_path: str): + """Log data model to a file + + Args: + log_path (str): log path + """ + log_name = os.path.join(log_path, "journal.log") + with open(log_name, "w", encoding="utf-8") as log_filename: + for e in self.journal_log: + log_filename.write(json.dumps(e, ensure_ascii=False, separators=(",", ":"))) + log_filename.write("\n") diff --git a/test/unit/plugin/test_journal_collector.py b/test/unit/plugin/test_journal_collector.py index 6e6ede0..2b1b910 100644 --- a/test/unit/plugin/test_journal_collector.py +++ b/test/unit/plugin/test_journal_collector.py @@ -68,4 +68,4 @@ def run_map(cmd, **kwargs): result, data = c.collect_data() assert isinstance(data, JournalData) - assert data.journal_log == '{"MESSAGE":"hello"}\n' + assert data.journal_log == [{"MESSAGE": "hello"}] From 1b1fc56d779c51e6104e99004c75aed72750e1f4 Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Wed, 10 Sep 2025 10:46:55 -0500 Subject: [PATCH 10/12] iso --- nodescraper/connection/inband/inbandlocal.py | 2 +- .../inband/journal/journal_collector.py | 26 ++++++++++--------- .../plugins/inband/journal/journaldata.py | 2 +- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/nodescraper/connection/inband/inbandlocal.py b/nodescraper/connection/inband/inbandlocal.py index 8429366..f8dd609 100644 --- a/nodescraper/connection/inband/inbandlocal.py +++ b/nodescraper/connection/inband/inbandlocal.py @@ -54,7 +54,7 @@ def run_command( res = subprocess.run( command, - encoding="utf-8", + encoding=None, shell=True, timeout=timeout, capture_output=True, diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index 1c69fb8..d9dd39a 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -45,9 +45,10 @@ def _read_with_journalctl(self): Returns: str|None: system journal read """ - cmd = "journalctl --no-pager --system --all --output=json" + cmd = "journalctl --no-pager --system --all --output=short-iso" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) + if res.exit_code != 0: self._log_event( category=EventCategory.OS, @@ -60,21 +61,22 @@ def _read_with_journalctl(self): self.result.status = ExecutionStatus.ERROR return None - raw = res.stdout - text = ( - raw.decode("utf-8", errors="surrogateescape") - if isinstance(raw, (bytes, bytearray)) - else raw - ) + out = res.stdout + + if isinstance(out, (bytes, bytearray)): + try: + text = out.decode("utf-8") + except UnicodeDecodeError: + text = out.decode("utf-8", errors="replace") + else: + text = out - lines = [ln for ln in (line.strip() for line in text.splitlines()) if ln.startswith("{")] - array_like = "[" + ",".join(lines) + "]" - entries: list[dict] = json.load(io.StringIO(array_like)) + text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\x00", "") + return text - return entries def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: - """Collect journal lofs + """Collect journal logs Args: args (_type_, optional): Collection args. Defaults to None. diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py index 0b5d0cb..57a4072 100644 --- a/nodescraper/plugins/inband/journal/journaldata.py +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -32,7 +32,7 @@ class JournalData(DataModel): """Data model for journal logs""" - journal_log: list[dict] + journal_log: str def log_model(self, log_path: str): """Log data model to a file From 0d9625c16a55b8da54287feb3ab9f90e7d71ce1b Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Wed, 10 Sep 2025 14:57:03 -0500 Subject: [PATCH 11/12] decode fix --- nodescraper/connection/inband/inbandlocal.py | 2 +- .../inband/journal/journal_collector.py | 23 ++++++++----------- .../plugins/inband/journal/journaldata.py | 7 ++---- test/unit/plugin/test_journal_collector.py | 2 +- 4 files changed, 13 insertions(+), 21 deletions(-) diff --git a/nodescraper/connection/inband/inbandlocal.py b/nodescraper/connection/inband/inbandlocal.py index f8dd609..8429366 100644 --- a/nodescraper/connection/inband/inbandlocal.py +++ b/nodescraper/connection/inband/inbandlocal.py @@ -54,7 +54,7 @@ def run_command( res = subprocess.run( command, - encoding=None, + encoding="utf-8", shell=True, timeout=timeout, capture_output=True, diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index d9dd39a..15ce0ca 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -23,8 +23,7 @@ # SOFTWARE. # ############################################################################### -import io -import json +import base64 from nodescraper.base import InBandDataCollector from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily @@ -45,10 +44,9 @@ def _read_with_journalctl(self): Returns: str|None: system journal read """ - cmd = "journalctl --no-pager --system --all --output=short-iso" + cmd = "journalctl --no-pager --system --all --output=short-iso 2>&1 | base64 -w0" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) - if res.exit_code != 0: self._log_event( category=EventCategory.OS, @@ -61,20 +59,17 @@ def _read_with_journalctl(self): self.result.status = ExecutionStatus.ERROR return None - out = res.stdout - - if isinstance(out, (bytes, bytearray)): - try: - text = out.decode("utf-8") - except UnicodeDecodeError: - text = out.decode("utf-8", errors="replace") + if isinstance(res.stdout, (bytes, bytearray)): + b64 = ( + res.stdout if isinstance(res.stdout, str) else res.stdout.decode("ascii", "ignore") + ) + raw = base64.b64decode("".join(b64.split())) + text = raw.decode("utf-8", errors="replace") else: - text = out + text = res.stdout - text = text.replace("\r\n", "\n").replace("\r", "\n").replace("\x00", "") return text - def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: """Collect journal logs diff --git a/nodescraper/plugins/inband/journal/journaldata.py b/nodescraper/plugins/inband/journal/journaldata.py index 57a4072..8c1d06d 100644 --- a/nodescraper/plugins/inband/journal/journaldata.py +++ b/nodescraper/plugins/inband/journal/journaldata.py @@ -23,7 +23,6 @@ # SOFTWARE. # ############################################################################### -import json import os from nodescraper.models import DataModel @@ -32,7 +31,7 @@ class JournalData(DataModel): """Data model for journal logs""" - journal_log: str + journal_log: str def log_model(self, log_path: str): """Log data model to a file @@ -42,6 +41,4 @@ def log_model(self, log_path: str): """ log_name = os.path.join(log_path, "journal.log") with open(log_name, "w", encoding="utf-8") as log_filename: - for e in self.journal_log: - log_filename.write(json.dumps(e, ensure_ascii=False, separators=(",", ":"))) - log_filename.write("\n") + log_filename.write(self.journal_log) diff --git a/test/unit/plugin/test_journal_collector.py b/test/unit/plugin/test_journal_collector.py index 2b1b910..6e6ede0 100644 --- a/test/unit/plugin/test_journal_collector.py +++ b/test/unit/plugin/test_journal_collector.py @@ -68,4 +68,4 @@ def run_map(cmd, **kwargs): result, data = c.collect_data() assert isinstance(data, JournalData) - assert data.journal_log == [{"MESSAGE": "hello"}] + assert data.journal_log == '{"MESSAGE":"hello"}\n' From 3254994a01a4293fafd3aa91628c441520c1752c Mon Sep 17 00:00:00 2001 From: Alex Bara Date: Thu, 11 Sep 2025 14:39:34 -0500 Subject: [PATCH 12/12] removed extra decode --- nodescraper/connection/inband/inbandlocal.py | 1 + .../plugins/inband/journal/journal_collector.py | 14 ++------------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/nodescraper/connection/inband/inbandlocal.py b/nodescraper/connection/inband/inbandlocal.py index 8429366..e36e460 100644 --- a/nodescraper/connection/inband/inbandlocal.py +++ b/nodescraper/connection/inband/inbandlocal.py @@ -56,6 +56,7 @@ def run_command( command, encoding="utf-8", shell=True, + errors="replace", timeout=timeout, capture_output=True, check=False, diff --git a/nodescraper/plugins/inband/journal/journal_collector.py b/nodescraper/plugins/inband/journal/journal_collector.py index 15ce0ca..90c44e5 100644 --- a/nodescraper/plugins/inband/journal/journal_collector.py +++ b/nodescraper/plugins/inband/journal/journal_collector.py @@ -23,7 +23,6 @@ # SOFTWARE. # ############################################################################### -import base64 from nodescraper.base import InBandDataCollector from nodescraper.enums import EventCategory, EventPriority, ExecutionStatus, OSFamily @@ -44,7 +43,7 @@ def _read_with_journalctl(self): Returns: str|None: system journal read """ - cmd = "journalctl --no-pager --system --all --output=short-iso 2>&1 | base64 -w0" + cmd = "journalctl --no-pager --system --output=short-iso" res = self._run_sut_cmd(cmd, sudo=True, log_artifact=False, strip=False) if res.exit_code != 0: @@ -59,16 +58,7 @@ def _read_with_journalctl(self): self.result.status = ExecutionStatus.ERROR return None - if isinstance(res.stdout, (bytes, bytearray)): - b64 = ( - res.stdout if isinstance(res.stdout, str) else res.stdout.decode("ascii", "ignore") - ) - raw = base64.b64decode("".join(b64.split())) - text = raw.decode("utf-8", errors="replace") - else: - text = res.stdout - - return text + return res.stdout def collect_data(self, args=None) -> tuple[TaskResult, JournalData | None]: """Collect journal logs