From a8ab758f7d0ecd810b312f4470541ccb97432675 Mon Sep 17 00:00:00 2001
From: Valerie <v.bartel.vb@gmail.com>
Date: Mon, 14 Apr 2025 13:53:55 +0200
Subject: [PATCH 1/5] update gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 3d75bbd0c..f6d9742f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -229,3 +229,6 @@ doku/*
 **/workspace_status.json
 
 .pytest_cache/
+
+# tts model
+*/bitbots_tts/model/*

From 9f56b5209e95a92e52b00b71c7f332ab5260ea39 Mon Sep 17 00:00:00 2001
From: Valerie <v.bartel.vb@gmail.com>
Date: Mon, 14 Apr 2025 14:16:40 +0200
Subject: [PATCH 2/5] use piper instead of mimic

---
 bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 35 ++++++++++++++++-----
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
index 8a8afd0c5..66648b905 100755
--- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
+++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
@@ -1,13 +1,15 @@
 #!/usr/bin/env python3
 
-import os
-import subprocess
+import io
 import time
 import traceback
+import wave
+from pathlib import Path
 
 import rclpy
 import requests
-from ament_index_python import get_package_prefix
+import sounddevice as sd
+from piper import PiperVoice
 from rcl_interfaces.msg import Parameter, SetParametersResult
 from rclpy.callback_groups import MutuallyExclusiveCallbackGroup
 from rclpy.executors import MultiThreadedExecutor
@@ -16,6 +18,12 @@
 
 from bitbots_msgs.msg import Audio
 
+# Load the Piper voice
+bb_tts_dir = Path(__file__).parent + "/model"  # TODO: check how to get nice relative paths
+model_path = bb_tts_dir + "/en_US-lessac-medium.onnx"
+config_path = bb_tts_dir + "/en_US-lessac-medium.onnx.json"
+voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False)
+
 
 def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: bool = True) -> None:
     """Utility method which can be used by other classes to easily publish a message."""
@@ -27,10 +35,23 @@ def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active:
 
 
 def say(text: str) -> None:
-    """Start the shell `say.sh` script to output given text with mimic3. Beware: this is blocking."""
-    script_path = os.path.join(get_package_prefix("bitbots_tts"), "lib/bitbots_tts/say.sh")
-    process = subprocess.Popen((script_path, text))
-    process.wait()
+    """Use piper for speech synthesis and audio playback.
+    This is also used for speaking the ip adress during startup."""
+    synthesize_args = {
+        "speaker_id": 0,  # Adjust if you're using multi-speaker models
+        "length_scale": 1.0,
+        "noise_scale": 0.667,
+        "noise_w": 0.8,
+        "sentence_silence": 0.0,
+    }
+    with io.BytesIO() as buffer:
+        with wave.open(buffer, "wb") as wav_file:
+            voice.synthesize(text, wav_file, **synthesize_args)
+
+        buffer.seek(0)
+        with wave.open(buffer, "rb") as wav:
+            audio = wav.readframes(wav.getnframes())
+            sd.play(audio, samplerate=wav.getframerate(), blocking=True)
 
 
 class Speaker(Node):

From 8188c89ad19ec3ed718537250270bdb6d660b16c Mon Sep 17 00:00:00 2001
From: Valerie <v.bartel.vb@gmail.com>
Date: Mon, 14 Apr 2025 14:53:10 +0200
Subject: [PATCH 3/5] fix path and add config to setup.py

---
 bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 6 +++---
 bitbots_misc/bitbots_tts/setup.py           | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
index 66648b905..681a7bf14 100755
--- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
+++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
@@ -19,9 +19,9 @@
 from bitbots_msgs.msg import Audio
 
 # Load the Piper voice
-bb_tts_dir = Path(__file__).parent + "/model"  # TODO: check how to get nice relative paths
-model_path = bb_tts_dir + "/en_US-lessac-medium.onnx"
-config_path = bb_tts_dir + "/en_US-lessac-medium.onnx.json"
+bb_tts_dir = Path(__file__).parent.parent / "model"  # TODO: check how to get nice relative paths
+model_path = bb_tts_dir / "en_US-lessac-medium.onnx"
+config_path = bb_tts_dir / "en_US-lessac-medium.onnx.json"
 voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False)
 
 
diff --git a/bitbots_misc/bitbots_tts/setup.py b/bitbots_misc/bitbots_tts/setup.py
index 29cedacd8..fc7b93835 100644
--- a/bitbots_misc/bitbots_tts/setup.py
+++ b/bitbots_misc/bitbots_tts/setup.py
@@ -12,6 +12,7 @@
         ("share/ament_index/resource_index/packages", ["resource/" + package_name]),
         ("share/" + package_name + "/config", glob.glob("config/*.yaml")),
         ("share/" + package_name + "/launch", glob.glob("launch/*.launch")),
+        ("share/" + package_name + "/model", glob.glob("model/*")),
     ],
     install_requires=[
         "setuptools",

From 66eb4c18ac6075d79a8753fcedd4ebb2c26a99c3 Mon Sep 17 00:00:00 2001
From: Valerie <v.bartel.vb@gmail.com>
Date: Mon, 14 Apr 2025 14:53:45 +0200
Subject: [PATCH 4/5] remove mimic server

---
 bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
index 681a7bf14..754cfd943 100755
--- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
+++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
@@ -1,13 +1,11 @@
 #!/usr/bin/env python3
 
 import io
-import time
 import traceback
 import wave
 from pathlib import Path
 
 import rclpy
-import requests
 import sounddevice as sd
 from piper import PiperVoice
 from rcl_interfaces.msg import Parameter, SetParametersResult
@@ -83,17 +81,6 @@ def __init__(self) -> None:
         # Subscribe to the speak topic
         self.create_subscription(Audio, "speak", self.speak_cb, 10, callback_group=MutuallyExclusiveCallbackGroup())
 
-        # Wait for the mimic server to start
-        while True:
-            try:
-                requests.get("http://localhost:59125")
-                break
-            except requests.exceptions.ConnectionError:
-                # log once per second that the server is not yet available
-                self.get_logger().info("Waiting for mimic server to start...", throttle_duration_sec=2.0)
-                time.sleep(0.5)
-                pass
-
         # Start processing the queue
         self.create_timer(0.1, self.run_speaker, callback_group=MutuallyExclusiveCallbackGroup())
 

From dac845a5535d598a7f0707b6fc7be9ff49ebb80e Mon Sep 17 00:00:00 2001
From: Valerie <v.bartel.vb@gmail.com>
Date: Mon, 14 Apr 2025 15:09:20 +0200
Subject: [PATCH 5/5] convert wav bytes to np array for sounddevice and add comments

---
 bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 27 +++++++++++++++------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
index 754cfd943..7b58ca46e 100755
--- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
+++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py
@@ -5,6 +5,7 @@
 import wave
 from pathlib import Path
 
+import numpy as np
 import rclpy
 import sounddevice as sd
 from piper import PiperVoice
@@ -36,11 +37,10 @@ def say(text: str) -> None:
     """Use piper for speech synthesis and audio playback.
     This is also used for speaking the ip adress during startup."""
     synthesize_args = {
-        "speaker_id": 0,  # Adjust if you're using multi-speaker models
-        "length_scale": 1.0,
-        "noise_scale": 0.667,
-        "noise_w": 0.8,
-        "sentence_silence": 0.0,
+        "length_scale": 1.0,  # Phoneme length, if lower -> faster
+        "noise_scale": 0.667,  # Generator noise, if lower -> more robotic
+        "noise_w": 0.8,  # Phoneme width noise, if lower -> more robotic
+        "sentence_silence": 0.1,  # seconds of silence after each sentence
     }
     with io.BytesIO() as buffer:
         with wave.open(buffer, "wb") as wav_file:
@@ -48,8 +48,21 @@ def say(text: str) -> None:
 
         buffer.seek(0)
         with wave.open(buffer, "rb") as wav:
-            audio = wav.readframes(wav.getnframes())
-            sd.play(audio, samplerate=wav.getframerate(), blocking=True)
+            framerate = wav.getframerate()
+            sampwidth = wav.getsampwidth()
+            nchannels = wav.getnchannels()
+            nframes = wav.getnframes()
+            audio_bytes = wav.readframes(nframes)
+
+            # Convert the raw PCM bytes to a NumPy array for sd.play(); dtype follows the sample width
+            dtype_map = {1: np.uint8, 2: np.int16, 4: np.int32}  # 8-bit WAV PCM is unsigned
+            if sampwidth not in dtype_map:
+                raise ValueError(f"Unsupported sample width: {sampwidth}")
+            audio = np.frombuffer(audio_bytes, dtype=dtype_map[sampwidth])
+            if nchannels > 1:
+                audio = audio.reshape(-1, nchannels)
+
+            sd.play(audio, samplerate=framerate, blocking=True)
 
 
 class Speaker(Node):