From a8ab758f7d0ecd810b312f4470541ccb97432675 Mon Sep 17 00:00:00 2001 From: Valerie Date: Mon, 14 Apr 2025 13:53:55 +0200 Subject: [PATCH 1/5] update gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 3d75bbd0c..f6d9742f8 100644 --- a/.gitignore +++ b/.gitignore @@ -229,3 +229,6 @@ doku/* **/workspace_status.json .pytest_cache/ + +# tts model +*/bitbots_tts/model/* From 9f56b5209e95a92e52b00b71c7f332ab5260ea39 Mon Sep 17 00:00:00 2001 From: Valerie Date: Mon, 14 Apr 2025 14:16:40 +0200 Subject: [PATCH 2/5] use piper instead of mimic --- bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 35 ++++++++++++++++----- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py index 8a8afd0c5..66648b905 100755 --- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py +++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py @@ -1,13 +1,15 @@ #!/usr/bin/env python3 -import os -import subprocess +import io import time import traceback +import wave +from pathlib import Path import rclpy import requests -from ament_index_python import get_package_prefix +import sounddevice as sd +from piper import PiperVoice from rcl_interfaces.msg import Parameter, SetParametersResult from rclpy.callback_groups import MutuallyExclusiveCallbackGroup from rclpy.executors import MultiThreadedExecutor @@ -16,6 +18,12 @@ from bitbots_msgs.msg import Audio +# Load the Piper voice +bb_tts_dir = Path(__file__).parent + "/model" # TODO: check how to get nice relative paths +model_path = bb_tts_dir + "/en_US-lessac-medium.onnx" +config_path = bb_tts_dir + "/en_US-lessac-medium.onnx.json" +voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False) + def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: bool = True) -> None: """Utility method which can be used by other classes to easily publish a message.""" @@ -27,10 +35,23 @@ def speak(text: str, publisher: Publisher, priority: int = 20, speaking_active: def say(text: str) -> None: - """Start the shell `say.sh` script to output given text with mimic3. Beware: this is blocking.""" - script_path = os.path.join(get_package_prefix("bitbots_tts"), "lib/bitbots_tts/say.sh") - process = subprocess.Popen((script_path, text)) - process.wait() + """Use piper for speech synthesis and audio playback. + This is also used for speaking the ip adress during startup.""" + synthesize_args = { + "speaker_id": 0, # Adjust if you're using multi-speaker models + "length_scale": 1.0, + "noise_scale": 0.667, + "noise_w": 0.8, + "sentence_silence": 0.0, + } + with io.BytesIO() as buffer: + with wave.open(buffer, "wb") as wav_file: + voice.synthesize(text, wav_file, **synthesize_args) + + buffer.seek(0) + with wave.open(buffer, "rb") as wav: + audio = wav.readframes(wav.getnframes()) + sd.play(audio, samplerate=wav.getframerate(), blocking=True) class Speaker(Node): From 8188c89ad19ec3ed718537250270bdb6d660b16c Mon Sep 17 00:00:00 2001 From: Valerie Date: Mon, 14 Apr 2025 14:53:10 +0200 Subject: [PATCH 3/5] fix path and add config to setup.py --- bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 6 +++--- bitbots_misc/bitbots_tts/setup.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py index 66648b905..681a7bf14 100755 --- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py +++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py @@ -19,9 +19,9 @@ from bitbots_msgs.msg import Audio # Load the Piper voice -bb_tts_dir = Path(__file__).parent + "/model" # TODO: check how to get nice relative paths -model_path = bb_tts_dir + "/en_US-lessac-medium.onnx" -config_path = bb_tts_dir + "/en_US-lessac-medium.onnx.json" +bb_tts_dir = Path(__file__).parent.parent / "model" # TODO: check how to get nice relative paths +model_path = bb_tts_dir / "en_US-lessac-medium.onnx" +config_path = bb_tts_dir / "en_US-lessac-medium.onnx.json" voice = PiperVoice.load(model_path, config_path=config_path, use_cuda=False) diff --git a/bitbots_misc/bitbots_tts/setup.py b/bitbots_misc/bitbots_tts/setup.py index 29cedacd8..fc7b93835 100644 --- a/bitbots_misc/bitbots_tts/setup.py +++ b/bitbots_misc/bitbots_tts/setup.py @@ -12,6 +12,7 @@ ("share/ament_index/resource_index/packages", ["resource/" + package_name]), ("share/" + package_name + "/config", glob.glob("config/*.yaml")), ("share/" + package_name + "/launch", glob.glob("launch/*.launch")), + ("share/" + package_name + "/model", glob.glob("model/*")), ], install_requires=[ "setuptools", From 66eb4c18ac6075d79a8753fcedd4ebb2c26a99c3 Mon Sep 17 00:00:00 2001 From: Valerie Date: Mon, 14 Apr 2025 14:53:45 +0200 Subject: [PATCH 4/5] remove mimic server --- bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py index 681a7bf14..754cfd943 100755 --- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py +++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py @@ -1,13 +1,11 @@ #!/usr/bin/env python3 import io -import time import traceback import wave from pathlib import Path import rclpy -import requests import sounddevice as sd from piper import PiperVoice from rcl_interfaces.msg import Parameter, SetParametersResult @@ -83,17 +81,6 @@ def __init__(self) -> None: # Subscribe to the speak topic self.create_subscription(Audio, "speak", self.speak_cb, 10, callback_group=MutuallyExclusiveCallbackGroup()) - # Wait for the mimic server to start - while True: - try: - requests.get("http://localhost:59125") - break - except requests.exceptions.ConnectionError: - # log once per second that the server is not yet available - self.get_logger().info("Waiting for mimic server to start...", throttle_duration_sec=2.0) - time.sleep(0.5) - pass - # Start processing the queue self.create_timer(0.1, self.run_speaker, callback_group=MutuallyExclusiveCallbackGroup()) From dac845a5535d598a7f0707b6fc7be9ff49ebb80e Mon Sep 17 00:00:00 2001 From: Valerie Date: Mon, 14 Apr 2025 15:09:20 +0200 Subject: [PATCH 5/5] bytes to np array because of sd and add comments --- bitbots_misc/bitbots_tts/bitbots_tts/tts.py | 27 +++++++++++++++------ 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py index 754cfd943..7b58ca46e 100755 --- a/bitbots_misc/bitbots_tts/bitbots_tts/tts.py +++ b/bitbots_misc/bitbots_tts/bitbots_tts/tts.py @@ -5,6 +5,7 @@ import wave from pathlib import Path +import numpy as np import rclpy import sounddevice as sd from piper import PiperVoice @@ -36,11 +37,10 @@ def say(text: str) -> None: """Use piper for speech synthesis and audio playback. This is also used for speaking the ip adress during startup.""" synthesize_args = { - "speaker_id": 0, # Adjust if you're using multi-speaker models - "length_scale": 1.0, - "noise_scale": 0.667, - "noise_w": 0.8, - "sentence_silence": 0.0, + "length_scale": 1.0, # Phoneme length, if lower -> faster + "noise_scale": 0.667, # Generator noise, if lower -> more robotic + "noise_w": 0.8, # Phoneme width noise, if lower -> more robotic + "sentence_silence": 0.1, # seconds of silence after each sentence } with io.BytesIO() as buffer: with wave.open(buffer, "wb") as wav_file: @@ -48,8 +48,21 @@ def say(text: str) -> None: buffer.seek(0) with wave.open(buffer, "rb") as wav: - audio = wav.readframes(wav.getnframes()) - sd.play(audio, samplerate=wav.getframerate(), blocking=True) + framerate = wav.getframerate() + sampwidth = wav.getsampwidth() + nchannels = wav.getnchannels() + nframes = wav.getnframes() + audio_bytes = wav.readframes(nframes) + + # bytes to np array + dtype_map = {1: np.int8, 2: np.int16, 4: np.int32} + if sampwidth not in dtype_map: + raise ValueError(f"Unsupported sample width: {sampwidth}") + audio = np.frombuffer(audio_bytes, dtype=dtype_map[sampwidth]) + if nchannels > 1: + audio = audio.reshape(-1, nchannels) + + sd.play(audio, samplerate=framerate, blocking=True) class Speaker(Node):