-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_data.py
More file actions
78 lines (62 loc) · 2.31 KB
/
generate_data.py
File metadata and controls
78 lines (62 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import tempfile

import numpy as np
import pyttsx3
import sounddevice as sd
import soundfile as sf
# Output directory
output_dir = os.path.join("Backend", "Audio_files", "Synthetic")
os.makedirs(output_dir, exist_ok=True)

# Sample rate (Hz) that every generated file is written at.
target_rate = 16000

# Initialize TTS engine
engine = pyttsx3.init()

# Test phrases imitating patients, as (intent, spoken text) pairs.
# The intent becomes the prefix of the generated file name.
phrases = [
    ("help", "Help me please"),
    ("help", "I need help"),
    ("help", "Nurse please"),
    ("water", "Water please"),
    ("water", "I am thirsty"),
    ("water", "Need a drink"),
    ("emergency", "Emergency"),
    ("emergency", "I fell down"),
    ("emergency", "Chest pain"),
    ("yes", "Yes"),
    ("no", "No"),
    ("unknown", "What is the time"),
]


def _to_mono(data):
    """Return *data* as a 1-D signal, averaging the channels of 2-D input."""
    if data.ndim > 1:
        return data.mean(axis=1)
    return data


def _resample_linear(data, samplerate, new_rate):
    """Resample 1-D *data* from *samplerate* to *new_rate* Hz.

    Uses plain linear interpolation (no anti-aliasing filter); proper
    resampling would need scipy, which this script does not depend on.
    Returns *data* unchanged when the two rates already match.
    """
    if samplerate == new_rate:
        return data
    # Integer arithmetic gives the exact floor of the new length; going
    # through a float duration can truncate one sample short.
    new_len = len(data) * new_rate // samplerate
    return np.interp(
        np.linspace(0.0, 1.0, new_len),
        np.linspace(0.0, 1.0, len(data)),
        data,
    )


def _convert_to_target_wav(src_path, dst_path):
    """Read *src_path* and write it to *dst_path* as mono 16-bit PCM WAV.

    The output is written at ``target_rate`` Hz. Processing uses
    soundfile/numpy instead of pydub to avoid audioop issues in Py3.13.

    Raises:
        ValueError: if the source file contains no samples.
    """
    data, samplerate = sf.read(src_path)
    if len(data) == 0:
        # Fail loudly instead of writing an empty file and reporting success.
        raise ValueError("TTS engine produced no audio")
    data = _resample_linear(_to_mono(data), samplerate, target_rate)
    # Save as 16-bit PCM WAV
    sf.write(dst_path, data, target_rate, subtype='PCM_16')


print(f"Generating synthetic audio in {output_dir}...")

# Intermediate TTS output lives in a private temporary directory that is
# removed on exit, even if the script aborts part-way through.
with tempfile.TemporaryDirectory() as temp_dir:
    for index, (intent, text) in enumerate(phrases):
        filename = f"{intent}_{text.replace(' ', '_').lower()}.wav"
        filepath = os.path.join(output_dir, filename)

        # A distinct temp file per phrase: if the engine fails to write one,
        # the read below fails instead of silently picking up the audio of
        # the previous phrase.
        temp_file = os.path.join(temp_dir, f"tts_{index}.wav")
        engine.save_to_file(text, temp_file)
        engine.runAndWait()

        try:
            _convert_to_target_wav(temp_file, filepath)
        except Exception as e:
            # Best-effort batch: report the failure and continue with the
            # remaining phrases.
            print(f"❌ Failed to convert {filename}: {e}")
        else:
            print(f"✅ Generated: {filename}")

print("\nDone! Now use these files to test your API.")