Azure AI services - Recognize and synthesize speech:
1. Create a 'Speech service' resource in the Azure portal and note its key and region.
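Both code samples can read the key and region from a settings file instead of hard-coding them. If you go that route, the files the code expects would look roughly like this (the values shown are placeholders for your own resource key and region):

appsettings.json (C# version):
{
  "SpeechKey": "YOUR_SPEECH_KEY",
  "SpeechRegion": "eastus"
}

.env (Python version):
SPEECH_KEY=YOUR_SPEECH_KEY
SPEECH_REGION=eastus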
C# Code:
using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Media;
// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
// dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
// dotnet add package System.Windows.Extensions --version 4.6.0
namespace speaking_clock
{
    class Program
    {
        private static SpeechConfig speechConfig;

        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                // IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                // IConfigurationRoot configuration = builder.Build();
                string aiSvcKey = "YOUR_SPEECH_KEY"; // configuration["SpeechKey"];
                string aiSvcRegion = "eastus";       // configuration["SpeechRegion"];

                // Configure speech service
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                Console.WriteLine("Ready to use speech service in " + speechConfig.Region);

                // Configure voice
                speechConfig.SpeechSynthesisVoiceName = "en-US-AriaNeural";

                // Get spoken input
                string command = "what time is it?"; // await TranscribeCommand();
                if (command.ToLower() == "what time is it?")
                {
                    await TellTime();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }
        static async Task<string> TranscribeCommand()
        {
            string command = "";

            // Configure speech recognition using the default microphone
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using SpeechRecognizer speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            Console.WriteLine("Speak now...");

            // Alternative: recognize speech from a WAV file instead of the microphone
            // string audioFile = "time.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using SpeechRecognizer speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

            // Process speech input
            SpeechRecognitionResult speech = await speechRecognizer.RecognizeOnceAsync();
            if (speech.Reason == ResultReason.RecognizedSpeech)
            {
                command = speech.Text;
                Console.WriteLine(command);
            }
            else
            {
                Console.WriteLine(speech.Reason);
                if (speech.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(speech);
                    Console.WriteLine(cancellation.Reason);
                    Console.WriteLine(cancellation.ErrorDetails);
                }
            }

            // Return the command
            return command;
        }
        static async Task TellTime()
        {
            var now = DateTime.Now;
            string responseText = "The time is " + now.Hour.ToString() + ":" + now.Minute.ToString("D2");

            // Configure speech synthesis
            speechConfig.SpeechSynthesisVoiceName = "en-GB-RyanNeural";
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);

            // Synthesize spoken output using SSML
            string responseSsml = $@"
                <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
                    <voice name='en-GB-LibbyNeural'>
                        {responseText}
                        <break strength='weak'/>
                        Time to end this lab!
                    </voice>
                </speak>";
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakSsmlAsync(responseSsml);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }

            // Print the response
            Console.WriteLine(responseText);
        }
    }
}
Output:
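With the command hard-coded to "what time is it?", the console output should look roughly like this (the region comes from your configuration and the time will vary):

Ready to use speech service in eastus
The time is 14:35

The synthesized response ("The time is ... Time to end this lab!") plays through the default speaker.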
Python Code:
from dotenv import load_dotenv
from datetime import datetime
from playsound import playsound
import os
# Import namespaces
import azure.cognitiveservices.speech as speech_sdk
# pip install azure-cognitiveservices-speech==1.30.0
# pip install python-dotenv
# pip install playsound==1.2.2
def main():
    try:
        global speech_config

        # Get Configuration Settings
        load_dotenv()
        ai_key = 'YOUR_SPEECH_KEY'  # os.getenv('SPEECH_KEY')
        ai_region = 'eastus'        # os.getenv('SPEECH_REGION')

        # Configure speech service
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)
        print('Ready to use speech service in:', speech_config.region)

        # Get spoken input
        command = 'what time is it?'  # TranscribeCommand()
        if command.lower() == 'what time is it?':
            TellTime()

    except Exception as ex:
        print(ex)
def TranscribeCommand():
    command = ''

    # Configure speech recognition using the default microphone
    # audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    # speech_recognizer = speech_sdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    # print('Speak now...')

    # Configure speech recognition using a WAV file as input
    current_dir = os.getcwd()
    audioFile = os.path.join(current_dir, 'time.wav')
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    speech_recognizer = speech_sdk.SpeechRecognizer(speech_config, audio_config)

    # Process speech input
    speech = speech_recognizer.recognize_once_async().get()
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
    else:
        print(speech.reason)
        if speech.reason == speech_sdk.ResultReason.Canceled:
            cancellation = speech.cancellation_details
            print(cancellation.reason)
            print(cancellation.error_details)

    # Return the command
    return command
def TellTime():
    now = datetime.now()
    response_text = 'The time is {}:{:02d}'.format(now.hour, now.minute)

    # Configure speech synthesis
    speech_config.speech_synthesis_voice_name = "en-GB-RyanNeural"
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config=speech_config)

    # Synthesize spoken output (plain text)
    # speak = speech_synthesizer.speak_text_async(response_text).get()
    # if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
    #     print(speak.reason)

    # Synthesize spoken output (SSML)
    responseSsml = " \
    <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'> \
        <voice name='en-GB-LibbyNeural'> \
            {} \
            <break strength='weak'/> \
            Time to end this lab! \
        </voice> \
    </speak>".format(response_text)
    speak = speech_synthesizer.speak_ssml_async(responseSsml).get()
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)

    # Print the response
    print(response_text)


if __name__ == "__main__":
    main()
Output:
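The Python version prints essentially the same thing (note the extra colon from the print call):

Ready to use speech service in: eastus
The time is 14:35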