Wednesday, January 29, 2025

Azure AI services - Recognize and synthesize speech

1. Create a 'Speech service' resource in Azure, and note its key and region (used in the code below).
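
If you prefer the command line to the portal, the resource can also be created with the Azure CLI. This is a sketch under assumptions: the resource and resource group names are placeholders, and the free F0 tier is assumed (use S0 if F0 is unavailable in your subscription):

az cognitiveservices account create --name <your-speech-resource> --resource-group <your-resource-group> --kind SpeechServices --sku F0 --location eastus --yes

az cognitiveservices account keys list --name <your-speech-resource> --resource-group <your-resource-group>

The second command retrieves the keys you can paste into the code (or the config files shown later).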

C# Code:

using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Media;

// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;

// dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
// dotnet add package System.Windows.Extensions --version 4.6.0
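// If you restore the commented-out appsettings.json code below, the JSON configuration
// provider package is also needed (assumption based on the ConfigurationBuilder usage):
// dotnet add package Microsoft.Extensions.Configuration.Json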

namespace speaking_clock
{
    class Program
    {
        private static SpeechConfig speechConfig;
        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                // IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                // IConfigurationRoot configuration = builder.Build();
                string aiSvcKey = "<YOUR_SPEECH_KEY>"; // configuration["SpeechKey"]; -- avoid publishing real keys
                string aiSvcRegion = "eastus"; // configuration["SpeechRegion"];

                // Configure speech service
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                Console.WriteLine("Ready to use speech service in " + speechConfig.Region);

                // Configure voice (note: TellTime overrides this before synthesizing)
                speechConfig.SpeechSynthesisVoiceName = "en-US-AriaNeural";

                // Get spoken input
                string command = "what time is it?"; // await TranscribeCommand();
                if (command.ToLower() == "what time is it?")
                {
                    await TellTime();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        static async Task<string> TranscribeCommand()
        {
            string command = "";

            // Configure speech recognition from the default microphone
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using SpeechRecognizer speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
            Console.WriteLine("Speak now...");

            // Alternative: recognize speech from an audio file (SoundPlayer requires System.Media)
            // string audioFile = "time.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using SpeechRecognizer speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);

            // Process speech input
            SpeechRecognitionResult speech = await speechRecognizer.RecognizeOnceAsync();
            if (speech.Reason == ResultReason.RecognizedSpeech)
            {
                command = speech.Text;
                Console.WriteLine(command);
            }
            else
            {
                Console.WriteLine(speech.Reason);
                if (speech.Reason == ResultReason.Canceled)
                {
                    var cancellation = CancellationDetails.FromResult(speech);
                    Console.WriteLine(cancellation.Reason);
                    Console.WriteLine(cancellation.ErrorDetails);
                }
            }

            // Return the command
            return command;
        }

        static async Task TellTime()
        {
            var now = DateTime.Now;
            string responseText = "The time is " + now.Hour.ToString() + ":" + now.Minute.ToString("D2");

            // Configure speech synthesis (the voice element in the SSML below overrides this voice name)
            speechConfig.SpeechSynthesisVoiceName = "en-GB-RyanNeural";
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);

            // Synthesize spoken output
            string responseSsml = $@"
                <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>
                    <voice name='en-GB-LibbyNeural'>
                        {responseText}
                        <break strength='weak'/>
                        Time to end this lab!
                    </voice>
                </speak>";
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakSsmlAsync(responseSsml);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }

            // Print the response
            Console.WriteLine(responseText);
        }
    }
}
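
The commented-out ConfigurationBuilder lines above expect an appsettings.json file in the project folder (set to copy to the output directory so AddJsonFile can find it at run time). A minimal sketch, with a placeholder key and the setting names the code looks up ("SpeechKey" and "SpeechRegion"):

{
    "SpeechKey": "<YOUR_SPEECH_KEY>",
    "SpeechRegion": "eastus"
}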

Output:
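
Illustrative console output for the hard-coded command path (the time shown is just an example):

Ready to use speech service in eastus
The time is 14:35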



Python Code:

from dotenv import load_dotenv  
from datetime import datetime  
from playsound import playsound
import os  

# Import namespaces  
import azure.cognitiveservices.speech as speech_sdk  

# pip install azure-cognitiveservices-speech==1.30.0
# pip install python-dotenv
# pip install playsound==1.2.2

def main():  
    try:  
        global speech_config  

        # Get Configuration Settings  
        load_dotenv()  
        ai_key = '<YOUR_SPEECH_KEY>'  # os.getenv('SPEECH_KEY') -- avoid publishing real keys
        ai_region = 'eastus'  # os.getenv('SPEECH_REGION')

        # Configure speech service  
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)  
        print('Ready to use speech service in:', speech_config.region)  

        # Get spoken input  
        command = 'what time is it?'  # TranscribeCommand()
        if command.lower() == 'what time is it?':  
            TellTime()  

    except Exception as ex:  
        print(ex)  

def TranscribeCommand():  
    command = ''  

    # Configure speech recognition from an audio file
    current_dir = os.getcwd()
    audioFile = os.path.join(current_dir, 'time.wav')
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    speech_recognizer = speech_sdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)

    # Alternative: recognize speech from the default microphone
    # audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    # speech_recognizer = speech_sdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    # print('Speak now...')

    # Process speech input  
    speech = speech_recognizer.recognize_once_async().get()  

    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:  
        command = speech.text  
        print(command)  
    else:  
        print(speech.reason)  
        if speech.reason == speech_sdk.ResultReason.Canceled:  
            cancellation = speech.cancellation_details  
            print(cancellation.reason)  
            print(cancellation.error_details)  

    # Return the command  
    return command  

def TellTime():  
    now = datetime.now()  
    response_text = 'The time is {}:{:02d}'.format(now.hour, now.minute)  

    # Configure speech synthesis (the voice element in the SSML below overrides this voice name)
    speech_config.speech_synthesis_voice_name = "en-GB-RyanNeural"
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config=speech_config)

    # Alternative: synthesize plain text with the voice configured above
    # speak = speech_synthesizer.speak_text_async(response_text).get()
    # if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
    #     print(speak.reason)

    # Synthesize spoken output from SSML
    responseSsml = " \
        <speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'> \
            <voice name='en-GB-LibbyNeural'> \
                {} \
                <break strength='weak'/> \
                Time to end this lab! \
            </voice> \
        </speak>".format(response_text)
    speak = speech_synthesizer.speak_ssml_async(responseSsml).get()

    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)

    # Print the response  
    print(response_text)  

if __name__ == "__main__":  
    main()
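
The load_dotenv() call expects a .env file in the working directory. A minimal sketch matching the os.getenv('SPEECH_KEY') and os.getenv('SPEECH_REGION') lookups above (the key is a placeholder):

SPEECH_KEY=<YOUR_SPEECH_KEY>
SPEECH_REGION=eastus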

Output:
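
Illustrative console output (the time shown is just an example; the time.wav clip also plays first):

Ready to use speech service in: eastus
The time is 14:35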




