Azure AI services - Speech service
Source: https://github.com/MicrosoftLearning/mslearn-ai-language
1. Create 'Speech service' in Azure - copy key and region
C# Code:
Python Code:
Source: https://github.com/MicrosoftLearning/mslearn-ai-language
1. Create 'Speech service' in Azure - copy key and region
C# Code:
dotnet add package Microsoft.CognitiveServices.Speech --version 1.30.0
dotnet add package System.Windows.Extensions --version 4.6.0
Output:
using System;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;
using System.Collections.Generic;
using System.Text;
// Import namespaces
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech.Translation;
using System.Media;
namespace speech_translation
{
    class Program
    {
        // Shared config for speech synthesis (initialized in Main, used by Translate).
        private static SpeechConfig speechConfig;
        // Shared config for translation: source language plus registered target languages.
        private static SpeechTranslationConfig translationConfig;

        static async Task Main(string[] args)
        {
            try
            {
                // Get config settings from AppSettings
                IConfigurationBuilder builder =
                    new ConfigurationBuilder().AddJsonFile("appsettings.json");
                IConfigurationRoot configuration = builder.Build();
                // SECURITY: do not hard-code keys in source control; restore the
                // commented configuration lookups before sharing this code.
                string aiSvcKey = "1RBACOGNMJb"; //configuration["SpeechKey"];
                string aiSvcRegion = "eastus"; //configuration["SpeechRegion"];

                // Set console encoding to unicode so non-Latin translations render.
                Console.InputEncoding = Encoding.Unicode;
                Console.OutputEncoding = Encoding.Unicode;

                // Configure translation: recognize en-US, translate to fr/es/hi.
                translationConfig = SpeechTranslationConfig.FromSubscription(aiSvcKey, aiSvcRegion);
                translationConfig.SpeechRecognitionLanguage = "en-US";
                translationConfig.AddTargetLanguage("fr");
                translationConfig.AddTargetLanguage("es");
                translationConfig.AddTargetLanguage("hi");
                Console.WriteLine("Ready to translate from " + translationConfig.SpeechRecognitionLanguage);

                // Configure speech (used later to synthesize the translated text).
                speechConfig = SpeechConfig.FromSubscription(aiSvcKey, aiSvcRegion);

                // Prompt until the user enters anything other than a supported language.
                string targetLanguage = "";
                while (targetLanguage != "quit")
                {
                    Console.WriteLine("\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n");
                    targetLanguage = Console.ReadLine().ToLower();
                    if (translationConfig.TargetLanguages.Contains(targetLanguage))
                    {
                        await Translate(targetLanguage);
                    }
                    else
                    {
                        targetLanguage = "quit";
                    }
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        // Recognizes one utterance from the default microphone, translates it to
        // targetLanguage, prints the translation, then speaks it with a neural voice.
        static async Task Translate(string targetLanguage)
        {
            string translation = "";

            // Translate speech captured from the default microphone.
            using AudioConfig audioConfig = AudioConfig.FromDefaultMicrophoneInput();
            using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            Console.WriteLine("Speak now...");
            TranslationRecognitionResult result = await translator.RecognizeOnceAsync();

            // Bail out when nothing was recognized/translated; indexing Translations
            // without this check throws when recognition fails (matches the check
            // the Python version of this lab performs).
            if (result.Reason != ResultReason.TranslatedSpeech)
            {
                Console.WriteLine(result.Reason);
                return;
            }
            Console.WriteLine($"Translating '{result.Text}'");
            translation = result.Translations[targetLanguage];
            Console.OutputEncoding = Encoding.UTF8;
            Console.WriteLine(translation);

            // Alternative: translate speech from a WAV file instead of the microphone.
            // string audioFile = "station.wav";
            // SoundPlayer wavPlayer = new SoundPlayer(audioFile);
            // wavPlayer.Play();
            // using AudioConfig audioConfig = AudioConfig.FromWavFileInput(audioFile);
            // using TranslationRecognizer translator = new TranslationRecognizer(translationConfig, audioConfig);
            // Console.WriteLine("Getting speech from file...");
            // TranslationRecognitionResult result = await translator.RecognizeOnceAsync();
            // Console.WriteLine($"Translating '{result.Text}'");
            // translation = result.Translations[targetLanguage];
            // Console.OutputEncoding = Encoding.UTF8;
            // Console.WriteLine(translation);

            // Synthesize translation: pick the neural voice for the target language.
            var voices = new Dictionary<string, string>
            {
                ["fr"] = "fr-FR-HenriNeural",
                ["es"] = "es-ES-ElviraNeural",
                ["hi"] = "hi-IN-MadhurNeural"
            };
            speechConfig.SpeechSynthesisVoiceName = voices[targetLanguage];
            using SpeechSynthesizer speechSynthesizer = new SpeechSynthesizer(speechConfig);
            SpeechSynthesisResult speak = await speechSynthesizer.SpeakTextAsync(translation);
            if (speak.Reason != ResultReason.SynthesizingAudioCompleted)
            {
                Console.WriteLine(speak.Reason);
            }
        }
    }
}
Output:
Python Code:
pip install azure-cognitiveservices-speech==1.30.0
pip install playsound==1.3.0
from dotenv import load_dotenv
from datetime import datetime
import os
# Import namespaces
import azure.cognitiveservices.speech as speech_sdk
def main():
    """Interactive loop: configure the Speech service for translation, then
    repeatedly prompt for a target language and translate one utterance."""
    try:
        global speech_config
        global translation_config

        # Get Configuration Settings
        load_dotenv()
        # SECURITY: do not hard-code keys in source control; restore the
        # commented os.getenv lookups before sharing this code.
        ai_key = '1RBACOGNMJb'  # os.getenv('SPEECH_KEY')
        ai_region = 'eastus'  # os.getenv('SPEECH_REGION')

        # Configure translation: recognize en-US, translate to fr/es/hi.
        translation_config = speech_sdk.translation.SpeechTranslationConfig(
            subscription=ai_key, region=ai_region)
        translation_config.speech_recognition_language = 'en-US'
        translation_config.add_target_language('fr')
        translation_config.add_target_language('es')
        translation_config.add_target_language('hi')
        print('Ready to translate from', translation_config.speech_recognition_language)

        # Configure speech (used later to synthesize the translated text).
        speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)

        # Get user input: loop until anything other than a supported language.
        targetLanguage = ''
        while targetLanguage != 'quit':
            targetLanguage = input('\nEnter a target language\n fr = French\n es = Spanish\n hi = Hindi\n Enter anything else to stop\n').lower()
            if targetLanguage in translation_config.target_languages:
                Translate(targetLanguage)
            else:
                targetLanguage = 'quit'
    except Exception as ex:
        print(ex)
def Translate(targetLanguage):
    """Recognize one utterance from the default microphone, translate it to
    targetLanguage, print all translations, and speak the requested one
    with the matching neural voice."""
    translation = ''

    # Translate speech from the default microphone.
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(
        translation_config=translation_config, audio_config=audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()
    if result.reason != speech_sdk.ResultReason.TranslatedSpeech:
        # Nothing usable was recognized; skip synthesis entirely.
        print("No speech could be recognized or translation failed.")
        return
    print('Translating "{}"'.format(result.text))
    # NOTE: loop variable renamed so it does not clobber `translation`.
    for language, text in result.translations.items():
        print('Translation in {}: {}'.format(language, text))
    # BUG FIX: use the translation actually produced for the requested
    # language; the previous version overwrote targetLanguage/translation
    # with hard-coded placeholder values ("fr" / "Bonjour tout le monde"),
    # so the spoken output never matched what the user said.
    translation = result.translations[targetLanguage]

    # Synthesize translation
    voices = {
        "fr": "fr-FR-HenriNeural",
        "es": "es-ES-ElviraNeural",
        "hi": "hi-IN-MadhurNeural"
    }
    # Set the speech synthesis voice name based on the target language
    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
    # Initialize the speech synthesizer
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
    # Synthesize the translated text to speech
    speak = speech_synthesizer.speak_text_async(translation).get()
    # Check if the synthesis was successful
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)
    else:
        print("Speech synthesis completed successfully.")
# Run the interactive translator only when executed as a script
# (the call must be indented under the guard; it was flattened to
# column 0, which is a syntax error).
if __name__ == "__main__":
    main()