Azure AI services - Read Text in Images:
Source:
https://github.com/MicrosoftLearning/AI-102-AIEngineer
1. Azure AI services | Azure AI services multi-service account
https://github.com/MicrosoftLearning/AI-102-AIEngineer
1. Azure AI services | Azure AI services multi-service account
C# Code:
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision;
using Microsoft.Azure.CognitiveServices.Vision.ComputerVision.Models;
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
// dotnet add package Microsoft.Azure.CognitiveServices.Vision.ComputerVision --version 6.0.0
namespace read_text
{
    /// <summary>
    /// Console sample that submits a local image or PDF to the Read API through
    /// <see cref="ComputerVisionClient"/> and prints the recognized text line by line.
    /// </summary>
    class Program
    {
        // Names of the environment variables that supply the service settings.
        // Credentials are read at run time so that no secret is stored in source.
        private const string EndpointVariable = "COG_SERVICE_ENDPOINT";
        private const string KeyVariable = "COG_SERVICE_KEY";

        // Number of characters in the textual GUID that ends the operation location.
        private const int OperationIdLength = 36;

        // Shared client, created once in Main and used by GetTextRead.
        private static ComputerVisionClient cvClient;

        /// <summary>
        /// Entry point: builds the client from environment settings, shows the menu
        /// and runs the selected text-reading scenario.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static async Task Main(string[] args)
        {
            try
            {
                // Get the endpoint and key from the environment instead of hard-coding them
                string cogSvcEndpoint = Environment.GetEnvironmentVariable(EndpointVariable);
                string cogSvcKey = Environment.GetEnvironmentVariable(KeyVariable);
                if (string.IsNullOrWhiteSpace(cogSvcEndpoint) || string.IsNullOrWhiteSpace(cogSvcKey))
                {
                    Console.WriteLine($"Set the {EndpointVariable} and {KeyVariable} environment variables before running.");
                    return;
                }

                ApiKeyServiceClientCredentials credentials =
                    new ApiKeyServiceClientCredentials(cogSvcKey);
                cvClient = new ComputerVisionClient(credentials)
                {
                    Endpoint = cogSvcEndpoint
                };

                // Menu for text reading functions
                Console.WriteLine("1: Use Read API for image\n2: Use Read API for document\n3: Read handwriting\nAny other key to quit");
                Console.WriteLine("Enter a number:");
                string command = Console.ReadLine();
                string imageFile;
                switch (command)
                {
                    case "1":
                        imageFile = "images/Lincoln.jpg";
                        await GetTextRead(imageFile);
                        break;
                    case "2":
                        imageFile = "images/Rome.pdf";
                        await GetTextRead(imageFile);
                        break;
                    case "3":
                        imageFile = "images/Note.jpg";
                        await GetTextRead(imageFile);
                        break;
                    default:
                        // Any other input quits without doing anything
                        break;
                }
            }
            catch (Exception ex)
            {
                // Sample-level handler: report the problem and exit normally
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Sends the given file to the Read API, polls once per second until the
        /// operation is no longer running or waiting to start, and prints every
        /// recognized line of text.
        /// </summary>
        /// <param name="imageFile">Path of the image or PDF file to read.</param>
        /// <returns>A task that completes when the results have been printed.</returns>
        static async Task GetTextRead(string imageFile)
        {
            Console.WriteLine($"Reading text in {imageFile}\n");

            // Use Read API to read text in image; the stream is only needed for the
            // submission, so it is closed before polling starts
            string operationLocation;
            using (var imageData = File.OpenRead(imageFile))
            {
                var readOp = await cvClient.ReadInStreamAsync(imageData);
                operationLocation = readOp.OperationLocation;
            }

            // Get the async operation ID so we can check for the results.
            // The ID is the GUID that forms the last 36 characters of the location.
            if (string.IsNullOrEmpty(operationLocation) || operationLocation.Length < OperationIdLength)
            {
                throw new InvalidOperationException("The Read operation did not return a usable operation location.");
            }
            Guid operationId = Guid.Parse(
                operationLocation.Substring(operationLocation.Length - OperationIdLength));

            // Wait for the asynchronous operation to complete
            ReadOperationResult results;
            do
            {
                // Task.Delay waits without blocking the thread, unlike Thread.Sleep
                await Task.Delay(1000);
                results = await cvClient.GetReadResultAsync(operationId);
            }
            while (results.Status == OperationStatusCodes.Running ||
                   results.Status == OperationStatusCodes.NotStarted);

            // Report any outcome other than success instead of ending silently
            if (results.Status != OperationStatusCodes.Succeeded)
            {
                Console.WriteLine($"Read operation ended with status: {results.Status}");
                return;
            }

            // The operation succeeded: process the text line by line
            foreach (ReadResult page in results.AnalyzeResult.ReadResults)
            {
                foreach (Line line in page.Lines)
                {
                    Console.WriteLine(line.Text);
                    // Uncomment the following line if you'd like to see the bounding box
                    //Console.WriteLine(line.BoundingBox);
                }
            }
        }
    }
}
Python Code:
# pip install python-dotenv
# pip install pillow
# pip install matplotlib
# pip install azure-cognitiveservices-vision-computervision==0.7.0
from dotenv import load_dotenv
import os
import time
from PIL import Image, ImageDraw
from matplotlib import pyplot as plt
# Import namespaces
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
def main():
    """Create the Azure AI Vision client and run the text-reading menu.

    The endpoint and key are read from the COG_SERVICE_ENDPOINT and
    COG_SERVICE_KEY settings (environment variables, optionally loaded from a
    .env file by load_dotenv) so that no secret is stored in source.
    The client is stored in the module-level ``cv_client`` for GetTextRead.
    Any exception raised along the way is printed rather than propagated.
    """
    global cv_client

    try:
        # Get Configuration Settings
        load_dotenv()
        cog_endpoint = os.getenv('COG_SERVICE_ENDPOINT')
        cog_key = os.getenv('COG_SERVICE_KEY')
        if not cog_endpoint or not cog_key:
            print('Set COG_SERVICE_ENDPOINT and COG_SERVICE_KEY before running.')
            return

        # Authenticate Azure AI Vision client
        credential = CognitiveServicesCredentials(cog_key)
        cv_client = ComputerVisionClient(cog_endpoint, credential)

        # Menu for text reading functions
        print('1: Use Read API for image\n2: Use Read API for document\n3: Read handwriting\nAny other key to quit')
        command = input('Enter a number:')

        # Map each menu choice to its sample file; any other input quits
        sample_files = {
            '1': 'Lincoln.jpg',
            '2': 'Rome.pdf',
            '3': 'Note.jpg',
        }
        file_name = sample_files.get(command)
        if file_name is not None:
            image_file = os.path.join('images', file_name)
            GetTextRead(image_file)

    except Exception as ex:
        print(ex)
def GetTextRead(image_file):
    """Read the text in a local image or PDF and print it line by line.

    Submits the file through the module-level ``cv_client``, polls the
    operation once per second until it is no longer running or waiting to
    start, then prints each recognized line. A final status other than
    succeeded is reported instead of being ignored.

    Args:
        image_file: Path of the image or PDF file to read.
    """
    print('Reading text in {}\n'.format(image_file))

    # Use Read API to read text in image; the file is only needed for the
    # submission, so it is closed before polling starts
    with open(image_file, mode="rb") as image_data:
        read_op = cv_client.read_in_stream(image_data, raw=True)

    # Get the async operation ID so we can check for the results
    # (the ID is the last path segment of the Operation-Location header)
    operation_location = read_op.headers["Operation-Location"]
    operation_id = operation_location.split("/")[-1]

    # Wait for the asynchronous operation to complete
    pending = (OperationStatusCodes.running, OperationStatusCodes.not_started)
    while True:
        read_results = cv_client.get_read_result(operation_id)
        if read_results.status not in pending:
            break
        time.sleep(1)

    # Report any outcome other than success instead of ending silently
    if read_results.status != OperationStatusCodes.succeeded:
        print('Read operation ended with status: {}'.format(read_results.status))
        return

    # The operation succeeded: process the text line by line
    for page in read_results.analyze_result.read_results:
        for line in page.lines:
            print(line.text)
            # Uncomment the following line if you'd like to see the bounding box
            # print(line.bounding_box)
# Run the interactive sample only when this file is executed as a script.
if __name__ == "__main__":
    main()
Input:
Output:
Input:
Save as PDF file.
Output:
Input:
Output:
No comments:
Post a Comment