Friday, January 24, 2025

Azure AI services - Document intelligence - Extract Data from Forms

1. Create Azure AI services - Document intelligence in Azure Portal
2. Run the Windows batch (cmd) script to create the Storage account and upload the sample forms
3. Train the Model
4. Test the Model 

Required NuGet packages:
dotnet add package Azure.Core --version 1.44.1
dotnet add package Azure.AI.FormRecognizer --version 4.1.0
dotnet add package Azure.AI.FormRecognizer --version 3.0.0
dotnet add package Tabulate.NET --version 1.0.5

@echo off
rem Creates a storage account, uploads the local sample forms to a blob container,
rem and prints a SAS URI for that container (used as the training data location).
rem Requires the Azure CLI (az) and a prior "az login".

rem Delayed expansion must be enabled: without it every !var! below stays literal text.
setlocal enabledelayedexpansion

rem Set variable values
set subscription_id=129b2bb6-asdf-asdf-83ba-85bf570bebca
set resource_group=rg1
set location=eastus
set expiry_date=2026-01-01T00:00:00Z

rem Get random numbers to create unique resource names
set unique_id=!random!!random!

rem Create a storage account in your Azure resource group
echo Creating storage...
call az storage account create --name ai102form!unique_id! --subscription !subscription_id! --resource-group !resource_group! --location !location! --sku Standard_LRS --encryption-services blob --default-action Allow --output none --allow-blob-public-access true

echo Uploading files...
rem Get storage key to create a container in the storage account.
rem "--output tsv" prints the bare key value, so no JSON text has to be stripped afterwards.
for /f "tokens=*" %%a in (
'az storage account keys list --subscription !subscription_id! --resource-group !resource_group! --account-name ai102form!unique_id! --query "[?keyName=='key1'].value" --output tsv'
) do (
set "AZURE_STORAGE_KEY=%%a"
)

rem Stop early if the key could not be read; the remaining commands would all fail without it
if not defined AZURE_STORAGE_KEY (
echo Failed to retrieve the storage account key.
exit /b 1
)

rem Create container
call az storage container create --account-name ai102form!unique_id! --name sampleforms --auth-mode key --account-key !AZURE_STORAGE_KEY! --output none
rem Upload files from your local sampleforms folder to a container called sampleforms in the storage account
rem Each file is uploaded as a blob
call az storage blob upload-batch -d sampleforms -s ./sample-forms --account-name ai102form!unique_id! --auth-mode key --account-key !AZURE_STORAGE_KEY! --output none
rem Set a variable value for future use
set STORAGE_ACCT_NAME=ai102form!unique_id!

rem Get a Shared Access Signature (a signed URI that points to one or more storage resources) for the blobs in sampleforms
rem No --account-key is passed here: the az CLI is expected to pick up AZURE_STORAGE_KEY from the environment.
for /f "tokens=*" %%a in (
'az storage container generate-sas --account-name ai102form!unique_id! --name sampleforms --expiry !expiry_date! --permissions rwl --output tsv'
) do (
set "SAS_TOKEN=%%a"
)

rem Build the full SAS URI: container URL followed by the token as the query string
set "URI=https://!STORAGE_ACCT_NAME!.blob.core.windows.net/sampleforms?!SAS_TOKEN!"

rem Print the generated Shared Access Signature URI, which is used by Azure Storage to authorize access to the storage resource
echo -------------------------------------
echo SAS URI: !URI!

Run the C# code in each project folder with: dotnet run


3. Train the Model

using System;
using System.IO;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;

// import namespaces
using Azure;
using Azure.AI.FormRecognizer;
using Azure.AI.FormRecognizer.Models;
using Azure.AI.FormRecognizer.Training;

namespace train_model
{
    /// <summary>
    /// Console app that trains a custom form model from the labeled forms
    /// stored in the blob container referenced by a SAS URI.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Starts a labeled training run, waits for it to finish, and prints the
        /// resulting model's id, status and timestamps. Any failure is reported
        /// on the console instead of crashing the process.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            try
            {
                // Get configuration settings
                // IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                // IConfigurationRoot configuration = builder.Build();
                // string formEndpoint = configuration["FormEndpoint"];
                // string formKey = configuration["FormKey"];
                // string trainingStorageUri = configuration["StorageUri"];

                // NOTE(review): values are hard-coded for this walkthrough. Prefer the
                // appsettings.json lookup above and keep the key out of source control.

                // "YOUR_FORM_RECOGNIZER_ENDPOINT"
                string formEndpoint = "";
                // "YOUR_FORM_RECOGNIZER_KEY"
                string formKey = "1E7gEDsZ2pUAiximoBAACYeBjFXJ3w3AAALACOGu5mm";
                // "YOUR_SAS_URI"
                string trainingStorageUri = "https://8IqQY2WOeCJRHTPFg%3D";

                // new Uri("") throws UriFormatException; fail with a clearer message instead.
                if (string.IsNullOrWhiteSpace(formEndpoint))
                {
                    Console.WriteLine("formEndpoint is empty. Set it to your Document Intelligence endpoint.");
                    return;
                }

                // Authenticate Form Training Client
                var credential = new AzureKeyCredential(formKey);
                var trainingClient = new FormTrainingClient(new Uri(formEndpoint), credential);

                // Train model: start the long-running operation, then wait for it to
                // complete so that the returned value is the trained model.
                TrainingOperation operation = await trainingClient
                    .StartTrainingAsync(new Uri(trainingStorageUri), useTrainingLabels: true);
                Response<CustomFormModel> response = await operation.WaitForCompletionAsync();
                CustomFormModel model = response.Value;

                // Get model info
                Console.WriteLine($"Custom Model Info:");
                Console.WriteLine($"    Model Id: {model.ModelId}");
                Console.WriteLine($"    Model Status: {model.Status}");
                Console.WriteLine($"    Training model started on: {model.TrainingStartedOn}");
                Console.WriteLine($"    Training model completed on: {model.TrainingCompletedOn}");
            }
            catch (Exception ex)
            {
                // Top-level handler of a console sample: report the failure and exit.
                Console.WriteLine(ex.Message);
            }
        }
    }
}

4. Test the Model 

using System;
using System.IO;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.Extensions.Configuration;

// import namespaces
using Azure;
using Azure.AI.FormRecognizer;
using Azure.AI.FormRecognizer.Models;
using Azure.AI.FormRecognizer.Training;

namespace test_model
{
    /// <summary>
    /// Console app that runs a previously trained custom model against a local
    /// form image and prints every recognized field.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Sends <c>test1.jpg</c> to the custom model identified by the model id,
        /// waits for recognition to finish, and prints each field's name, label
        /// (when present), value and confidence. Any failure is reported on the
        /// console instead of crashing the process.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static async Task Main(string[] args)
        {
            try
            {
                // Get configuration settings from AppSettings
                // IConfigurationBuilder builder = new ConfigurationBuilder().AddJsonFile("appsettings.json");
                // IConfigurationRoot configuration = builder.Build();
                // string formEndpoint = configuration["FormEndpoint"];
                // string formKey = configuration["FormKey"];
                // string modelId = configuration["ModelId"];

                // NOTE(review): values are hard-coded for this walkthrough. Prefer the
                // appsettings.json lookup above and keep the key out of source control.

                // "YOUR_FORM_RECOGNIZER_ENDPOINT";
                string formEndpoint = "";
                // "YOUR_FORM_RECOGNIZER_KEY";
                string formKey = "1E7gEDsZ2pUAiximAAALACOGu5mm";
                // "YOUR_MODEL_ID";
                string modelId = "7891e019-9cc2-48a9-a9e6-08ac408484c5";

                // new Uri("") throws UriFormatException; fail with a clearer message instead.
                if (string.IsNullOrWhiteSpace(formEndpoint))
                {
                    Console.WriteLine("formEndpoint is empty. Set it to your Document Intelligence endpoint.");
                    return;
                }

                // Authenticate Azure AI Document Intelligence Client
                var credential = new AzureKeyCredential(formKey);
                var recognizerClient = new FormRecognizerClient(new Uri(formEndpoint), credential);

                // Get form url for testing
                string imageFile = "test1.jpg";
                using (var imageData = File.OpenRead(imageFile))
                {
                    // Use trained model with new form: start the long-running operation,
                    // then wait for it to complete to obtain the recognized forms.
                    RecognizeCustomFormsOperation operation = await recognizerClient
                        .StartRecognizeCustomFormsAsync(modelId, imageData);
                    Response<RecognizedFormCollection> response = await operation.WaitForCompletionAsync();
                    RecognizedFormCollection forms = response.Value;

                    foreach (RecognizedForm form in forms)
                    {
                        Console.WriteLine($"Form of type: {form.FormType}");
                        foreach (FormField field in form.Fields.Values)
                        {
                            Console.WriteLine($"Field '{field.Name}':");

                            if (field.LabelData != null)
                            {
                                Console.WriteLine($"    Label: '{field.LabelData.Text}'");
                            }

                            // ValueData is guarded like LabelData: a field without a
                            // recognized value prints an empty string instead of throwing.
                            Console.WriteLine($"    Value: '{field.ValueData?.Text}'");
                            Console.WriteLine($"    Confidence: {field.Confidence}");
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of a console sample: report the failure and exit.
                Console.WriteLine(ex.Message);
            }
        }
    }
}


