unit SpeechRecognition;

// This is a version of Jim McKeeth's speech recognition component, which can be found online at:
// https://github.com/jimmckeeth/FireMonkey-Android-Voice/tree/master/Components

interface

uses
  System.SysUtils, System.Classes, System.Messaging;

type
  // List of command phrases; accepted by the array overload of
  // TSpeechRecognition.ListenFor.
  TGuesses = array of String;
  // Array of Single values. Declared for callers; not referenced within this unit.
  TConfidenceScores = array of Single;
  // One recognition candidate: the recognised text plus its confidence score.
  TRecognitionResult = record
    Guess: string;
    Confidence: Single;
  end;
  // Every candidate produced by a single recognition request.
  TRecognitionResults = array of TRecognitionResult;
  // Handler type for OnRecognition. It is called with the first candidate only,
  // or with '' and 0 when the request produced no candidates.
  TRecognitionEvent = procedure(Sender: TObject; Guess: String; Confidence: Single) of object;
  // Handler type for OnCommand. Guess is the command that matched one of the
  // candidates, or '' when no command matched.
  TRecognitionCommandEvent = procedure(Sender: TObject; Guess: String) of object;
  // Handler type for OnRecognitionEx. It is called with all candidates.
  TRecognitionEventEx = procedure(Sender: TObject; Guesses: TRecognitionResults) of object;

  // Component that starts the Android speech-recognition activity and reports
  // what was recognised through its events. All platform-specific work in the
  // implementation is compiled only when ANDROID is defined.
  TSpeechRecognition = class(TComponent)
  private
  const
    // Request code passed to startActivityForResult and compared against the
    // request code of incoming activity results.
    RecognizerRequestCode = 987654321; //arbitrary value
    // Initial values of Prompt and Language; also used to decide whether the
    // properties need to be stored.
    DefaultPrompt = 'Speak now';
    DefaultLanguage = 'en-US';
  private
    // Set by DoListen and cleared by IntentCallback; while True, the public
    // Listen/ListenFor methods leave the pending command list untouched.
    FListening: Boolean;
    FLanguage: String;
    FOnRecognition: TRecognitionEvent;
    FOnRecognitionEx: TRecognitionEventEx;
    FPrompt: String;
    // Commands of the current ListenFor request; empty for a plain Listen.
    // Created in the constructor and freed in the destructor.
    FTempCommands: TStrings;
    FOnCommand: TRecognitionCommandEvent;
    FAlwaysGuesses: Boolean;
    // On Android: the value returned by SubscribeToMessage, or -1 when no
    // activity able to handle the speech-recognition intent was found.
    FSubscriptionCookie: Integer;
    // Listener for TMessageResultNotification (the activity result).
    procedure IntentCallback(const Sender: TObject; const M: TMessage);
    // Builds the recogniser intent and starts the recogniser activity.
    procedure DoListen;
    // Dispatches a set of candidates to OnCommand / OnRecognition / OnRecognitionEx.
    procedure ProcessesGuesses(AGuesses: TRecognitionResults);
    // "stored" specifiers: True only when the property differs from its default.
    function StorePrompt: Boolean;
    function StoreLanguage: Boolean;
  public
    // Starts free-form recognition with an empty command list.
    procedure Listen; overload;
    // Starts recognition and matches the candidates against ACommands; the
    // matched command (or '') is reported through OnCommand.
    procedure ListenFor(const ACommands: TStrings); overload;
    procedure ListenFor(const ACommands: TGuesses); overload;
    constructor Create(AOwner: TComponent); override;
    destructor Destroy; override;
  published
    // Text passed to the recogniser as EXTRA_PROMPT.
    property Prompt: String read FPrompt write FPrompt stored StorePrompt;
    // When True (the default), OnRecognition and OnRecognitionEx fire even if
    // the request was looking for a command and OnCommand was called.
    property AlwaysGuesses: Boolean read FAlwaysGuesses write FAlwaysGuesses default True;
    // Language tag passed to the recogniser as EXTRA_LANGUAGE.
    property Language: String read FLanguage write FLanguage stored StoreLanguage;
    // Called with the first candidate (or '' and 0 when there were none).
    property OnRecognition: TRecognitionEvent read FOnRecognition write FOnRecognition;
    // Called with all candidates.
    property OnRecognitionEx: TRecognitionEventEx read FOnRecognitionEx write FOnRecognitionEx;
    // Called with the matched command when a ListenFor command list is active.
    property OnCommand: TRecognitionCommandEvent read FOnCommand write FOnCommand;
  end;

procedure Register;

implementation

{$IFDEF ANDROID}
uses
  Androidapi.JNI.speech, FMX.Helpers.Android, FMX.Platform.Android,
  Androidapi.JNIBridge, Androidapi.JNI.Os,
  Androidapi.JNI.GraphicsContentViewText, Androidapi.JNI.JavaTypes,
  Androidapi.Helpers, Androidapi.JNI.App;
{$ENDIF}

// Registers TSpeechRecognition with the IDE on the 'Android' component page.
procedure Register;
begin
  RegisterComponents('Android', [TSpeechRecognition]);
end;

{ TSpeechRecognition }

// Creates the component with its default prompt and language and an empty
// command list. On Android it also checks whether any activity can handle the
// speech-recognition intent and, if so, subscribes to activity results.
constructor TSpeechRecognition.Create(AOwner: TComponent);
{$IFDEF ANDROID}
var
  ProbeIntent: JIntent;
  HandlerCount: Integer;
{$ENDIF}
begin
  inherited Create(AOwner);
  FPrompt := DefaultPrompt;
  FLanguage := DefaultLanguage;
  FAlwaysGuesses := True;
  FListening := False;
  FTempCommands := TStringList.Create;
  {$IFDEF ANDROID}
  // Ask the package manager how many activities accept ACTION_RECOGNIZE_SPEECH.
  ProbeIntent := TJIntent.JavaClass.init(TJRecognizerIntent.JavaClass.ACTION_RECOGNIZE_SPEECH);
  HandlerCount := TAndroidHelper.Activity.getPackageManager.queryIntentActivities(ProbeIntent, 0).size;
  if HandlerCount <> 0 then
    FSubscriptionCookie := TMessageManager.DefaultManager.SubscribeToMessage(
      TMessageResultNotification, IntentCallback)
  else
    FSubscriptionCookie := -1; // recogniser not installed: -1 marks "not subscribed"
  {$ENDIF}
end;

// Releases the command list and, on Android, removes the activity-result
// listener if the constructor registered one.
destructor TSpeechRecognition.Destroy;
begin
  FTempCommands.Free;
  {$IFDEF ANDROID}
  // -1 means the constructor never subscribed, so there is nothing to undo.
  if not (FSubscriptionCookie = -1) then
    TMessageManager.DefaultManager.Unsubscribe(TMessageResultNotification, IntentCallback);
  {$ENDIF}
  inherited Destroy;
end;

// Builds the speech-recognition intent from Prompt and Language and starts the
// recogniser activity. The result arrives later through IntentCallback.
//
// FListening is raised only when the activity is actually started. If it were
// raised when no recogniser is installed (or on a non-Android build) no result
// would ever arrive to clear it, and every later Listen/ListenFor call would
// silently do nothing.
procedure TSpeechRecognition.DoListen;
{$IFDEF ANDROID}
var
  RecognizerIntent: JIntent;
{$ENDIF}
begin
  {$IFDEF ANDROID}
  // -1 means the constructor found no activity that handles the intent.
  if FSubscriptionCookie = -1 then
    Exit;

  RecognizerIntent := TJIntent.JavaClass.init(TJRecognizerIntent.JavaClass.ACTION_RECOGNIZE_SPEECH);
  RecognizerIntent.putExtra(TJRecognizerIntent.JavaClass.EXTRA_LANGUAGE_MODEL,
                      TJRecognizerIntent.JavaClass.LANGUAGE_MODEL_FREE_FORM);
  RecognizerIntent.putExtra(TJRecognizerIntent.JavaClass.EXTRA_PROMPT,
                      StringToJString(FPrompt));
  RecognizerIntent.putExtra(TJRecognizerIntent.JavaClass.EXTRA_MAX_RESULTS, 5); // default 5
  RecognizerIntent.putExtra(TJRecognizerIntent.JavaClass.EXTRA_LANGUAGE,
                      StringToJString(FLanguage));

  FListening := True;
  try
    MainActivity.startActivityForResult(RecognizerIntent, RecognizerRequestCode);
  except
    // The activity did not start, so no result will come back to reset the flag.
    FListening := False;
    raise;
  end;
  {$ENDIF}
end;

// "stored" specifier for Language: True only when the value differs
// (case-sensitively) from DefaultLanguage.
function TSpeechRecognition.StoreLanguage: Boolean;
begin
  Result := not SameStr(FLanguage, DefaultLanguage);
end;

// "stored" specifier for Prompt: True only when the value differs
// (case-sensitively) from DefaultPrompt.
function TSpeechRecognition.StorePrompt: Boolean;
begin
  Result := not SameStr(FPrompt, DefaultPrompt);
end;

{$IFDEF ANDROID}
// Converts the result intent of the recogniser activity into an array of
// candidates (text + confidence).
//
// Returns an empty array when Intent is nil or carries no EXTRA_RESULTS list.
// The confidence-score array is optional on the Android side, so it may be
// missing or shorter than the list of guesses; candidates without a score get
// a Confidence of 0.
function GetTextFromRecognizer(Intent: JIntent): TRecognitionResults;
var
  GuessList: JArrayList;
  GuessObj: JObject;
  Scores: TJavaArray<Single>;
  i: Integer;
begin
  Result := nil;
  if Intent = nil then
    Exit;

  GuessList := Intent.getStringArrayListExtra(TJRecognizerIntent.JavaClass.EXTRA_RESULTS);
  if GuessList = nil then
    Exit;

  Scores := Intent.getFloatArrayExtra(TJRecognizerIntent.JavaClass.EXTRA_CONFIDENCE_SCORES);
  try
    SetLength(Result, GuessList.size);
    for i := 0 to High(Result) do
    begin
      GuessObj := GuessList.get(i);
      if GuessObj <> nil then
        Result[i].Guess := JStringToString(GuessObj.toString)
      else
        Result[i].Guess := '';

      // Only read a score that is really there.
      if (Scores <> nil) and (i < Scores.Length) then
        Result[i].Confidence := Scores[i]
      else
        Result[i].Confidence := 0;
    end;
  finally
    // The TJavaArray wrapper is a Delphi object owned by the caller; Free is
    // nil-safe, so this also covers the "no scores" case.
    Scores.Free;
  end;
end;
{$ENDIF}

// Listener for TMessageResultNotification (an activity result).
//
// Results whose request code is not RecognizerRequestCode belong to some other
// activity and are ignored entirely; in particular they must not clear
// FListening, because the recogniser may still be open. For our own request
// the listening flag is cleared and, when the result code is RESULT_OK, the
// candidates are extracted and dispatched.
procedure TSpeechRecognition.IntentCallback(const Sender: TObject; const M: TMessage);
{$IFDEF ANDROID}
var
  Notification: TMessageResultNotification;
{$ENDIF}
begin
  {$IFDEF ANDROID}
  if not (M is TMessageResultNotification) then
    Exit;
  Notification := TMessageResultNotification(M);

  // Not the result of the recogniser started by DoListen.
  if Notification.RequestCode <> RecognizerRequestCode then
    Exit;

  // The recogniser activity has finished, whether it succeeded or not.
  FListening := False;
  if Notification.ResultCode = TJActivity.JavaClass.RESULT_OK then
    ProcessesGuesses(GetTextFromRecognizer(Notification.Value));
  {$ELSE}
  FListening := False;
  {$ENDIF}
end;

// Returns the first entry of ACommands (in list order) that equals any of the
// candidates' text, compared case-insensitively with SameText. Returns '' when
// nothing matches.
function MatchGuessesToCommands(AGuesses: TRecognitionResults; ACommands: TStrings): string;
var
  Command: string;
  Candidate: TRecognitionResult;
begin
  Result := '';
  // Commands drive the outer loop, so command order decides which match wins.
  for Command in ACommands do
    for Candidate in AGuesses do
      if SameText(Candidate.Guess, Command) then
        Exit(Command);
end;

// Dispatches one set of recognition candidates to the event handlers.
//
// When a command list is pending and OnCommand is assigned, OnCommand receives
// the matched command (or '' if none matched). OnRecognition/OnRecognitionEx
// are then fired as well unless a command was being looked for and
// AlwaysGuesses is False.
procedure TSpeechRecognition.ProcessesGuesses(AGuesses: TRecognitionResults);
var
  CommandMode: Boolean;
begin
  CommandMode := (FTempCommands.Count > 0) and Assigned(FOnCommand);
  if CommandMode then
    FOnCommand(Self, MatchGuessesToCommands(AGuesses, FTempCommands));

  // In command mode the plain recognition events are optional.
  if CommandMode and not FAlwaysGuesses then
    Exit;

  if Assigned(FOnRecognition) then
  begin
    if Length(AGuesses) = 0 then
      FOnRecognition(Self, '', 0)
    else
      FOnRecognition(Self, AGuesses[0].Guess, AGuesses[0].Confidence);
  end;

  if Assigned(FOnRecognitionEx) then
    FOnRecognitionEx(Self, AGuesses);
end;


// Starts free-form recognition: clears any pending command list and launches
// the recogniser. Does nothing while a previous request is still listening.
procedure TSpeechRecognition.Listen;
begin
  if not FListening then
  begin
    FTempCommands.Clear;
    DoListen;
  end;
end;

// Starts recognition and matches the candidates against ACommands; a nil list
// is treated as "no commands". Does nothing while a previous request is still
// listening, exactly like Listen and the array overload. Without this early
// exit a second recogniser activity would be launched on top of the one that
// is already open.
procedure TSpeechRecognition.ListenFor(const ACommands: TStrings);
begin
  if FListening then Exit;
  if ACommands <> nil then
    FTempCommands.Assign(ACommands)
  else
    FTempCommands.Clear;
  DoListen;
end;

// Array overload: replaces the pending command list with the given phrases and
// launches the recogniser. Does nothing while a previous request is still
// listening.
procedure TSpeechRecognition.ListenFor(const ACommands: TGuesses);
var
  Phrase: string;
begin
  if not FListening then
  begin
    FTempCommands.Clear;
    for Phrase in ACommands do
      FTempCommands.Add(Phrase);
    DoListen;
  end;
end;

end.
