unit NLPUtils;

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written in Free Pascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}

{$MODE OBJFPC}{$H+}{$RANGECHECKS ON}

interface

uses
  SysUtils, StrUtils,types;

type
  TStringArray = array of String;  // Named dynamic-array type for strings (token lists and vocabularies in this unit)
  TDoubleArray = array of Double;  // Named dynamic-array type for doubles (the vector returned by BagOfWords)

// Splits text into tokens and returns them in their original order.
function Tokenize(const text: String): TStringArray;
// Returns a vector with one element per vocabulary entry: 1.0 when the entry
// is found in text, 0.0 otherwise. The result has the same length as vocabulary.
function BagOfWords(const text: String; const vocabulary: TStringArray): TDoubleArray;

implementation

// Tokenize - splits text into whitespace-separated tokens.
//
// Parameters:
//   text - the input string; may be empty.
// Returns:
//   The tokens in order of appearance. Runs of whitespace (space, tab, CR, LF)
//   count as a single separator, and leading/trailing whitespace is ignored,
//   so the result never contains empty strings. An empty or all-whitespace
//   input yields an empty array.
function Tokenize(const text: String): TStringArray;
const
  Whitespace = [' ', #9, #10, #13];
var
  n, i, tokenStart, count: SizeInt;
begin
  Result := nil;
  n := Length(text);
  // Every token needs at least one character and, except for the last one,
  // at least one separator after it, so (n + 1) div 2 is an upper bound.
  SetLength(Result, (n + 1) div 2);
  count := 0;
  i := 1;
  while i <= n do
  begin
    // Skip the separator run in front of the next token.
    while (i <= n) and (text[i] in Whitespace) do
      Inc(i);
    if i > n then
      Break;
    tokenStart := i;
    // Consume the token itself.
    while (i <= n) and not (text[i] in Whitespace) do
      Inc(i);
    Result[count] := Copy(text, tokenStart, i - tokenStart);
    Inc(count);
  end;
  // Trim the over-allocated tail.
  SetLength(Result, count);
end;

// BagOfWords - builds a binary presence vector for text over vocabulary.
//
// Parameters:
//   text       - the text to inspect.
//   vocabulary - the entries to look for; an entry may contain several words.
// Returns:
//   An array with Length(vocabulary) elements. Element i is 1.0 when
//   vocabulary[i] occurs in text as a whole word (or whole phrase), and 0.0
//   otherwise. Empty vocabulary entries always yield 0.0.
//
// Matching is case-sensitive and byte-based. A "word byte" is an ASCII letter,
// digit, underscore, or any byte >= 128 (so UTF-8 encoded letters are kept
// together). An occurrence counts only when it is not glued to another word
// byte on a side where the entry itself starts/ends with a word byte; this
// keeps 'cat' from matching inside 'concatenate' while still matching 'cat,'.
// Limitation: non-ASCII punctuation is also made of bytes >= 128 and is
// therefore treated as part of a word.
function BagOfWords(const text: String; const vocabulary: TStringArray): TDoubleArray;

  // True when idx is a valid 1-based position in s and s[idx] is a word byte.
  // The bounds test comes first so s[idx] is never read out of range.
  function IsWordByte(const s: String; idx: SizeInt): Boolean;
  begin
    Result := (idx >= 1) and (idx <= Length(s)) and
              (s[idx] in ['0'..'9', 'A'..'Z', 'a'..'z', '_', #128..#255]);
  end;

  // True when word occurs in s delimited by word boundaries.
  function ContainsWholeWord(const word, s: String): Boolean;
  var
    hit: SizeInt;
    needLeft, needRight: Boolean;
  begin
    Result := False;
    if word = '' then
      Exit;
    // A boundary is only required on a side where the entry itself begins or
    // ends with a word byte (same idea as \b in regular expressions).
    needLeft := IsWordByte(word, 1);
    needRight := IsWordByte(word, Length(word));
    hit := PosEx(word, s, 1);
    while hit > 0 do
    begin
      if not (needLeft and IsWordByte(s, hit - 1)) and
         not (needRight and IsWordByte(s, hit + Length(word))) then
        Exit(True);
      // This occurrence was embedded in a longer word; try the next one.
      hit := PosEx(word, s, hit + 1);
    end;
  end;

var
  i: Integer;
begin
  Result := nil;
  SetLength(Result, Length(vocabulary));
  for i := 0 to High(vocabulary) do
    if ContainsWholeWord(vocabulary[i], text) then
      Result[i] := 1.0
    else
      Result[i] := 0.0;
end;

end.