{$MODE OBJFPC}{$H+}{$OPTIMIZATION LEVEL3}

{
    Part of AdvancedChatAI.
    Version for 64-bit GNU/Linux.
    Version: 1.
    Written in Free Pascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}

program ii3_embed;

uses
  SysUtils, Classes, GetOpts, fpjson, jsonparser, LinearAlgebra,Math;

const
  // Program version string; printed in the help banner by PrintHelp.
  Version = '1.1';
  // Output format selectors stored in OptFormat.
  FormatText = 0;    // line text, a tab, then space-separated components
  FormatBinary = 1;  // raw Double values written with BlockWrite
  FormatJSON = 2;    // one JSON array per input line

var
  // Requested vector dimensionality (option -d); passed to EmbedText.
  OptDim: Integer = 128;
  // Input file name (option -i). When empty, the input is assigned with an
  // empty file name; the help text describes this as reading from stdin.
  OptInput: string = '';
  // Output file name (option -o). When empty, text/JSON output is assigned
  // with an empty file name (described in the help text as stdout); binary
  // output refuses to run without it.
  OptOutput: string = '';
  // When True (option -v), every processed line is echoed to ErrOutput.
  OptVerbose: Boolean = False;
  // When True (option -s), EmbedText derives the vector from a byte-sum hash
  // of the text instead of starting from RandomVector.
  OptDeterministic: Boolean = False;
  // One of FormatText / FormatBinary / FormatJSON (options -b and -j).
  OptFormat: Integer = FormatText;

{ Prints the usage banner (with the program version) followed by the
  option list and examples to standard output. }
procedure PrintHelp;
const
  // Everything after the banner line, one entry per output line.
  HelpBody: array[0..12] of string = (
    'Использование: ii3-embed [опции]',
    'Опции:',
    '  -d, --dim=N       Размерность вектора (по умолчанию: 128)',
    '  -i, --input=FILE  Входной файл (stdin если не указан)',
    '  -o, --output=FILE Выходной файл (stdout если не указан)',
    '  -b, --binary      Бинарный формат вывода',
    '  -j, --json        JSON формат вывода',
    '  -s, --deterministic Детерминированный режим',
    '  -v, --verbose     Подробный вывод',
    '  -h, --help        Эта справка',
    'Примеры:',
    '  echo "текст" | ./ii3-embed -d 256 -b > data.bin',
    '  ./ii3-embed -i input.txt -o vectors.json -j'
  );
var
  row: Integer;
begin
  // The banner is the only line that embeds the Version constant.
  WriteLn('ii3-embed v', Version, ' - генератор векторных представлений');
  for row := Low(HelpBody) to High(HelpBody) do
    WriteLn(HelpBody[row]);
end;

{ Builds a vector of the requested dimensionality for Text and returns it
  after passing it through NormalizeVector.

  Text       - source string; processed byte by byte via Ord.
  Dimensions - number of components requested.

  Two modes, selected by the global OptDeterministic:
    * deterministic: every component is the fractional part of
      (byte sum of Text) * (index + 1) * 0.618033988749895;
    * otherwise: starts from RandomVector(Dimensions) and adds
      Ord(char) / 256 to the leading components, one per character. }
function EmbedText(const Text: string; Dimensions: Integer): TDoubleArray;
var
  charIdx, slot, charSum, overlap: Integer;
begin
  if not OptDeterministic then
  begin
    Result := RandomVector(Dimensions);
    // Only as many components as there are characters (or dimensions,
    // whichever is smaller) receive a character contribution.
    overlap := Min(Length(Text), Dimensions);
    for slot := 0 to overlap - 1 do
      Result[slot] := Result[slot] + Ord(Text[slot + 1]) / 256.0;
  end
  else
  begin
    // Order-insensitive hash: plain sum of the byte values.
    charSum := 0;
    for charIdx := 1 to Length(Text) do
      charSum := charSum + Ord(Text[charIdx]);

    SetLength(Result, Dimensions);
    for slot := 0 to High(Result) do
      Result[slot] := Frac((charSum * (slot + 1)) * 0.618033988749895);
  end;

  Result := NormalizeVector(Result);
end;

{ Writes Vector to Filename as an untyped binary file: first the element
  count as an Integer, then the elements themselves as raw bytes.

  Vector   - values to store; may be empty, in which case only the count
             (zero) is written.
  Filename - path of the file to create or overwrite.

  I/O failures surface through the usual Rewrite/BlockWrite error handling;
  the file handle is closed in all cases once it has been opened. }
procedure SaveBinary(const Vector: TDoubleArray; const Filename: string);
var
  f: File;
  size: Integer;
begin
  AssignFile(f, Filename);
  Rewrite(f, 1);  // record size 1 so BlockWrite counts are in bytes
  try
    size := Length(Vector);
    BlockWrite(f, size, SizeOf(size));
    // Vector[0] does not exist for an empty array, so skip the payload
    // instead of indexing out of bounds.
    // NOTE(review): SizeOf(Double) assumes TDoubleArray is an array of
    // Double - confirm against the LinearAlgebra unit.
    if size > 0 then
      BlockWrite(f, Vector[0], size * SizeOf(Double));
  finally
    CloseFile(f);
  end;
end;

{ Serialises Vector as a JSON array of numbers and returns the JSON text.
  The temporary TJSONArray is released before returning, also on error. }
function VectorToJSON(const Vector: TDoubleArray): string;
var
  items: TJSONArray;
  idx: Integer;
begin
  items := TJSONArray.Create;
  try
    idx := 0;
    while idx < Length(Vector) do
    begin
      items.Add(Vector[idx]);
      Inc(idx);
    end;
    Result := items.AsJSON;
  finally
    FreeAndNil(items);
  end;
end;

{ Reads the input line by line, embeds every non-blank (trimmed) line with
  EmbedText(Line, OptDim) and writes the vector in the format selected by
  OptFormat:
    FormatBinary - raw component bytes appended to the file OptOutput;
    FormatJSON   - one JSON array per line;
    otherwise    - the line, a tab, then components formatted with '%.6f'
                   and separated by single spaces.

  Input comes from OptInput and text/JSON output goes to OptOutput; an empty
  name is passed straight to Assign (documented in the help text as
  stdin/stdout). Binary output requires OptOutput; without it the program
  prints an error to ErrOutput and halts with exit code 1.

  With OptVerbose set, every processed line is echoed to ErrOutput.
  Both files are closed even when processing raises an exception. }
procedure ProcessInput;
var
  InFile: TextFile;
  OutText: TextFile;
  OutBin: File;
  Line: string;
  Vector: TDoubleArray;
  i: Integer;
  IsBinary: Boolean;
begin
  IsBinary := OptFormat = FormatBinary;

  // Validate before any file is opened: Halt does not unwind try/finally,
  // so bailing out later would leave the input file open.
  if IsBinary and (OptOutput = '') then
  begin
    WriteLn(ErrOutput, 'Бинарный вывод требует указания файла');
    Halt(1);
  end;

  // Input setup (an empty OptInput is passed through unchanged).
  Assign(InFile, OptInput);
  Reset(InFile);
  try
    // Output setup; done inside the outer try so a failure here still
    // closes the input file.
    if IsBinary then
    begin
      AssignFile(OutBin, OptOutput);
      Rewrite(OutBin, 1);  // record size 1: BlockWrite counts are bytes
    end
    else
    begin
      Assign(OutText, OptOutput);
      Rewrite(OutText);
    end;

    try
      while not Eof(InFile) do
      begin
        ReadLn(InFile, Line);
        Line := Trim(Line);
        if Line = '' then Continue;

        Vector := EmbedText(Line, OptDim);

        case OptFormat of
          FormatBinary:
            begin
              // Vector[0] is invalid for an empty vector, so write nothing.
              // NOTE(review): unlike SaveBinary, no element count is written
              // before the data here - confirm which layout readers expect.
              if Length(Vector) > 0 then
                BlockWrite(OutBin, Vector[0], Length(Vector) * SizeOf(Double));
            end;
          FormatJSON:
            begin
              WriteLn(OutText, VectorToJSON(Vector));
            end;
          else // Text
            begin
              Write(OutText, Line, #9);
              for i := 0 to High(Vector) do
              begin
                if i > 0 then Write(OutText, ' ');
                Write(OutText, Format('%.6f', [Vector[i]]));
              end;
              WriteLn(OutText);
            end;
        end;

        if OptVerbose then
          WriteLn(ErrOutput, 'Обработано: ', Line);
      end;
    finally
      if IsBinary then
        CloseFile(OutBin)
      else
        Close(OutText);
    end;
  finally
    Close(InFile);
  end;
end;

var
  c: Char;
  LongIndex: LongInt;
  // Eight long options plus the empty-name terminator GetLongOpts requires.
  LongOptions: array[0..8] of TOption;

{ Fills entry Slot of LongOptions so that the long option AName is reported
  by GetLongOpts as the short option character ShortEquivalent.
  ArgMode is No_Argument or Required_Argument. }
procedure DefineLongOption(Slot: Integer; const AName: string;
  ArgMode: Integer; ShortEquivalent: Char);
begin
  with LongOptions[Slot] do
  begin
    Name := AName;
    Has_arg := ArgMode;
    Flag := nil;  // nil flag: GetLongOpts returns Value directly
    Value := ShortEquivalent;
  end;
end;

{ Entry point: parses the command line, validates the options and runs
  ProcessInput, turning any exception into an error message on ErrOutput
  and exit code 1. }
begin
  // The help text advertises long forms of every option, so register them.
  DefineLongOption(0, 'dim', Required_Argument, 'd');
  DefineLongOption(1, 'input', Required_Argument, 'i');
  DefineLongOption(2, 'output', Required_Argument, 'o');
  DefineLongOption(3, 'binary', No_Argument, 'b');
  DefineLongOption(4, 'json', No_Argument, 'j');
  DefineLongOption(5, 'deterministic', No_Argument, 's');
  DefineLongOption(6, 'verbose', No_Argument, 'v');
  DefineLongOption(7, 'help', No_Argument, 'h');
  DefineLongOption(8, '', No_Argument, #0);

  // Argument parsing
  LongIndex := 0;
  repeat
    c := GetLongOpts('d:i:o:bjsvh', @LongOptions[0], LongIndex);
    case c of
      'd': OptDim := StrToIntDef(OptArg, 128);
      'i': OptInput := OptArg;
      'o': OptOutput := OptArg;
      'b': OptFormat := FormatBinary;
      'j': OptFormat := FormatJSON;
      's': OptDeterministic := True;
      'v': OptVerbose := True;
      'h':
        begin
          PrintHelp;
          Halt(0);
        end;
      '?', ':':
        begin
          // Unknown option or missing argument: show usage, but signal
          // failure to the caller instead of exiting with status 0.
          PrintHelp;
          Halt(1);
        end;
    end;
  until c = EndOfOptions;

  // A vector needs at least one component; zero or negative sizes would
  // otherwise reach SetLength / Vector[0] further down.
  if OptDim < 1 then
  begin
    WriteLn(ErrOutput, 'Ошибка: размерность должна быть положительным целым числом');
    Halt(1);
  end;

  // NOTE(review): assumes RandomVector draws from System.Random, which
  // repeats the same sequence on every run unless seeded - confirm against
  // the LinearAlgebra unit. The call is harmless if it seeds itself.
  if not OptDeterministic then
    Randomize;

  try
    ProcessInput;
  except
    on E: Exception do
    begin
      WriteLn(ErrOutput, 'Ошибка: ', E.Message);
      Halt(1);
    end;
  end;
end.