unit Word2VecConverterUnit;

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written in Free Pascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}


{$MODE OBJFPC}{$H+}{$RANGECHECKS ON}
interface

procedure ConvertTextModelToBinary(const TextModelFile, BinaryModelFile: string);

implementation

uses
  SysUtils, Classes;

{
  Converts a word2vec model stored as text into a binary file.

  Input (TextModelFile), space-separated:
    line 1      : <vocabulary size> <vector dimension>
    other lines : <word> <value 1> ... <value N>
  Numeric values are always parsed with '.' as the decimal separator,
  independently of the process locale.

  Output (BinaryModelFile), written with BlockWrite in the host's native
  byte order, using Integer (32-bit in OBJFPC mode) and Single:
    VocabSize : Integer
    DimSize   : Integer
    then, for every word:
      WordLen : Integer              (length of the word in bytes)
      Word    : WordLen bytes
      Vector  : DimSize Single values

  Raises EInOutError when a file cannot be opened, read or written, and
  EConvertError when the text model is malformed (bad number, negative
  header value, missing tokens, or fewer lines than the header announces).
  On error the output file is closed but may be left partially written.
}
procedure ConvertTextModelToBinary(const TextModelFile, BinaryModelFile: string);
var
  Input: TextFile;
  Output: File;
  Line: string;
  Parts: TStringArray;
  Vector: array of Single;
  Fmt: TFormatSettings;
  VocabSize, DimSize, i, j, WordLen: Integer;
begin
  // The text format always uses '.', so do not depend on the current
  // locale (which may use ',' and would make StrToFloat reject every value).
  Fmt := DefaultFormatSettings;
  Fmt.DecimalSeparator := '.';

  AssignFile(Input, TextModelFile);
  Reset(Input);
  try
    // Open the output only after the input is protected by try..finally,
    // so a failure here no longer leaks the input file handle.
    AssignFile(Output, BinaryModelFile);
    Rewrite(Output, 1);
    try
      // Read the header
      if Eof(Input) then
        raise EConvertError.CreateFmt(
          'Model file "%s" is empty', [TextModelFile]);
      ReadLn(Input, Line);
      Parts := Line.Split([' ']);
      if Length(Parts) < 2 then
        raise EConvertError.CreateFmt(
          'Malformed header in "%s": expected "<vocab size> <vector size>"',
          [TextModelFile]);
      VocabSize := StrToInt(Parts[0]);
      DimSize := StrToInt(Parts[1]);
      if (VocabSize < 0) or (DimSize < 0) then
        raise EConvertError.CreateFmt(
          'Invalid header in "%s": sizes must not be negative (%d %d)',
          [TextModelFile, VocabSize, DimSize]);

      // Write the header
      BlockWrite(Output, VocabSize, SizeOf(Integer));
      BlockWrite(Output, DimSize, SizeOf(Integer));

      // The vector length is the same for every word: allocate once.
      SetLength(Vector, DimSize);

      // Convert the vectors
      for i := 0 to VocabSize - 1 do
      begin
        if Eof(Input) then
          raise EConvertError.CreateFmt(
            'Unexpected end of "%s": header announces %d words, found %d',
            [TextModelFile, VocabSize, i]);
        ReadLn(Input, Line);
        Parts := Line.Split([' ']);

        // Word + vector: Parts[0] is the word, Parts[1..DimSize] the values.
        if Length(Parts) < DimSize + 1 then
          raise EConvertError.CreateFmt(
            'Line %d of "%s": expected a word followed by %d values',
            [i + 2, TextModelFile, DimSize]);
        for j := 0 to DimSize - 1 do
          Vector[j] := StrToFloat(Parts[j + 1], Fmt);

        // Write the word length
        WordLen := Length(Parts[0]);
        BlockWrite(Output, WordLen, SizeOf(Integer));

        // The word itself (indexing an empty string is invalid, so skip it)
        if WordLen > 0 then
          BlockWrite(Output, Parts[0][1], WordLen);

        // The vector (Vector[0] does not exist when DimSize = 0)
        if DimSize > 0 then
          BlockWrite(Output, Vector[0], DimSize * SizeOf(Single));
      end;
    finally
      CloseFile(Output);
    end;
  finally
    CloseFile(Input);
  end;
end;

end.