unit Word2VecOptimizer;

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written in Free Pascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}


{$MODE OBJFPC}{$H+}
interface

uses
  SysUtils, Classes, rustringunit, MatrixOps,Math;

procedure QuantizeModel(const SourceModel, TargetModel: string; Bits: Integer = 8);
procedure ConvertToRustringFormat(const SourceModel, TargetModel: string);

implementation

{
  Reads a binary embedding model from SourceModel and writes a quantized
  copy to TargetModel.

  Input layout, as read here:
    Integer VocabSize, Integer DimSize, then VocabSize records of
    (Integer byte length, word bytes, DimSize Single values).

  Output layout, as written here:
    'W2VQ', Integer Bits, Integer VocabSize, Integer DimSize, then
    VocabSize records of (Integer length, word converted with UTF8ToRus,
    Single MinVal, Single MaxVal, DimSize quantized codes).

  Every code is stored in one Byte regardless of Bits, so Bits only
  limits the number of levels used (2^Bits - 1 is the largest code).

  Parameters:
    SourceModel - path of the model to read.
    TargetModel - path of the file to create or overwrite.
    Bits        - number of quantization bits, 1..8.

  Raises:
    EArgumentOutOfRangeException when Bits is outside 1..8.
    Exception when the source header or a word length is invalid.
    I/O errors raised by the file routines are passed through.
}
procedure QuantizeModel(const SourceModel, TargetModel: string; Bits: Integer);
var
  FIn, FOut: File;
  MinVal, MaxVal, Scale: Single;
  Vec: array of Single;
  Quantized: array of Byte;
  VocabSize, DimSize, WordLen, Levels, i, j: Integer;
  VecBytes, Code: Int64;
  Magic: string = 'W2VQ';
  SrcWord: string;
  RusWord: rustring;
begin
  // Codes are stored in a Byte, so more than 8 bits would overflow and
  // fewer than 1 bit leaves no usable levels.
  if (Bits < 1) or (Bits > 8) then
    raise EArgumentOutOfRangeException.CreateFmt(
      'QuantizeModel: Bits must be in the range 1..8, got %d', [Bits]);

  // Largest code value; loop-invariant, so it is computed once.
  Levels := (1 shl Bits) - 1;

  AssignFile(FIn, SourceModel);
  Reset(FIn, 1);
  try
    // Read and validate the source header before touching the target,
    // so an unusable source does not truncate an existing target file.
    BlockRead(FIn, VocabSize, SizeOf(Integer));
    BlockRead(FIn, DimSize, SizeOf(Integer));
    if (VocabSize < 0) or (DimSize < 1) then
      raise Exception.CreateFmt(
        'QuantizeModel: invalid header in "%s" (VocabSize=%d, DimSize=%d)',
        [SourceModel, VocabSize, DimSize]);

    SetLength(Vec, DimSize);
    SetLength(Quantized, DimSize);
    VecBytes := Int64(DimSize) * SizeOf(Single);

    AssignFile(FOut, TargetModel);
    Rewrite(FOut, 1);
    try
      // Header: magic, quantization flag, then the original dimensions.
      BlockWrite(FOut, Magic[1], Length(Magic));
      BlockWrite(FOut, Bits, SizeOf(Integer));
      BlockWrite(FOut, VocabSize, SizeOf(Integer));
      BlockWrite(FOut, DimSize, SizeOf(Integer));

      for i := 0 to VocabSize - 1 do
      begin
        // Read the word; an empty word has no bytes to read and must not
        // be indexed.
        BlockRead(FIn, WordLen, SizeOf(Integer));
        if WordLen < 0 then
          raise Exception.CreateFmt(
            'QuantizeModel: negative word length %d at entry %d',
            [WordLen, i]);
        SetLength(SrcWord, WordLen);
        if WordLen > 0 then
          BlockRead(FIn, SrcWord[1], WordLen);

        // Read the vector.
        BlockRead(FIn, Vec[0], VecBytes);

        // Per-vector linear quantization of [MinVal, MaxVal] onto
        // 0..Levels. A constant vector has zero range; use Scale = 0 so
        // every code becomes 0 instead of dividing by zero.
        MinVal := MinValue(Vec);
        MaxVal := MaxValue(Vec);
        if MaxVal > MinVal then
          Scale := Levels / (MaxVal - MinVal)
        else
          Scale := 0;

        for j := 0 to DimSize - 1 do
        begin
          Code := Round((Vec[j] - MinVal) * Scale);
          // Guard against rounding pushing a value just outside the range.
          if Code < 0 then
            Code := 0
          else if Code > Levels then
            Code := Levels;
          Quantized[j] := Byte(Code);
        end;

        // Write the word converted by UTF8ToRus; Length(RusWord) items
        // are written starting at index 0, as in the original code.
        RusWord := UTF8ToRus(SrcWord);
        WordLen := Length(RusWord);
        BlockWrite(FOut, WordLen, SizeOf(Integer));
        if WordLen > 0 then
          BlockWrite(FOut, RusWord[0], WordLen);

        // Write the range needed to dequantize, then the codes.
        BlockWrite(FOut, MinVal, SizeOf(Single));
        BlockWrite(FOut, MaxVal, SizeOf(Single));
        BlockWrite(FOut, Quantized[0], DimSize);
      end;
    finally
      CloseFile(FOut);
    end;
  finally
    // Nested try blocks ensure the source is closed even when the target
    // could not be created.
    CloseFile(FIn);
  end;
end;

{
  Copies a binary embedding model from SourceModel to TargetModel,
  converting every word with UTF8ToRus and leaving vectors unchanged.

  Input layout, as read here:
    Integer VocabSize, Integer DimSize, then VocabSize records of
    (Integer byte length, word bytes, DimSize Single values).

  Output layout, as written here:
    'W2VR', Integer VocabSize, Integer DimSize, then VocabSize records of
    (Integer length, converted word, DimSize Single values).

  Parameters:
    SourceModel - path of the model to read.
    TargetModel - path of the file to create or overwrite.

  Raises:
    Exception when the source header or a word length is negative.
    I/O errors raised by the file routines are passed through.
}
procedure ConvertToRustringFormat(const SourceModel, TargetModel: string);
var
  FIn, FOut: File;
  VocabSize, DimSize, WordLen, i: Integer;
  VecSize: Int64;
  Magic: string = 'W2VR';
  SrcWord: string;
  Vec: array of Single;
  RusWord: rustring;
begin
  AssignFile(FIn, SourceModel);
  Reset(FIn, 1);
  try
    // Read and validate the source header before touching the target,
    // so an unusable source does not truncate an existing target file.
    BlockRead(FIn, VocabSize, SizeOf(Integer));
    BlockRead(FIn, DimSize, SizeOf(Integer));
    if (VocabSize < 0) or (DimSize < 0) then
      raise Exception.CreateFmt(
        'ConvertToRustringFormat: invalid header in "%s" (VocabSize=%d, DimSize=%d)',
        [SourceModel, VocabSize, DimSize]);

    // The vector buffer and its byte size are the same for every word,
    // so they are set up once. Widen before multiplying to avoid overflow.
    VecSize := Int64(DimSize) * SizeOf(Single);
    SetLength(Vec, DimSize);

    AssignFile(FOut, TargetModel);
    Rewrite(FOut, 1);
    try
      // Header: magic followed by the original dimensions.
      BlockWrite(FOut, Magic[1], Length(Magic));
      BlockWrite(FOut, VocabSize, SizeOf(Integer));
      BlockWrite(FOut, DimSize, SizeOf(Integer));

      for i := 0 to VocabSize - 1 do
      begin
        // Read the word; an empty word has no bytes to read and must not
        // be indexed.
        BlockRead(FIn, WordLen, SizeOf(Integer));
        if WordLen < 0 then
          raise Exception.CreateFmt(
            'ConvertToRustringFormat: negative word length %d at entry %d',
            [WordLen, i]);
        SetLength(SrcWord, WordLen);
        if WordLen > 0 then
          BlockRead(FIn, SrcWord[1], WordLen);

        // Convert and write the word; Length(RusWord) items are written
        // starting at index 0, as in the original code.
        RusWord := UTF8ToRus(SrcWord);
        WordLen := Length(RusWord);
        BlockWrite(FOut, WordLen, SizeOf(Integer));
        if WordLen > 0 then
          BlockWrite(FOut, RusWord[0], WordLen);

        // Copy the vector unchanged; skipped when vectors are empty so
        // the empty buffer is never indexed.
        if VecSize > 0 then
        begin
          BlockRead(FIn, Vec[0], VecSize);
          BlockWrite(FOut, Vec[0], VecSize);
        end;
      end;
    finally
      CloseFile(FOut);
    end;
  finally
    // Nested try blocks ensure the source is closed even when the target
    // could not be created.
    CloseFile(FIn);
  end;
end;

end.