unit ru1unit;

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written on FreePascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}


{$MODE OBJFPC}{$H+}
{$RANGECHECKS ON}
{$MODESWITCH ADVANCEDRECORDS}
{$CODEPAGE UTF8}
//{$MODESWITCH UNICODESTRINGS}

interface

uses
  SysUtils, LazUTF8, LazUnicode, rustringcoreunit, ucs4unit, ucs4functionsunit;

type
  PRu1 = ^Ru1;
  { Byte string in the single-byte Ru1 encoding.
    This is a plain (old-style) object without constructor or destructor:
    the buffer is only released by Clear, so every value produced by Init,
    Concat, ToLower, etc. has to be released explicitly by its owner.
    Assigning one Ru1 to another copies the three fields only, i.e. both
    values then point at the same buffer. }
  Ru1 = packed object
  private
    FData: PByte;        // heap buffer with the encoded bytes; nil after Init/Clear
    FLength: Integer;    // number of valid bytes in FData
    FCapacity: Integer;  // number of bytes allocated for FData
    // Enlarges the buffer to at least MinCapacity bytes (doubling strategy).
    procedure Grow(MinCapacity: Integer); inline;
    // Bounds-checked accessors behind the Bytes property.
    function GetByte(Index: Integer): Byte; inline;
    procedure SetByte(Index: Integer; Value: Byte); inline;
  public
    // Resets the fields to the empty state without freeing anything.
    procedure Init; inline;
    // Allocates an uninitialised buffer of Length bytes without freeing anything.
    procedure Init(Length: Integer); inline;
    // Frees the buffer (if any) and resets the fields to the empty state.
    procedure Clear; inline;
    // Returns a newly allocated string holding Self followed by S.
    function Concat(const S: Ru1): Ru1;
    // Replaces the contents with the Ru1 encoding of S; unsupported characters are dropped.
    procedure FromUTF8(const S: String);
    // Decodes the contents back to UTF-8.
    function ToUTF8: UTF8String;
    // Number of bytes (= characters) stored.
    property Length: Integer read FLength;
    // Zero-based, bounds-checked access to the encoded bytes.
    property Bytes[Index: Integer]: Byte read GetByte write SetByte; default;
    // Decimal representation of Value as a newly allocated Ru1.
    function IntToStr(Value: Integer): Ru1; static;
    // Parses the contents as a decimal integer, returning Default on failure.
    function StrToIntDef(Default: Integer = 0): Integer;
    // Case conversion into a newly allocated copy.
    function ToLower: Ru1;
    function ToUpper: Ru1;
    // Case conversion in place.
    procedure ToLow;
    procedure ToUp;
  end;

  { Ru1 extended with searching and slicing helpers. All functions that
    return a Ru1 hand back a value the caller is responsible for clearing. }
  Ru1Ext = packed object(Ru1)
  public
    // Copy of up to Len bytes starting at zero-based StartPos.
    function SubString(StartPos, Len: Integer): Ru1;
    // Zero-based position of SubStr at or after StartPos, or -1 when absent.
    function IndexOf(const SubStr: Ru1; StartPos: Integer = 0): Integer;
    // Copy of Self with every occurrence of OldPattern replaced by NewPattern.
    function Replace(const OldPattern, NewPattern: Ru1): Ru1;
    // Copy of Self without leading and trailing padding.
    function Trim: Ru1;
  end;

// Free-function wrapper around Ru1.FromUTF8.
function UTF8ToRus(const S: UTF8String): Ru1;
// Free-function wrapper around Ru1.ToUTF8.
function RusToUTF8(const R: Ru1): UTF8String;
// Tells whether a UCS4 code point is accepted by the Ru1 conversion.
function IsCharInRustring(Char: DWord): Boolean;  // <-- New function

implementation

{ Puts the string into the empty state: no buffer, zero length, zero capacity.
  Nothing is freed here, so a buffer FData pointed to before the call is
  simply forgotten; use Clear to release an existing buffer. }
procedure Ru1.Init;
begin
  FCapacity := 0;
  FLength := 0;
  FData := nil;
end;

{ Sets both length and capacity to Length and allocates a buffer of that
  many bytes. The buffer contents are not initialised here, and any buffer
  FData pointed to before the call is not released. }
procedure Ru1.Init(Length: Integer);
begin
  Self.FLength := Length;
  Self.FCapacity := Length;
  Self.FData := GetMem(Length);
end;

{ Releases the buffer, if one is attached, and returns the string to the
  empty state (nil buffer, zero length, zero capacity). }
procedure Ru1.Clear;
begin
  if Assigned(FData) then
    FreeMem(FData);
  // Same field reset as the parameterless Init.
  Init;
end;

{ Enlarges the buffer. The new capacity is 8 for a string that has no
  capacity yet, otherwise twice the current capacity, and in either case at
  least MinCapacity. FLength is left untouched. }
procedure Ru1.Grow(MinCapacity: Integer);
var
  Target: Integer;
begin
  Target := 8;
  if FCapacity <> 0 then
    Target := FCapacity * 2;

  if MinCapacity > Target then
    Target := MinCapacity;

  ReallocMem(FData, Target);
  FCapacity := Target;
end;

{ Getter of the Bytes property: returns the byte at zero-based Index.
  Raises Exception when Index is outside 0..FLength-1. }
function Ru1.GetByte(Index: Integer): Byte;
begin
  if (Index >= 0) and (Index < FLength) then
    Result := FData[Index]
  else
    raise Exception.Create('Index out of bounds');
end;

{ Setter of the Bytes property: stores Value at zero-based Index.
  Raises Exception when Index is outside 0..FLength-1. }
procedure Ru1.SetByte(Index: Integer; Value: Byte);
begin
  if (Index >= 0) and (Index < FLength) then
    FData[Index] := Value
  else
    raise Exception.Create('Index out of bounds');
end;

{ Returns a newly allocated string containing the bytes of Self followed by
  the bytes of S. Neither operand is modified. When both operands are empty
  the result is the empty state without any allocation. }
function Ru1.Concat(const S: Ru1): Ru1;
var
  Total: Integer;
begin
  Total := FLength + S.FLength;
  if Total = 0 then
  begin
    Result.Init;
    Exit;
  end;

  Result.Init(Total);
  // Copy each half only when it has bytes, so a nil FData is never dereferenced.
  if FLength > 0 then
    Move(FData^, Result.FData^, FLength);
  if S.FLength > 0 then
    Move(S.FData^, Result.FData[FLength], S.FLength);
end;

{ Returns True when the UCS4 code point Char is one of the characters the
  Ru1 conversion accepts, False otherwise.

  Accepted code points, listed in ascending order:
    U+0009, U+000A   tab and line feed
    U+0020..U+007D   space through '}': the ASCII digits, both Latin
                     alphabets and all ASCII punctuation in that span
                     ('~' at U+007E is not accepted)
    U+0401, U+0451   Cyrillic capital and small IO
    U+0410..U+044F   Cyrillic capital A .. small YA
    U+2019, U+2116   right single quotation mark and numero sign }
function IsCharInRustring(Char: DWord): Boolean;
begin
  case Char of
    $0009, $000A,
    $0020..$007D,
    $0401,
    $0410..$044F,
    $0451,
    $2019,
    $2116:
      Result := True;
  else
    Result := False;
  end;
end;

{ Replaces the contents of this string with the Ru1 encoding of the UTF-8
  text S.

  The text is first decoded into a temporary ucs4 string. Every code point
  accepted by IsCharInRustring is translated with FindCharCode and stored;
  all other code points are dropped silently. If S is empty or contains no
  accepted character the string is left in the empty state.

  The method starts by calling Clear, so the object must already be
  initialised (Init) or hold a valid buffer when it is called.

  The temporary ucs4 string is released in a finally section so that it is
  freed on every path, including the early exit taken when nothing can be
  converted. }
procedure Ru1.FromUTF8(const S: String);
var
  Decoded: ucs4;
  I, OutPos: Integer;
begin
  Clear;
  if S = '' then Exit;

  Decoded.Init;
  try
    Decoded.FromUTF8(S);

    // First pass: count the characters that survive the conversion.
    OutPos := 0;
    for I := 0 to Decoded.Length - 1 do
      if IsCharInRustring(Decoded[I]) then
        Inc(OutPos);

    // Exit still runs the finally section below.
    if OutPos = 0 then Exit;

    // Second pass: allocate exactly that many bytes and fill them.
    Init(OutPos);
    OutPos := 0;
    for I := 0 to Decoded.Length - 1 do
      if IsCharInRustring(Decoded[I]) then
      begin
        FData[OutPos] := FindCharCode(WideChar(Decoded[I]));
        Inc(OutPos);
      end;
  finally
    Decoded.Clear;
  end;
end;

{ Decodes the string to UTF-8. Each stored byte is mapped to a character
  with FindCharByCode, collected in a temporary ucs4 string of the same
  length, and that string is then converted with its own ToUTF8.
  An empty Ru1 yields the empty string. }
function Ru1.ToUTF8: UTF8String;
var
  Wide: ucs4;
  Idx: Integer;
begin
  Result := '';
  if FLength = 0 then Exit;

  Wide.Init(FLength);
  Idx := 0;
  while Idx < FLength do
  begin
    Wide[Idx] := Ord(FindCharByCode(FData[Idx]));
    Inc(Idx);
  end;

  Result := Wide.ToUTF8;
  Wide.Clear;
end;

{ Returns a newly allocated copy of up to Len bytes starting at zero-based
  StartPos. Len is clipped to the bytes available after StartPos.
  The result is the empty state (no allocation) when StartPos is outside
  0..FLength-1 or when Len is zero or negative; a negative Len is therefore
  never handed to the allocator. }
function Ru1Ext.SubString(StartPos, Len: Integer): Ru1;
begin
  if (StartPos < 0) or (StartPos >= FLength) or (Len <= 0) then
  begin
    Result.Init;
    Exit;
  end;
  // Compare against the remaining byte count instead of computing
  // StartPos + Len, which could overflow for a very large Len.
  if Len > FLength - StartPos then
    Len := FLength - StartPos;
  Result.Init(Len);
  Move(FData[StartPos], Result.FData^, Len);
end;

{ Returns the zero-based position of the first occurrence of SubStr at or
  after StartPos, or -1 when there is none. An empty SubStr, an empty
  string, a SubStr longer than the string, or a StartPos at or beyond the
  end all give -1. A negative StartPos is treated as 0 so that the scan
  never reads in front of the buffer. }
function Ru1Ext.IndexOf(const SubStr: Ru1; StartPos: Integer = 0): Integer;
var
  I, J: Integer;
  Found: Boolean;
begin
  if StartPos < 0 then
    StartPos := 0;
  if (SubStr.Length = 0) or (FLength = 0) or (SubStr.Length > FLength) or (StartPos >= FLength) then
    Exit(-1);
  // The last candidate position is the one where SubStr still fits entirely.
  for I := StartPos to FLength - SubStr.Length do
  begin
    Found := True;
    for J := 0 to SubStr.Length - 1 do
      if FData[I + J] <> SubStr[J] then
      begin
        Found := False;
        Break;
      end;
    if Found then
      Exit(I);
  end;
  Result := -1;
end;

{ Unit-private helper for Ru1Ext.Replace: appends Piece to Acc and releases
  the buffer Acc owned before the append, so the accumulator never leaks. }
procedure Ru1AppendOwned(var Acc: Ru1; const Piece: Ru1);
var
  Joined: Ru1;
begin
  Joined := Acc.Concat(Piece);
  Acc.Clear;
  Acc := Joined;
end;

{ Returns a copy of this string in which every non-overlapping occurrence
  of OldPattern, scanning left to right, is replaced by NewPattern.

  The result always owns its own buffer (or is the empty state), including
  the cases where nothing is replaced, so the caller can Clear it without
  affecting this string. Intermediate substrings and accumulators created
  during the scan are released before returning.

  An empty OldPattern leaves the text unchanged (a plain copy is returned). }
function Ru1Ext.Replace(const OldPattern, NewPattern: Ru1): Ru1;
var
  Cursor, MatchPos: Integer;
  Acc, Piece: Ru1;
begin
  if FLength = 0 then
  begin
    Result.Init;
    Exit;
  end;
  if OldPattern.Length = 0 then
  begin
    // Deep copy rather than handing out an alias of our own buffer.
    Result := SubString(0, FLength);
    Exit;
  end;

  Acc.Init;
  Cursor := 0;
  while Cursor < FLength do
  begin
    MatchPos := IndexOf(OldPattern, Cursor);
    if MatchPos = -1 then
    begin
      // No further match: append the remaining tail and stop.
      Piece := SubString(Cursor, FLength - Cursor);
      Ru1AppendOwned(Acc, Piece);
      Piece.Clear;
      Break;
    end;
    // Text between the previous match and this one, then the replacement.
    Piece := SubString(Cursor, MatchPos - Cursor);
    Ru1AppendOwned(Acc, Piece);
    Piece.Clear;
    Ru1AppendOwned(Acc, NewPattern);
    Cursor := MatchPos + OldPattern.Length;
  end;
  Result := Acc;
end;

{ Returns a newly allocated copy of this string without leading and
  trailing space characters. The result is the empty state when the string
  is empty or consists of spaces only; it never shares a buffer with Self.

  The code of the space character is obtained from FindCharCode, the same
  lookup FromUTF8 uses when storing text. Ru1 codes are not ASCII: per the
  layout notes in IsCharInRustring the value $20 lies in the Latin
  uppercase range, so comparing against a literal $20 would strip a letter
  instead of spaces. }
function Ru1Ext.Trim: Ru1;
var
  First, Past: Integer;
  SpaceCode: Byte;
begin
  Result.Init;
  if FLength = 0 then Exit;

  SpaceCode := FindCharCode(WideChar($0020));

  // First: index of the first non-space byte.
  First := 0;
  while (First < FLength) and (FData[First] = SpaceCode) do Inc(First);

  // Past: one past the index of the last non-space byte.
  Past := FLength;
  while (Past > First) and (FData[Past - 1] = SpaceCode) do Dec(Past);

  if Past > First then
    Result := SubString(First, Past - First);
end;

{ Converts the UTF-8 text S to a newly built Ru1 string (see Ru1.FromUTF8
  for the handling of unsupported characters). The caller owns the result
  and releases it with Clear. }
function UTF8ToRus(const S: UTF8String): Ru1;
begin
  // FromUTF8 begins with Clear, which frees FData when it is non-nil.
  // Result is not initialised on entry, so give it a defined empty state
  // first; otherwise Clear could call FreeMem on a garbage pointer.
  Result.Init;
  Result.FromUTF8(S);
end;

{ Free-function form of Ru1.ToUTF8: returns the UTF-8 text for R. }
function RusToUTF8(const R: Ru1): UTF8String;
begin
  Exit(R.ToUTF8);
end;

// =============================================
//  Number conversion (for Rustring)
// =============================================

{ Converts Value to its decimal representation as a newly allocated Ru1
  string (counterpart of SysUtils.IntToStr). Digits are stored as the codes
  $00..$09 and a leading minus sign as $CE. The caller owns the result.

  The magnitude is computed in Int64 so that Low(Integer), whose negation
  does not fit in Integer, is converted correctly as well. }
function Ru1.IntToStr(Value: Integer): Ru1; static;
var
  I, Len: Integer;
  IsNegative: Boolean;
  Magnitude: Int64;
  Temp: array[0..15] of Byte; // digits, least significant first; Integer needs at most 10
begin
  if Value = 0 then
  begin
    Result.Init(1);
    Result.Bytes[0] := $00; // '0'
    Exit;
  end;

  IsNegative := Value < 0;
  Magnitude := Value;
  if IsNegative then
    Magnitude := -Magnitude;

  Len := 0;
  while Magnitude > 0 do
  begin
    Temp[Len] := Byte(Magnitude mod 10); // digit codes are $00..$09
    Magnitude := Magnitude div 10;
    Inc(Len);
  end;

  Result.Init(Len + Ord(IsNegative));
  if IsNegative then
    Result.Bytes[0] := $CE; // '-' in the Ru1 encoding

  // Temp holds the digits in reverse order, so fill the result from the end.
  for I := 0 to Len - 1 do
    Result.Bytes[Result.Length - I - 1] := Temp[I];
end;

{ Parses the string as a decimal integer (counterpart of
  SysUtils.StrToIntDef). The accepted form is an optional leading minus
  sign (code $CE) followed by one or more digit codes $00..$09.

  Default is returned when the string is empty, consists of the minus sign
  only, contains any other code, or denotes a value outside the Integer
  range. The value is accumulated in Int64 and checked after every digit,
  so an overlong digit string yields Default instead of overflowing. }
function Ru1.StrToIntDef(Default: Integer = 0): Integer;
var
  I, Start: Integer;
  IsNegative: Boolean;
  Acc, Limit: Int64;
begin
  if Length = 0 then Exit(Default);

  IsNegative := (Bytes[0] = $CE); // '-' in the Ru1 encoding
  if IsNegative then
    Start := 1
  else
    Start := 0;

  // A sign without any digit is not a number.
  if Start >= Length then Exit(Default);

  // Largest magnitude allowed: High(Integer), or one more for negatives.
  Limit := High(Integer);
  if IsNegative then
    Inc(Limit);

  Acc := 0;
  for I := Start to Length - 1 do
  begin
    if Bytes[I] > $09 then // not a digit
      Exit(Default);
    Acc := Acc * 10 + Bytes[I];
    if Acc > Limit then
      Exit(Default);
  end;

  if IsNegative then
    Acc := -Acc;
  Result := Integer(Acc);
end;

// =============================================
//  Case conversion (for Rustring)
// =============================================

{ Returns a newly allocated lower-case copy of the string.
  Every code strictly between $09 and $CB gets its high bit ($80) set,
  which maps an upper-case letter code onto its lower-case counterpart and
  leaves lower-case codes as they are. All other codes are copied verbatim. }
function Ru1.ToLower: Ru1;
var
  Idx: Integer;
  Code: Byte;
begin
  Result.Init(Length);
  for Idx := 0 to Length - 1 do
  begin
    Code := Bytes[Idx];
    if (Code > $09) and (Code < $CB) then
      Code := Code or $80; // set the high bit: lower case
    Result.Bytes[Idx] := Code;
  end;
end;

{ Returns a newly allocated upper-case copy of the string.
  Every code strictly between $09 and $CB gets its high bit ($80) cleared,
  which maps a lower-case letter code onto its upper-case counterpart and
  leaves upper-case codes as they are. All other codes are copied verbatim. }
function Ru1.ToUpper: Ru1;
var
  Idx: Integer;
  Code: Byte;
begin
  Result.Init(Length);
  for Idx := 0 to Length - 1 do
  begin
    Code := Bytes[Idx];
    if (Code > $09) and (Code < $CB) then
      Code := Code and $7F; // clear the high bit: upper case
    Result.Bytes[Idx] := Code;
  end;
end;

{ Converts the string to lower case in place: every code strictly between
  $09 and $CB gets its high bit ($80) set; all other codes stay unchanged. }
procedure Ru1.ToLow;
var
  Idx: Integer;
  Code: Byte;
begin
  Idx := 0;
  while Idx < Length do
  begin
    Code := FData[Idx];
    if (Code > $09) and (Code < $CB) then
      FData[Idx] := Code or $80; // set the high bit: lower case
    Inc(Idx);
  end;
end;

{ Converts the string to upper case in place: every code strictly between
  $09 and $CB gets its high bit ($80) cleared; all other codes stay
  unchanged. }
procedure Ru1.ToUp;
var
  Idx: Integer;
  Code: Byte;
begin
  Idx := 0;
  while Idx < Length do
  begin
    Code := FData[Idx];
    if (Code > $09) and (Code < $CB) then
      FData[Idx] := Code and $7F; // clear the high bit: upper case
    Inc(Idx);
  end;
end;

end.