unit ucs4unit;

{
    UCS4 unit.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written on FreePascal (https://freepascal.org/).
    Copyright (C) 2025  Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}

{$MODE OBJFPC}{$H+}
{$RANGECHECKS ON}
{$OPTIMIZATION LEVEL3}
{$INLINE ON}

interface

uses SysUtils, LazUTF8;

type
  ucs4 = packed object
  private
    FData: PDWord;
    FLength: DWord;
    FCapacity: DWord;
    procedure Grow(MinCapacity: DWord); inline;
    function GetChar(Index: DWord): DWord; inline;
    procedure SetChar(Index: DWord; Value: Dword); inline;
  public
    property Length: DWord read FLength;
    property Chars[Index: DWord]: DWord read GetChar write SetChar; default;
    
    procedure Init; inline;
    procedure Init(l:DWord); inline;
    procedure Clear; inline;
    procedure Reverse;
    function IsRTL: Boolean;
    function Concat(const S: ucs4): ucs4;
    procedure FromUTF8(const S: string);
    function ToUTF8: string;
  end;

implementation

procedure ucs4.Init;
begin
  FData := nil;
  FLength := 0;
  FCapacity := 0;
end;

procedure ucs4.Init(l:DWord);
begin
  FLength := l;
  FCapacity := l;
  FData := GetMem(l*SizeOf(DWord));
end;

procedure ucs4.Grow(MinCapacity: DWord);
var
  NewCapacity: DWord;
begin
  if FCapacity = 0 then
    NewCapacity := 8
  else
    NewCapacity := FCapacity * 2;
    
  if NewCapacity < MinCapacity then
    NewCapacity := MinCapacity;
    
  ReallocMem(FData, NewCapacity * SizeOf(DWord));
  FCapacity := NewCapacity;
end;

function ucs4.GetChar(Index: DWord): DWord; inline;
begin
//  {$IFDEF RANGECHECKS}
//  if Index >= FLength then
//    raise Exception.Create('Index out of bounds');
//  {$ENDIF}
Exit(FData[Index]);
end;

procedure ucs4.SetChar(Index: DWord; Value: Dword); inline;
begin
//  {$IFDEF RANGECHECKS}
//  if Index >= FLength then
//    raise Exception.Create('Index out of bounds');
//  {$ENDIF}
FData[Index] := Value;
end;

procedure ucs4.Clear;
begin
  if FData <> nil then
  begin
    FreeMem(FData);
    FData := nil;
  end;
  FLength := 0;
  FCapacity := 0;
end;

procedure ucs4.Reverse;
var
  I: DWord;
  Tmp: DWord;
  P1, P2: PDWord;
begin
  if FLength <= 1 then Exit;
  
  P1 := @FData[0];
  P2 := @FData[FLength-1];
  
  while P1 < P2 do
  begin
    Tmp := P1^;
    P1^ := P2^;
    P2^ := Tmp;
    Inc(P1);
    Dec(P2);
  end;
end;

function ucs4.IsRTL: Boolean;
var
  I: DWord;
begin
  for I := 0 to FLength - 1 do
    if (FData[I] >= $0590) and (FData[I] <= $08FF) then
      Exit(True);
  Result := False;
end;

function ucs4.Concat(const S: ucs4): ucs4;
begin
  Result.Init;
  if Self.FLength + S.FLength = 0 then Exit;
  
  GetMem(Result.FData, (Self.FLength + S.FLength) * SizeOf(DWord));
  Result.FCapacity := Self.FLength + S.FLength;
  Result.FLength := Result.FCapacity;
  
  if Self.FLength > 0 then
    Move(Self.FData^, Result.FData^, Self.FLength * SizeOf(DWord));
    
  if S.FLength > 0 then
    Move(S.FData^, Result.FData[Self.FLength], S.FLength * SizeOf(DWord));
end;

procedure ucs4.FromUTF8(const S: string);
var
  UTF8Ptr: PChar;
  CharLen: Integer;
  Count, Pos: DWord;
begin
  Clear;
  if S = '' then Exit;

  // Первый проход - подсчет символов
  Count := 0;
  UTF8Ptr := PChar(S);
  while UTF8Ptr^ <> #0 do
  begin
    UTF8CodepointToUnicode(UTF8Ptr, CharLen);
    Inc(UTF8Ptr, CharLen);
    Inc(Count);
  end;

  // Выделение памяти
  if Count > FCapacity then
    Grow(Count);
  FLength := Count;

  // Второй проход - заполнение
  UTF8Ptr := PChar(S);
  Pos := 0;
  while UTF8Ptr^ <> #0 do
  begin
    FData[Pos] := UTF8CodepointToUnicode(UTF8Ptr, CharLen);
    Inc(UTF8Ptr, CharLen);
    Inc(Pos);
  end;
end;

function ucs4.ToUTF8: string;
var
  I, Len: Integer;
  P: PChar;
begin
  if FLength = 0 then Exit('');

  // Максимально возможный размер (4 байта на символ)
  SetLength(Result, FLength * 4);
  P := PChar(Result);
  
  for I := 0 to FLength - 1 do
    Inc(P, UnicodeToUTF8(FData[I], P));
  
  // Корректируем длину под реальный размер
  SetLength(Result, P - PChar(Result));
end;

end.