unit rustringcoreunit;
{$mode objfpc}{$H+}
{$modeswitch advancedrecords}
{$MODESWITCH UNICODESTRINGS}
{$CODEPAGE UTF8}

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written in Free Pascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}


interface

uses
  Classes, SysUtils;

type
  // One row of the character table: a Unicode character paired with the
  // single-byte code that represents it.
  TRusCharMap = record
    Char: UnicodeChar;  // the character itself
    Code: Byte;         // the byte code assigned to that character
  end;

const
  // Character <-> byte-code table used by FindCharCode and FindCharByCode.
  // Both lookups scan the table from the first entry to the last and stop at
  // the first match, so entry order matters when a character occurs twice.
  // For every letter listed here, the lowercase code equals the uppercase
  // code plus $80 (e.g. 'A' = $10, 'a' = $90).
  // Codes not listed in this table (e.g. $0A-$0F, $4B-$8F) are unassigned.
  RusCharMap: array of TRusCharMap = (
// Digits (0x00-0x09)
(Char: '0'; Code: $00), (Char: '1'; Code: $01),
(Char: '2'; Code: $02), (Char: '3'; Code: $03),
(Char: '4'; Code: $04), (Char: '5'; Code: $05),
(Char: '6'; Code: $06), (Char: '7'; Code: $07),
(Char: '8'; Code: $08), (Char: '9'; Code: $09),

// Latin uppercase (0x10-0x29)
(Char: 'A'; Code: $10), (Char: 'B'; Code: $11),
(Char: 'C'; Code: $12), (Char: 'D'; Code: $13),
(Char: 'E'; Code: $14), (Char: 'F'; Code: $15),
(Char: 'G'; Code: $16), (Char: 'H'; Code: $17),
(Char: 'I'; Code: $18), (Char: 'J'; Code: $19),
(Char: 'K'; Code: $1A), (Char: 'L'; Code: $1B),
(Char: 'M'; Code: $1C), (Char: 'N'; Code: $1D),
(Char: 'O'; Code: $1E), (Char: 'P'; Code: $1F),
(Char: 'Q'; Code: $20), (Char: 'R'; Code: $21),
(Char: 'S'; Code: $22), (Char: 'T'; Code: $23),
(Char: 'U'; Code: $24), (Char: 'V'; Code: $25),
(Char: 'W'; Code: $26), (Char: 'X'; Code: $27),
(Char: 'Y'; Code: $28), (Char: 'Z'; Code: $29),

// Russian uppercase (0x2A-0x4A), in alphabetical order
(Char: 'А'; Code: $2A), (Char: 'Б'; Code: $2B),
(Char: 'В'; Code: $2C), (Char: 'Г'; Code: $2D),
(Char: 'Д'; Code: $2E), (Char: 'Е'; Code: $2F),
(Char: 'Ё'; Code: $30), (Char: 'Ж'; Code: $31),
(Char: 'З'; Code: $32), (Char: 'И'; Code: $33),
(Char: 'Й'; Code: $34), (Char: 'К'; Code: $35),
(Char: 'Л'; Code: $36), (Char: 'М'; Code: $37),
(Char: 'Н'; Code: $38), (Char: 'О'; Code: $39),
(Char: 'П'; Code: $3A), (Char: 'Р'; Code: $3B), 
(Char: 'С'; Code: $3C), (Char: 'Т'; Code: $3D), 
(Char: 'У'; Code: $3E), (Char: 'Ф'; Code: $3F), 
(Char: 'Х'; Code: $40), (Char: 'Ц'; Code: $41), 
(Char: 'Ч'; Code: $42), (Char: 'Ш'; Code: $43), 
(Char: 'Щ'; Code: $44), (Char: 'Ъ'; Code: $45), 
(Char: 'Ы'; Code: $46), (Char: 'Ь'; Code: $47), 
(Char: 'Э'; Code: $48), (Char: 'Ю'; Code: $49), 
(Char: 'Я'; Code: $4A),

// Latin lowercase (0x90-0xA9)
(Char: 'a'; Code: $90), (Char: 'b'; Code: $91),
(Char: 'c'; Code: $92), (Char: 'd'; Code: $93),
(Char: 'e'; Code: $94), (Char: 'f'; Code: $95),
(Char: 'g'; Code: $96), (Char: 'h'; Code: $97),
(Char: 'i'; Code: $98), (Char: 'j'; Code: $99),
(Char: 'k'; Code: $9A), (Char: 'l'; Code: $9B),
(Char: 'm'; Code: $9C), (Char: 'n'; Code: $9D),
(Char: 'o'; Code: $9E), (Char: 'p'; Code: $9F),
(Char: 'q'; Code: $A0), (Char: 'r'; Code: $A1),
(Char: 's'; Code: $A2), (Char: 't'; Code: $A3),
(Char: 'u'; Code: $A4), (Char: 'v'; Code: $A5),
(Char: 'w'; Code: $A6), (Char: 'x'; Code: $A7),
(Char: 'y'; Code: $A8), (Char: 'z'; Code: $A9),

// Russian lowercase (0xAA-0xCA), in alphabetical order
(Char: 'а'; Code: $AA), (Char: 'б'; Code: $AB),
(Char: 'в'; Code: $AC), (Char: 'г'; Code: $AD),
(Char: 'д'; Code: $AE), (Char: 'е'; Code: $AF),
(Char: 'ё'; Code: $B0), (Char: 'ж'; Code: $B1),
(Char: 'з'; Code: $B2), (Char: 'и'; Code: $B3),
(Char: 'й'; Code: $B4), (Char: 'к'; Code: $B5),
(Char: 'л'; Code: $B6), (Char: 'м'; Code: $B7),
(Char: 'н'; Code: $B8), (Char: 'о'; Code: $B9),
(Char: 'п'; Code: $BA), (Char: 'р'; Code: $BB),
(Char: 'с'; Code: $BC), (Char: 'т'; Code: $BD),
(Char: 'у'; Code: $BE), (Char: 'ф'; Code: $BF),
(Char: 'х'; Code: $C0), (Char: 'ц'; Code: $C1),
(Char: 'ч'; Code: $C2), (Char: 'ш'; Code: $C3),
(Char: 'щ'; Code: $C4), (Char: 'ъ'; Code: $C5),
(Char: 'ы'; Code: $C6), (Char: 'ь'; Code: $C7),
(Char: 'э'; Code: $C8), (Char: 'ю'; Code: $C9),
(Char: 'я'; Code: $CA),

// Special characters (0xCB-0xEE)
(Char: '*'; Code: $CB), (Char: '('; Code: $CC),
(Char: ')'; Code: $CD), (Char: '-'; Code: $CE),
(Char: '_'; Code: $CF),
(Char: ' '; Code: $D0),  // Space
(Char: '!'; Code: $D1),
(Char: ';'; Code: $D2),  // Semicolon
(Char: ':'; Code: $D3),  // Colon
(Char: '?'; Code: $D4),  // Question mark
(Char: '.'; Code: $D5),  // Period
(Char: ','; Code: $D6),  // Comma
(Char: #9;  Code: $D7),  // Tab
(Char: '|'; Code: $D8),  // Vertical bar
(Char: '’'; Code: $D9),  // Typographic apostrophe (the ASCII apostrophe is $EA below)
(Char: '@'; Code: $DA), (Char: '#'; Code: $DB),
(Char: '$'; Code: $DC), (Char: '%'; Code: $DD),
(Char: '^'; Code: $DE), (Char: '&'; Code: $DF),
(Char: '/'; Code: $E0), (Char: '+'; Code: $E1),
(Char: '\'; Code: $E2), (Char: '>'; Code: $E3),
(Char: '<'; Code: $E4), (Char: '='; Code: $E5),
(Char: '{'; Code: $E6), (Char: '}'; Code: $E7),
(Char: '['; Code: $E8), (Char: ']'; Code: $E9),
(Char: ''''; Code: $EA), (Char: '"'; Code: $EB),
(Char: '`'; Code: $EC), (Char: '№'; Code: $ED),
(Char: #10; Code: $EE),  // Line feed
// ... (remaining special characters)

    // Placeholder entry for unknown characters: code $FF decodes to '?'.
    // '?' itself is listed earlier with code $D4, and lookups by character
    // stop at the first match, so encoding '?' yields $D4, not $FF.
    (Char: '?'; Code: $FF)
  );

// Returns the byte code assigned to character C in RusCharMap,
// or $FF when C is not present in the table.
function FindCharCode(C: WideChar): Byte;
// Returns the character assigned to Code in RusCharMap,
// or '?' when no table entry carries that code.
function FindCharByCode(Code: Byte): WideChar;

implementation

{ Encodes a single character as its byte code.

  C      - the character to encode.
  Result - the code stored for C in RusCharMap; $FF if C is not in the table.

  Decimal digits are resolved arithmetically ('0'..'9' -> $00..$09) without
  touching the table; everything else is found by a linear scan that stops
  at the first entry whose Char equals C. }
function FindCharCode(C: UnicodeChar): Byte;
var
  Entry: TRusCharMap;
begin
  // Pessimistic default: reported when neither the digit shortcut nor the
  // table scan produces a match.
  Result := $FF;

  // Digit shortcut: the code of a digit is its numeric value.
  if (C >= '0') and (C <= '9') then
  begin
    Result := Ord(C) - Ord('0');
    Exit;
  end;

  // Table scan, first match wins.
  for Entry in RusCharMap do
    if Entry.Char = C then
    begin
      Result := Entry.Code;
      Exit;
    end;
end;

{ Decodes a byte code back into its character.

  Code   - the byte code to decode.
  Result - the character stored for Code in RusCharMap; '?' if no entry
           carries that code.

  Codes $00..$09 are converted straight to the digits '0'..'9'; any other
  code is found by a linear scan that stops at the first entry whose Code
  matches. }
function FindCharByCode(Code: Byte): UnicodeChar;
var
  Entry: TRusCharMap;
begin
  case Code of
    // Digit shortcut: the code is the numeric value of the digit.
    $00..$09:
      Result := UnicodeChar(Ord('0') + Code);
  else
    begin
      // Default for codes that no table entry claims.
      Result := '?';

      // Table scan, first match wins.
      for Entry in RusCharMap do
        if Entry.Code = Code then
        begin
          Result := Entry.Char;
          Break;
        end;
    end;
  end;
end;

{ Verifies that no two entries of RusCharMap share the same Code.

  Every unordered pair of entries is compared (earlier entry first). On the
  first pair found with equal codes an Exception is raised whose message
  names both characters and their codes in hexadecimal. Duplicate characters
  with different codes are not reported. }
procedure CheckCodeConflicts;
var
  Outer, Inner, LastIndex: Integer;
  Earlier, Later: TRusCharMap;
begin
  LastIndex := High(RusCharMap);

  for Outer := 0 to LastIndex do
  begin
    Earlier := RusCharMap[Outer];

    // Only look at entries after Outer so each pair is examined once.
    for Inner := Outer + 1 to LastIndex do
    begin
      Later := RusCharMap[Inner];

      if Earlier.Code <> Later.Code then
        Continue;

      raise Exception.CreateFmt(
        'Конфликт кодов: %s (%x) и %s (%x)',
        [Earlier.Char, Earlier.Code,
         Later.Char, Later.Code]);
    end;
  end;
end;

initialization
  // Validate the table when the unit is initialized: CheckCodeConflicts
  // raises an Exception if two RusCharMap entries share the same code.
  CheckCodeConflicts;
end.