unit PostProcessor;
{$MODE OBJFPC}{$H+}{$RANGECHECKS ON}

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written in FreePascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}


interface

uses
  SysUtils, Classes, UniversalFileReader, LazUTF8, ucs4unit;

type
  { One file attached to a user message, as filled in by GetUserInput. }
  TAttachment = record
    FileName,      // path of the attachment, taken verbatim from the files list
    ContentType,   // MIME-style type string chosen from the file extension
    TextContent: string; // text of the file; only filled for 'text/...' content types
    Content: TBytes;     // raw bytes of the file as read from disk
  end;

  { Language guess for a message. In this unit langUnknown is produced for
    empty text, and langOther when neither Russian nor English letters
    clearly dominate. }
  TLanguage = (langUnknown, langRussian, langEnglish, langOther);

  { Everything collected for one user request by GetUserInput. }
  TUserInput = record
    Message: string;                    // message text; each line read from the post file is followed by #10
    Attachments: array of TAttachment;  // one entry per item of the files list
    Timestamp: TDateTime;               // value of Now when GetUserInput was called
    Language: TLanguage;                // language detected from Message
    IsMultiline: Boolean;               // set to True by GetUserInput once the post file has been read
  end;

{ Reads the message from PostFileName (if that file exists) and loads every
  file named in FilesList (which may be nil) as an attachment. }
function GetUserInput(const PostFileName: string; FilesList: TStringList): TUserInput;
{ Returns a human-readable name for Lang. }
function LanguageToStr(Lang: TLanguage): string;
{ Writes Attachment.Content into DestDir under the base name of
  Attachment.FileName, creating DestDir when it does not exist. }
procedure SaveTempAttachment(const Attachment: TAttachment; const DestDir: string);
{ Decodes AChar as UTF-8 and returns its first code point, or 0 when the
  decoded sequence is empty. }
function UTF8CharToUCS4(const AChar: string): DWord;

implementation

{ Decodes AChar as UTF-8 and returns the first resulting code point.

  AChar  - UTF-8 encoded text; only its first character is used.
  Result - the first decoded code point, or 0 when decoding yields an
           empty sequence (for example when AChar is empty). }
function UTF8CharToUCS4(const AChar: string): DWord;
var
  decoded: ucs4;
begin
  Result := 0;
  decoded := Default(ucs4);
  decoded.Init;
  try
    decoded.FromUTF8(AChar);
    if decoded.Length > 0 then
      Result := decoded[0];
  finally
    // Always pair Init with Clear, even if decoding raises.
    // NOTE(review): presumably Clear releases the buffer owned by the ucs4
    // value - confirm against ucs4unit.
    decoded.Clear;
  end;
end;

{ Guesses the language of Text by counting Cyrillic and Latin letters.

  Text   - UTF-8 encoded text.
  Result - langUnknown for empty text;
           langRussian when Cyrillic letters outnumber Latin ones and there
           are more than 5 of them;
           langEnglish when Latin letters outnumber Cyrillic ones and there
           are more than 5 of them;
           langOther in every remaining case (ties, too few letters).

  The text is walked once by byte offset. Asking UTF8Copy for the i-th
  character on every iteration rescans the string from its start each time,
  which makes the loop quadratic in the text length. }
function DetectLanguage(const Text: string): TLanguage;
var
  bytePos, byteLen, charLen, ruCount, enCount: Integer;
  textStart: PChar;
  charCode: DWord;
begin
  if Text = '' then Exit(langUnknown);
  ruCount := 0;
  enCount := 0;
  byteLen := Length(Text);
  textStart := PChar(Text);
  bytePos := 1;
  while bytePos <= byteLen do begin
    // Byte length of the character starting at bytePos.
    charLen := UTF8CodepointSize(textStart + bytePos - 1);
    // Guarantee forward progress whatever the size routine reports.
    if charLen < 1 then charLen := 1;
    charCode := UTF8CharToUCS4(Copy(Text, bytePos, charLen));
    case charCode of
      $430..$44F, $410..$42F, $451, $401: // Cyrillic a-ya, A-YA, yo, YO
        Inc(ruCount);
      $61..$7A, $41..$5A: // a-z, A-Z
        Inc(enCount);
    end;
    Inc(bytePos, charLen);
  end;
  if (ruCount > enCount) and (ruCount > 5) then
    Exit(langRussian)
  else if (enCount > ruCount) and (enCount > 5) then
    Exit(langEnglish)
  else
    Exit(langOther);
end;

{ Maps a lower-cased file extension (leading dot included) to the content
  type string stored in TAttachment.ContentType. }
function ContentTypeForExtension(const Ext: string): string;
begin
  if (Ext = '.txt') or (Ext = '.pas') or (Ext = '.inc') or (Ext = '.csv') then
    Result := 'text/plain'
  else if (Ext = '.jpg') or (Ext = '.jpeg') then
    Result := 'image/jpeg'
  else if Ext = '.png' then
    Result := 'image/png'
  else if Ext = '.pdf' then
    Result := 'application/pdf'
  else
    Result := 'application/octet-stream';
end;

{ Reads the whole file AFileName into a byte array. An empty file yields an
  empty array. Stream exceptions (e.g. file cannot be opened) propagate. }
function LoadFileBytes(const AFileName: string): TBytes;
var
  stream: TFileStream;
begin
  Result := nil;
  // Ask for shared read access explicitly instead of relying on the default
  // share mode that a bare fmOpenRead implies.
  stream := TFileStream.Create(AFileName, fmOpenRead or fmShareDenyWrite);
  try
    SetLength(Result, stream.Size);
    // Indexing element 0 of an empty array is a range error with
    // $RANGECHECKS ON, so zero-byte files must skip the read.
    if Length(Result) > 0 then
      stream.ReadBuffer(Result[0], Length(Result));
  finally
    stream.Free;
  end;
end;

{ Collects the user's message and attachments.

  PostFileName - text file holding the message; when the file does not exist
                 the message stays empty.
  FilesList    - paths of the files to attach; may be nil (no attachments).
  Result       - the message (every line followed by #10), the call time,
                 the detected language and one TAttachment per list entry.

  Exceptions raised while opening or reading the post file or an attachment
  are not caught here and propagate to the caller. }
function GetUserInput(const PostFileName: string; FilesList: TStringList): TUserInput;
var
  line: string;
  postFile: Text;
  i: Integer;
  fileReader: TUniversalFileReader;
begin
  // Start from a fully zeroed record so no field is left undefined.
  Result := Default(TUserInput);
  Result.Timestamp := Now;
  // Read the message
  if FileExists(PostFileName) then begin
    AssignFile(postFile, PostFileName);
    Reset(postFile);
    try
      while not Eof(postFile) do begin
        ReadLn(postFile, line);
        Result.Message := Result.Message + line + #10;
      end;
      // NOTE(review): the flag is raised for every message read from a file,
      // regardless of how many lines it has - confirm this is intended.
      Result.IsMultiline := True;
    finally
      CloseFile(postFile);
    end;
  end;
  // Detect the language of the message
  Result.Language := DetectLanguage(Result.Message);
  // Load the attachments
  if FilesList <> nil then begin
    SetLength(Result.Attachments, FilesList.Count);
    for i := 0 to FilesList.Count - 1 do begin
      Result.Attachments[i].FileName := FilesList[i];
      // The content type is derived from the extension only
      Result.Attachments[i].ContentType :=
        ContentTypeForExtension(LowerCase(ExtractFileExt(FilesList[i])));
      // Raw file contents
      Result.Attachments[i].Content := LoadFileBytes(FilesList[i]);
      // For text attachments also keep the decoded text
      if Pos('text/', Result.Attachments[i].ContentType) = 1 then begin
        fileReader := TUniversalFileReader.Create(Result.Attachments[i].FileName);
        try
          if fileReader.IsTextFile then
            Result.Attachments[i].TextContent := fileReader.ReadAsText;
        finally
          fileReader.Free;
        end;
      end;
    end;
  end;
end;

{ Returns the display name of Lang. Any value other than langRussian,
  langEnglish or langOther is reported as unknown. }
function LanguageToStr(Lang: TLanguage): string;
begin
  if Lang = langRussian then
    Result := 'Русский'
  else if Lang = langEnglish then
    Result := 'English'
  else if Lang = langOther then
    Result := 'Другой'
  else
    Result := 'Неизвестен';
end;

{ Stores the raw bytes of Attachment in directory DestDir.

  The target file is named after the base name of Attachment.FileName.
  DestDir is created first when it does not exist. The file is created even
  when the attachment holds no bytes; in that case nothing is written. }
procedure SaveTempAttachment(const Attachment: TAttachment; const DestDir: string);
var
  targetFile: string;
  outStream: TFileStream;
  byteCount: SizeInt;
begin
  if not DirectoryExists(DestDir) then
    ForceDirectories(DestDir);
  targetFile := IncludeTrailingPathDelimiter(DestDir) + ExtractFileName(Attachment.FileName);
  outStream := TFileStream.Create(targetFile, fmCreate);
  try
    byteCount := Length(Attachment.Content);
    // Element 0 may only be referenced when the array is not empty.
    if byteCount > 0 then
      outStream.WriteBuffer(Attachment.Content[0], byteCount);
  finally
    outStream.Free;
  end;
end;

end.