unit MarkovChainUnit;
{$MODE OBJFPC}{$H+}{$RANGECHECKS ON}

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written on FreePascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}


interface

uses
  Classes, SysUtils, Contnrs, StrUtils;

type
  TMarkovState = class
  public
    Word: string;
    NextWords: TStringList;
    constructor Create(const AWord: string);
    destructor Destroy; override;
    procedure AddNextWord(const NextWord: string);
    function GetRandomNextWord: string;
  end;

  TMarkovChain = class
  private
    FStates: TFPHashObjectList;
    FOrder: Integer;
    function FindState(const Word: string): TMarkovState;
    function CreateState(const Word: string): TMarkovState;
    function SplitText(const Text: string): TStringArray;
    function GetStateCount: Integer;
  public
    constructor Create(AOrder: Integer = 1);
    destructor Destroy; override;
    procedure Learn(const Text: string);
    procedure LearnFromFile(const FileName: string);
    function GenerateSentence(StartWord: string; MaxLength: Integer): string;
    function GeneratePoem(Lines: Integer; LineLength: Integer): string;
    property StateCount: Integer read GetStateCount;
    procedure Retrain;
  end;

implementation

constructor TMarkovState.Create(const AWord: string);
begin
  Word := AWord;
  NextWords := TStringList.Create;
  NextWords.Duplicates := dupAccept;
  NextWords.Sorted := False;
end;

destructor TMarkovState.Destroy;
begin
  NextWords.Free;
  inherited;
end;

procedure TMarkovState.AddNextWord(const NextWord: string);
begin
  NextWords.Add(NextWord);
end;

function TMarkovState.GetRandomNextWord: string;
begin
  if NextWords.Count = 0 then
    Result := ''
  else
    Result := NextWords[Random(NextWords.Count)];
end;

constructor TMarkovChain.Create(AOrder: Integer = 1);
begin
  FOrder := AOrder;
  FStates := TFPHashObjectList.Create(True);
end;

destructor TMarkovChain.Destroy;
begin
  FStates.Free;
  inherited;
end;

function TMarkovChain.FindState(const Word: string): TMarkovState;
begin
  Result := TMarkovState(FStates.Find(Word));
end;

function TMarkovChain.CreateState(const Word: string): TMarkovState;
begin
  Result := TMarkovState.Create(Word);
  FStates.Add(Word, Result);
end;

function TMarkovChain.SplitText(const Text: string): TStringArray;
var
  TempList: TStringList;
  i: Integer;
begin
  TempList := TStringList.Create;
  try
    // Простая реализация разделения на слова
    TempList.Delimiter := ' ';
    TempList.DelimitedText := Text;

    // Удаляем пустые строки и знаки препинания
    for i := TempList.Count - 1 downto 0 do
    begin
      TempList[i] := TempList[i].Trim;
      if TempList[i] = '' then
        TempList.Delete(i);
    end;

    SetLength(Result, TempList.Count);
    for i := 0 to TempList.Count - 1 do
      Result[i] := TempList[i];
  finally
    TempList.Free;
  end;
end;

procedure TMarkovChain.Learn(const Text: string);
var
  Words: TStringArray;
  i, j: Integer;
  CurrentState: TMarkovState;
  Gram, NextWord: string;
begin
  Words := SplitText(Text);

  for i := 0 to High(Words) - FOrder do
  begin
    // Формируем n-грамму (состояние)
    Gram := Words[i];
    for j := 1 to FOrder - 1 do
      Gram := Gram + ' ' + Words[i + j];

    NextWord := Words[i + FOrder];

    CurrentState := FindState(Gram);
    if not Assigned(CurrentState) then
      CurrentState := CreateState(Gram);

    CurrentState.AddNextWord(NextWord);
  end;
end;

procedure TMarkovChain.LearnFromFile(const FileName: string);
var
  Lines: TStringList;
  i: Integer;
begin
  Lines := TStringList.Create;
  try
    Lines.LoadFromFile(FileName);
    for i := 0 to Lines.Count - 1 do
      Learn(Lines[i]);
  finally
    Lines.Free;
  end;
end;

function TMarkovChain.GenerateSentence(StartWord: string; MaxLength: Integer): string;
var
  CurrentState: TMarkovState;
  NextWord: string;
  Count: Integer;
begin
  Result := StartWord;
  Count := 1;

  CurrentState := FindState(StartWord);
  while Assigned(CurrentState) and (Count < MaxLength) do
  begin
    NextWord := CurrentState.GetRandomNextWord;
    if NextWord = '' then Break;

    Result := Result + ' ' + NextWord;
    Inc(Count);

    CurrentState := FindState(NextWord);
  end;
end;

function TMarkovChain.GeneratePoem(Lines: Integer; LineLength: Integer): string;
var
  i: Integer;
begin
  Result := '';
  for i := 1 to Lines do
  begin
    Result := Result + GenerateSentence('любовь', LineLength) + #13#10;
  end;
end;

function TMarkovChain.GetStateCount: Integer;
begin
  if Assigned(FStates) then
    Result := FStates.Count
  else
    Result := 0;
end;

procedure TMarkovChain.Retrain;
begin
  // Реализация переобучения модели
  FStates.Clear;
  // Здесь можно перезагрузить данные из файла или другого источника
end;

end.