unit KMeans;

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written on FreePascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}

{$MODE OBJFPC}{$H+}{$RANGECHECKS ON}
interface

uses
  SysUtils, Math, DataUtils;

type
TIntegerArray = array of integer;
  TKMeans = record
    centroids: TDoubleMatrix;  // Центроиды кластеров
    labels: TIntegerArray;     // Метки кластеров для каждой точки
  end;

function ArraysEqual(const a, b: TDoubleMatrix): Boolean;
procedure TrainKMeans(var model: TKMeans; const x: TDoubleMatrix; k: Integer; maxIter: Integer = 100);
function PredictKMeans(const model: TKMeans; const x: TDoubleArray): Integer;

implementation

function EuclideanDistance(const a, b: TDoubleArray): Double;
var
  i: Integer;
begin
  Result := 0;
  for i := 0 to High(a) do
    Result := Result + Sqr(a[i] - b[i]);
  Result := Sqrt(Result);
end;

function ArraysEqual(const a, b: TDoubleMatrix): Boolean;
var
  i, j: Integer;
begin
  if Length(a) <> Length(b) then
    Exit(False);
  for i := 0 to High(a) do
  begin
    if Length(a[i]) <> Length(b[i]) then
      Exit(False);
    for j := 0 to High(a[i]) do
      if a[i][j] <> b[i][j] then
        Exit(False);
  end;
  Result := True;
end;

procedure TrainKMeans(var model: TKMeans; const x: TDoubleMatrix; k: Integer; maxIter: Integer = 100);
var
  i, j, l, iter: Integer;
  minDist, dist: Double;
  clusterSizes: TIntegerArray;
  newCentroids: TDoubleMatrix;
begin
  if Length(x) = 0 then
    raise Exception.Create('Данные не загружены');

  if k > Length(x) then
    raise Exception.Create('Количество кластеров больше количества точек');

  // Инициализация центроидов случайными точками
  SetLength(model.centroids, k, Length(x[0]));
  for i := 0 to k - 1 do
    model.centroids[i] := Copy(x[Random(Length(x))], 0, Length(x[0]));

  SetLength(model.labels, Length(x));
  SetLength(clusterSizes, k);
  SetLength(newCentroids, k, Length(x[0]));

  for iter := 1 to maxIter do
  begin
    // Шаг 1: Назначение точек кластерам
    for i := 0 to High(x) do
    begin
      minDist := MaxDouble;
      for j := 0 to k - 1 do
      begin
        dist := EuclideanDistance(x[i], model.centroids[j]);
        if dist < minDist then
        begin
          minDist := dist;
          model.labels[i] := j;
        end;
      end;
    end;

    // Шаг 2: Пересчет центроидов
    for j := 0 to k - 1 do
    begin
      for l := 0 to High(newCentroids[j]) do
        newCentroids[j][l] := 0;
      clusterSizes[j] := 0;
    end;

    for i := 0 to High(x) do
    begin
      for l := 0 to High(x[i]) do
        newCentroids[model.labels[i]][l] := newCentroids[model.labels[i]][l] + x[i][l];
      Inc(clusterSizes[model.labels[i]]);
    end;

    for j := 0 to k - 1 do
    begin
      if clusterSizes[j] > 0 then
        for l := 0 to High(newCentroids[j]) do
          newCentroids[j][l] := newCentroids[j][l] / clusterSizes[j]
      else
        // Если кластер пуст, переназначаем центроид случайной точке
        newCentroids[j] := Copy(x[Random(Length(x))], 0, Length(x[0]));
    end;

    // Проверка на сходимость
    if ArraysEqual(model.centroids, newCentroids) then
      Break;

    model.centroids := newCentroids;
  end;
end;

function PredictKMeans(const model: TKMeans; const x: TDoubleArray): Integer;
var
  j: Integer;
  minDist, dist: Double;
begin
  minDist := MaxDouble;
  Result := -1;
  for j := 0 to High(model.centroids) do
  begin
    dist := EuclideanDistance(x, model.centroids[j]);
    if dist < minDist then
    begin
      minDist := dist;
      Result := j;
    end;
  end;
end;

end.