unit DataUtils;
{$MODE OBJFPC}{$H+}{$RANGECHECKS ON}

{
    Part of AdvancedChatAI.
    For GNU/Linux 64 bit version.
    Version: 1.
    Written in Free Pascal (https://freepascal.org/).
    Copyright (C) 2025-2026 Artyomov Alexander
    Used https://chat.deepseek.com/
    http://self-made-free.ru/
    aralni@mail.ru

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as
    published by the Free Software Foundation, either version 3 of the
    License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
}

interface

uses
  SysUtils, Classes,streamex,strutils,Math;

const
  // Names of the twelve chromatic pitch classes, written with sharps,
  // indexed 0 ('C') through 11 ('B').
  // NOTE(review): not referenced by any routine visible in this unit;
  // presumably consumed by other units - confirm before changing.
  KEY_NAMES: array[0..11] of String = ('C', 'C#', 'D', 'D#', 'E', 'F', 
                                      'F#', 'G', 'G#', 'A', 'A#', 'B');

type
  // A single note stored as two Word fields in a packed record.
  // NOTE(review): the units of tone and duration are not established
  // anywhere in this unit - confirm against the code that fills them.
  TNote = packed record
    tone, duration: Word;
  end;
  // Dynamic array of notes.
  TNoteArray = array of TNote;
  // One row of numeric values (a single sample / feature vector).
  TDoubleArray = array of Double;
  // Row-major table of numeric values: data[row][column].
  // Rows are independent dynamic arrays, so they may differ in length.
  TDoubleMatrix = array of TDoubleArray;

// Reads comma-separated numeric rows from the file "filename" into "data".
// Raises Exception when the file does not exist.
procedure LoadData(const filename: String; var data: TDoubleMatrix);
// Rescales each column of "data" in place to the range 0..1 (min-max).
// Raises Exception when "data" has no rows.
procedure NormalizeData(var data: TDoubleMatrix);
// Appends powers 2..degree of the original columns to every row of "data".
// Raises Exception when "data" has no rows.
procedure AddPolynomialFeatures(var data: TDoubleMatrix; degree: Integer);

implementation

{
  AddPolynomialFeatures
  Appends polynomial features to every row of "data", in place.

  The number of source columns is taken from the first row. For each power
  p = 2..degree (outer order) and each source column k (inner order) the
  value data[i][k]^p is appended to row i, so every row grows by
  originalCols * (degree - 1) elements. New values are placed after the
  current end of the row, which matters only for rows that are longer than
  the first one.

  Parameters:
    data   - matrix to extend; modified in place.
    degree - highest power to add. Values below 2 leave the data unchanged.

  Raises:
    Exception - when "data" has no rows, or when some row has fewer columns
                than the first row. All rows are checked before anything is
                modified, so a failure leaves "data" untouched.
}
procedure AddPolynomialFeatures(var data: TDoubleMatrix; degree: Integer);
var
  i, j, k, originalCols, rowLen, extraCols, dest: Integer;
begin
  if Length(data) = 0 then
    raise Exception.Create('Data is empty');

  originalCols := Length(data[0]);

  // Nothing to append: no powers requested or no source columns.
  if (degree < 2) or (originalCols = 0) then
    Exit;

  // Validate first so a ragged row cannot leave the matrix half-expanded.
  for i := 0 to High(data) do
    if Length(data[i]) < originalCols then
      raise Exception.CreateFmt(
        'Row %d has %d columns, expected at least %d',
        [i, Length(data[i]), originalCols]);

  extraCols := originalCols * (degree - 1);

  for i := 0 to High(data) do
  begin
    // Grow the row once instead of reallocating for every appended value.
    rowLen := Length(data[i]);
    SetLength(data[i], rowLen + extraCols);
    dest := rowLen;

    for j := 2 to degree do
      for k := 0 to originalCols - 1 do
      begin
        data[i][dest] := Power(data[i][k], j);
        Inc(dest);
      end;
  end;
end;

{
  LoadData
  Reads a comma-separated text file into "data", one matrix row per line.

  Behaviour:
    - Blank and whitespace-only lines are ignored.
    - The first remaining line is treated as a header and discarded.
    - Every other line is split on ',' and each field that parses as a
      floating-point number is appended to the row. Fields that do not
      parse are skipped, so rows may end up with different lengths and a
      line without any numeric field produces an empty row.
    - Numbers are parsed with '.' as the decimal separator regardless of
      the process locale, because ',' is already the field delimiter.
    - Any previous content of "data" is discarded once the file has been
      opened, so the result never contains stale rows or stale values.

  Parameters:
    filename - path of the file to read.
    data     - receives the loaded rows; previous content is replaced.

  Raises:
    Exception - when the file does not exist. Errors raised by the stream
                classes while opening or reading the file propagate as is.
}
procedure LoadData(const filename: String; var data: TDoubleMatrix);
var
  fileStream: TFileStream;
  reader: TStreamReader;
  line: String;
  j, rowCount, validCols: Integer;
  values: TStringArray;
  value: Double;
  isHeader: Boolean;
  csvFormat: TFormatSettings;
begin
  if not FileExists(filename) then
    raise Exception.Create('File not found: ' + filename);

  // Locale-independent number format for the file content.
  csvFormat := DefaultFormatSettings;
  csvFormat.DecimalSeparator := '.';

  // Read-only access with an explicit share mode, so the file is not
  // requested in the default compatibility mode.
  fileStream := TFileStream.Create(filename, fmOpenRead or fmShareDenyWrite);
  try
    reader := TStreamReader.Create(fileStream);
    try
      // Start from an empty matrix: rows left over from an earlier call
      // must not leak into the result.
      SetLength(data, 0);
      rowCount := 0;
      isHeader := True; // the first non-blank line is assumed to be a header

      while not reader.Eof do
      begin
        line := reader.ReadLine;

        // Skip blank lines before they can be mistaken for the header
        // or stored as an empty row.
        if Trim(line) = '' then Continue;

        values := SplitString(line, ',');
        if Length(values) = 0 then Continue;

        // Skip the header line.
        if isHeader then
        begin
          isHeader := False;
          Continue;
        end;

        // Reserve room for the largest possible row, then trim it to the
        // number of fields that actually parsed.
        SetLength(data, rowCount + 1);
        SetLength(data[rowCount], Length(values));
        validCols := 0;

        for j := 0 to High(values) do
        begin
          // Keep numeric fields only; anything else is skipped.
          if TryStrToFloat(Trim(values[j]), value, csvFormat) then
          begin
            data[rowCount][validCols] := value;
            Inc(validCols);
          end;
        end;

        SetLength(data[rowCount], validCols);
        Inc(rowCount);
      end;
    finally
      reader.Free;
    end;
  finally
    fileStream.Free;
  end;
end;

{
  NormalizeData
  Min-max normalizes "data" column by column, in place.

  The number of columns is taken from the first row. For each column the
  minimum and maximum over all rows are found and every value is mapped to
  (value - min) / (max - min), i.e. into the range 0..1. A column whose
  values are all equal has no range to scale by; its values are set to 0.5.
  Columns beyond the length of the first row (in longer rows) are left
  unchanged.

  Parameters:
    data - matrix to normalize; modified in place.

  Raises:
    Exception - when "data" has no rows, or when some row has fewer columns
                than the first row. Row lengths are checked before anything
                is modified, so a failure leaves "data" untouched instead of
                partially normalized.
}
procedure NormalizeData(var data: TDoubleMatrix);
var
  i, j, colCount: Integer;
  minVal, maxVal, range: Double;
begin
  if Length(data) = 0 then
    raise Exception.Create('Data is empty');

  colCount := Length(data[0]);

  // Reject ragged input up front rather than failing in the middle of a
  // column after earlier columns were already rewritten.
  for i := 1 to High(data) do
    if Length(data[i]) < colCount then
      raise Exception.CreateFmt(
        'Row %d has %d columns, expected at least %d',
        [i, Length(data[i]), colCount]);

  for j := 0 to colCount - 1 do
  begin
    minVal := data[0][j];
    maxVal := data[0][j];

    // Find the minimum and maximum of the column.
    for i := 1 to High(data) do
    begin
      if data[i][j] < minVal then minVal := data[i][j];
      if data[i][j] > maxVal then maxVal := data[i][j];
    end;

    range := maxVal - minVal;
    if range = 0 then
    begin
      // Constant column: avoid dividing by zero and use the midpoint.
      for i := 0 to High(data) do
        data[i][j] := 0.5;
    end
    else
    begin
      for i := 0 to High(data) do
        data[i][j] := (data[i][j] - minVal) / range;
    end;
  end;
end;

end.