162 changes: 162 additions & 0 deletions dynamic_programming/kadane's_algo.r
@@ -0,0 +1,162 @@
# Kadane's Algorithm in R
Member commented:
This algorithm is already implemented.

#
# Finds the contiguous subarray with the largest sum.
# Time Complexity: O(n)
# Space Complexity: O(1) (not counting output subarray)
#
# Applications:
# - Financial time series (max profit window)
# - Signal processing (max energy segment)
# - Pattern detection in sequences
# - As a subroutine in more complex DP/optimization tasks
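#
# Core recurrence (the DP view of the scan below):
#   best_ending_at(i) = max(arr[i], best_ending_at(i-1) + arr[i])
#   answer = max over all i of best_ending_at(i)
# e.g. for c(-2, 1, -3, 4, -1, 2, 1, -5, 4) the best segment is c(4, -1, 2, 1) with sum 6.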

kadane <- function(arr) {
#' Kadane's algorithm to find maximum subarray sum and its indices
#' @param arr Numeric vector (may include negatives and positives)
#' @return A list with fields:
#'   max_sum  - numeric: maximum subarray sum
#'   start    - integer: start index of the subarray (1-based), NA if input is empty
#'   end      - integer: end index of the subarray (1-based), NA if input is empty
#'   subarray - numeric vector: the subarray achieving max_sum (empty if input is empty)

n <- length(arr)

# Edge case: empty input
if (n == 0) {
return(list(
max_sum = -Inf,
start = NA_integer_,
end = NA_integer_,
subarray = numeric(0)
))
}

# Initialize with first element (handles all-negative arrays correctly)
max_ending_here <- arr[1]
max_so_far <- arr[1]
s <- 1
start <- 1
end <- 1

if (n >= 2) {
for (i in 2:n) {
# If adding arr[i] to current segment is worse than starting new at arr[i]
if (max_ending_here + arr[i] < arr[i]) {
max_ending_here <- arr[i]
s <- i
} else {
max_ending_here <- max_ending_here + arr[i]
}

# Update best segment if needed
if (max_ending_here > max_so_far) {
max_so_far <- max_ending_here
start <- s
end <- i
}
}
}

return(list(
max_sum = max_so_far,
start = as.integer(start),
end = as.integer(end),
subarray = arr[start:end]
))
}

# Variant: first-occurrence tie-breaking. kadane() updates only on a strict
# improvement, so it already returns the earliest maximal segment; this wrapper
# simply documents that behavior.
kadane_first_occurrence <- function(arr) {
kadane(arr)
}

# Helper to pretty-print results
print_kadane_result <- function(res, arr_name="Array") {
cat("Input:", arr_name, "\n")
if (is.na(res$start)) {
cat("Result: empty input\n\n")
return(invisible(NULL))
}
cat("Max Subarray Sum:", res$max_sum, "\n")
cat("Start Index:", res$start, " End Index:", res$end, "\n")
cat("Subarray:", paste(res$subarray, collapse = ", "), "\n\n")
}

# ===========================
# Example Usage & Testing
# ===========================
cat("=== Kadane's Algorithm Tests ===\n\n")

# Test 1: Mixed positive and negative
arr1 <- c(-2, 1, -3, 4, -1, 2, 1, -5, 4)
res1 <- kadane(arr1)
print_kadane_result(res1, "arr1 (mixed)")

# Test 2: All positive
arr2 <- c(2, 3, 1, 4)
res2 <- kadane(arr2)
print_kadane_result(res2, "arr2 (all positive)")

# Test 3: All negative
arr3 <- c(-8, -3, -6, -2, -5, -4)
res3 <- kadane(arr3)
print_kadane_result(res3, "arr3 (all negative)")

# Test 4: Single element
arr4 <- c(5)
res4 <- kadane(arr4)
print_kadane_result(res4, "arr4 (single element)")

# Test 5: Empty array
arr5 <- numeric(0)
res5 <- kadane(arr5)
print_kadane_result(res5, "arr5 (empty)")

# Test 6: Random large array - timing example
set.seed(123)
arr6 <- sample(-100:100, 100000, replace = TRUE)
start_time <- Sys.time()
res6 <- kadane(arr6)
end_time <- Sys.time()
print_kadane_result(res6, "arr6 (large random)")
cat("Elapsed time (seconds):", as.numeric(end_time - start_time, units = "secs"), "\n\n")

# Optional: function to get maximum circular subarray (Kadane + total sum trick)
kadane_circular <- function(arr) {
#' Finds max subarray sum for circular arrays (wrap-around allowed)
#' If all elements are non-positive, returns the maximum single element (non-wrap).
n <- length(arr)
if (n == 0) return(list(max_sum = -Inf, start = NA, end = NA, subarray = numeric(0)))

# Standard Kadane for the non-wrapping maximum
base <- kadane(arr)
normal <- base$max_sum

# If all elements are non-positive, the wrap formula would select an empty
# subarray (total_sum - total_sum = 0); the best single element is the answer.
if (all(arr <= 0)) {
i_max <- which.max(arr)
return(list(max_sum = normal, start = i_max, end = i_max, subarray = arr[i_max]))
}

# Max wrap = total_sum - min_subarray_sum
total_sum <- sum(arr)

# Find the minimum subarray sum by running Kadane on the negated array
inverted <- -arr
min_sub_sum <- kadane(inverted)$max_sum # equals -(minimum subarray sum)
max_wrap <- total_sum + min_sub_sum # i.e. total_sum - minimum subarray sum
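# Worked check (matches the demo below): for c(8, -1, 3, 4), total_sum = 14 and
# the minimum subarray is c(-1) with sum -1, so max_wrap = 14 - (-1) = 15,
# beating the non-wrap maximum of 14.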

if (max_wrap > normal) {
# Indices for the wrap-around segment are not computed here
return(list(max_sum = max_wrap, start = NA, end = NA, subarray = NA))
} else {
return(list(max_sum = normal, start = base$start, end = base$end, subarray = base$subarray))
}
}

# Example for circular
cat("=== Circular Kadane Example ===\n")
arrc <- c(8, -1, 3, 4)
res_circ <- kadane_circular(arrc)
cat("Input:", paste(arrc, collapse = ", "), "\n")
cat("Max circular subarray sum:", res_circ$max_sum, "\n\n")

# End of script
61 changes: 61 additions & 0 deletions machine_learning/cnn.r
@@ -0,0 +1,61 @@
# ==============================================
# Convolutional Neural Network (CNN)
# ==============================================
# Algorithm: Deep learning model using convolutional, pooling, and dense layers.
# Framework: Keras (TensorFlow backend)
#
# Purpose:
# - Automatically extract spatial and hierarchical features from image data.
# - Commonly used for image classification, object detection, and visual recognition.
#
# Architecture Steps:
# 1. Convolution Layer: Extracts local spatial patterns using learnable filters.
# 2. Activation (ReLU): Adds non-linearity by thresholding at zero.
# 3. Pooling Layer: Reduces spatial dimensions (downsampling) while preserving features.
# 4. Flatten Layer: Converts 2D feature maps into 1D vector.
# 5. Dense Layers: Combines extracted features for classification.
# 6. Output Layer: Uses Softmax activation for class probabilities.
#
# Complexity:
# - Time: roughly O(E × N × H·W × F × K² × C) per conv layer, where E=epochs,
#   N=samples, H·W=feature-map size, F=filters, K=kernel size, C=input channels
# - Space: O(parameters + feature maps)
#
# Reference:
# LeCun et al., "Gradient-based learning applied to document recognition" (1998)
# https://yann.lecun.com/exdb/lenet/
#
# ==============================================

# Load Required Library
suppressPackageStartupMessages(library(keras))

# Define CNN Architecture as a Function (Reusable)
build_cnn_model <- function(input_shape = c(28, 28, 1), num_classes = 10) {
keras_model_sequential() %>%
layer_conv_2d(
filters = 32, kernel_size = c(3, 3), activation = "relu",
input_shape = input_shape, padding = "same"
) %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_conv_2d(
filters = 64, kernel_size = c(3, 3),
activation = "relu", padding = "same"
) %>%
layer_max_pooling_2d(pool_size = c(2, 2)) %>%
layer_flatten() %>%
layer_dense(units = 128, activation = "relu") %>%
layer_dense(units = num_classes, activation = "softmax")
}

# Example: Display Model Summary (only in interactive sessions)
if (interactive()) {
model <- build_cnn_model()
summary(model)
}

# ==============================================
# Note:
# - This script defines the CNN algorithm structure only.
# - You can compile and train it using model %>% compile() and model %>% fit()
#   with any dataset (e.g., MNIST, CIFAR-10); a minimal sketch follows below.
# ==============================================
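
# ==============================================
# Minimal compile/train sketch (a sketch only: assumes the keras R package with
# a working TensorFlow backend, and MNIST fetched via dataset_mnist()).
if (interactive()) {
mnist <- dataset_mnist()
# Reshape to (samples, 28, 28, 1) and scale pixel values to [0, 1]
x_train <- array_reshape(mnist$train$x, c(nrow(mnist$train$x), 28, 28, 1)) / 255
y_train <- mnist$train$y

model <- build_cnn_model()
model %>% compile(
optimizer = "adam",
loss = "sparse_categorical_crossentropy",
metrics = "accuracy"
)
model %>% fit(
x_train, y_train,
epochs = 3, batch_size = 128,
validation_split = 0.1
)
}
# ==============================================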
87 changes: 87 additions & 0 deletions machine_learning/guassian_process.r
@@ -0,0 +1,87 @@
# ==============================================
Copilot AI commented (Oct 24, 2025):
Filename contains a typo: 'guassian_process.r' should be 'gaussian_process.r' ('ua' transposed in 'gaussian').

Copilot AI commented (Oct 24, 2025):
The lowercase '.r' extension is correct, but the filename itself is misspelled ('guassian' instead of 'gaussian'). Please rename the file to 'gaussian_process.r'.
# Gaussian Process Regression (GP)
# ==============================================
# Algorithm: Non-parametric Bayesian regression using Gaussian Processes.
# Framework: R (kernlab package)
#
# Purpose:
# - Perform regression while providing uncertainty estimates.
# - Useful for small datasets and Bayesian optimization.
#
# Core Idea:
# - Define a prior over functions using a kernel (covariance) function.
# - Update the posterior distribution using observed data.
# - Predictions include mean and variance (uncertainty) for each point.
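#   Standard GP regression equations (Rasmussen & Williams, 2006), with noise
#   variance s^2, kernel matrix K, and test covariances k*:
#     mean(x*) = k*^T (K + s^2 I)^{-1} y
#     var(x*)  = k(x*, x*) - k*^T (K + s^2 I)^{-1} k*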
#
# Complexity:
# - Time: O(n^3) due to inversion of the kernel matrix
# - Space: O(n^2) for storing the kernel matrix
#
# Edge Cases / Notes:
# - Choice of kernel is critical for good performance.
# - Computationally heavy for large datasets; sparse approximations exist.
# - Great for uncertainty quantification in predictions.
#
# Typical Applications:
# - Bayesian optimization
# - Small-data regression tasks
# - Time-series forecasting with uncertainty estimates
#
# Reference:
# Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian Processes for Machine Learning.
# ==============================================

# Load required library
suppressPackageStartupMessages(library(kernlab))

# ---- Core Functions ----

#' Train a Gaussian Process Regression model
#' @param x Numeric vector or matrix of input features
#' @param y Numeric vector of target values
#' @param kernel Kernel to use (default: "rbfdot")
#' @param ... Additional arguments passed to gausspr
#' @return Trained GP model (kernlab::gausspr object)
gp_train <- function(x, y, kernel = "rbfdot", ...) {
gausspr(
x = as.matrix(x), y = y,
kernel = kernel,
...
)
}

#' Predict using a trained Gaussian Process Regression model
#' @param model Trained GP model (from gp_train)
#' @param x_test Numeric vector or matrix of test inputs
#' @param type Prediction type (default: "response")
#' @param ... Additional arguments passed to predict
#' @return Predicted values
gp_predict <- function(model, x_test, type = "response", ...) {
predict(model, as.matrix(x_test), type = type, ...)
}

# ---- Example Usage (runs only in interactive sessions) ----
if (interactive()) {
# Example Dataset (Synthetic)
set.seed(42)
x <- seq(-5, 5, length.out = 50)
y <- sin(x) + rnorm(length(x), sd = 0.2)

# Train GP model
gp_model <- gp_train(x, y)

# Make Predictions
x_test <- seq(-6, 6, length.out = 100)
y_pred <- gp_predict(gp_model, x_test)

# Plot Results
plot(x, y, main = "Gaussian Process Regression", xlab = "X", ylab = "Y", pch = 19)
lines(x_test, y_pred, col = "blue", lwd = 2)
legend("topright", legend = c("Observations", "GP Prediction"), col = c("black", "blue"), pch = c(19, NA), lty = c(NA, 1))
}
# ==============================================
# Note:
# - This script defines a Gaussian Process Regression model in R.
# - Can be applied to other regression datasets by replacing x and y.
# - For large datasets, consider sparse GP approximations.
# ==============================================