-
-
Notifications
You must be signed in to change notification settings - Fork 342
Add Gaussian Process Regression #235
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 8 commits
457a1f3
d51320b
563abaa
5676cd6
a46a2df
a084630
606ef9d
0c2f369
3afbc04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,162 @@ | ||
| # Kadane's Algorithm in R | ||
siriak marked this conversation as resolved.
Show resolved
Hide resolved
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This algorithm is already implemented |
||
| # | ||
| # Finds the contiguous subarray with the largest sum. | ||
| # Time Complexity: O(n) | ||
| # Space Complexity: O(1) (not counting output subarray) | ||
| # | ||
| # Applications: | ||
| # - Financial time series (max profit window) | ||
| # - Signal processing (max energy segment) | ||
| # - Pattern detection in sequences | ||
| # - As a subroutine in more complex DP/optimization tasks | ||
|
|
||
#' Kadane's algorithm: maximum-sum contiguous subarray
#'
#' Scans `arr` once, tracking the best sum of a segment ending at the current
#' position and the best segment seen so far. The best-so-far record is only
#' replaced on a strict improvement, so ties keep the earliest segment.
#'
#' @param arr Numeric (or logical) vector; may mix negative and positive
#'   values. Must not contain `NA`/`NaN`.
#' @return A list with fields:
#'   * `max_sum`  - maximum subarray sum (`-Inf` for empty input)
#'   * `start`    - 1-based start index of the subarray (`NA` for empty input)
#'   * `end`      - 1-based end index of the subarray (`NA` for empty input)
#'   * `subarray` - the elements `arr[start:end]` (empty for empty input)
kadane <- function(arr) {
  n <- length(arr)

  # Empty input: there is no subarray, so report -Inf with NA indices.
  if (n == 0) {
    return(list(
      max_sum = -Inf,
      start = NA_integer_,
      end = NA_integer_,
      subarray = numeric(0)
    ))
  }

  # Fail early with a clear message; otherwise a missing value surfaces as
  # "missing value where TRUE/FALSE needed" from inside the loop.
  if (!is.numeric(arr) && !is.logical(arr)) {
    stop("`arr` must be a numeric vector", call. = FALSE)
  }
  if (anyNA(arr)) {
    stop("`arr` must not contain NA or NaN values", call. = FALSE)
  }

  # Seed with the first element so all-negative input is handled correctly.
  best_here <- arr[1]
  best_overall <- arr[1]
  candidate_start <- 1L
  start <- 1L
  end <- 1L

  # seq_len(n)[-1] is 2..n and is empty when n == 1, so no length guard is
  # needed around the loop.
  for (i in seq_len(n)[-1]) {
    if (best_here + arr[i] < arr[i]) {
      # Extending the running segment is worse than restarting at arr[i].
      best_here <- arr[i]
      candidate_start <- i
    } else {
      best_here <- best_here + arr[i]
    }

    # Strict comparison: an equal-sum later segment never displaces the
    # earlier one.
    if (best_here > best_overall) {
      best_overall <- best_here
      start <- candidate_start
      end <- i
    }
  }

  list(
    max_sum = best_overall,
    start = start,
    end = end,
    subarray = arr[start:end]
  )
}
|
|
||
#' First-occurrence variant of Kadane's algorithm
#'
#' Thin alias for `kadane()`. That implementation only replaces its best
#' segment on a strict improvement, so ties already resolve to the earliest
#' segment and no extra work is needed here.
#'
#' @param arr Numeric vector, passed through unchanged.
#' @return Whatever `kadane(arr)` returns.
kadane_first_occurrence <- function(arr) {
  result <- kadane(arr)
  result
}
|
|
||
| # Helper to pretty-print results | ||
#' Pretty-print a Kadane result list
#'
#' @param res List with fields `max_sum`, `start`, `end` and `subarray`, in
#'   the shape returned by `kadane()`.
#' @param arr_name Label printed on the "Input:" line.
#' @return `NULL`, invisibly; the function is called for its printed output.
print_kadane_result <- function(res, arr_name = "Array") {
  cat("Input:", arr_name, "\n")

  # Empty input is recognised by NA indices together with an empty subarray.
  # NA indices alone are not enough: a result can carry a valid max_sum with
  # NA indices (e.g. the wrap-around result of kadane_circular(), whose
  # indices are not computed), and that sum must still be reported.
  is_empty <- isTRUE(is.na(res$start)) && length(res$subarray) == 0
  if (is_empty) {
    cat("Result: empty input\n\n")
    return(invisible(NULL))
  }

  cat("Max Subarray Sum:", res$max_sum, "\n")
  cat("Start Index:", res$start, " End Index:", res$end, "\n")
  cat("Subarray:", paste(res$subarray, collapse = ", "), "\n\n")
  invisible(NULL)
}
|
|
||
| # =========================== | ||
| # Example Usage & Testing | ||
| # =========================== | ||
cat("=== Kadane's Algorithm Tests ===\n\n")

# ---- Inputs ----
arr1 <- c(-2, 1, -3, 4, -1, 2, 1, -5, 4)  # mixed positive and negative
arr2 <- c(2, 3, 1, 4)                     # all positive
arr3 <- c(-8, -3, -6, -2, -5, -4)         # all negative
arr4 <- c(5)                              # single element
arr5 <- numeric(0)                        # empty
set.seed(123)                             # reproducible large random input
arr6 <- sample(-100:100, 100000, replace = TRUE)

# ---- Run the algorithm on the small cases ----
res1 <- kadane(arr1)
res2 <- kadane(arr2)
res3 <- kadane(arr3)
res4 <- kadane(arr4)
res5 <- kadane(arr5)

# ---- Timed run on the large case ----
start_time <- Sys.time()
res6 <- kadane(arr6)
end_time <- Sys.time()

# ---- Report, in the same order as the cases above ----
print_kadane_result(res1, "arr1 (mixed)")
print_kadane_result(res2, "arr2 (all positive)")
print_kadane_result(res3, "arr3 (all negative)")
print_kadane_result(res4, "arr4 (single element)")
print_kadane_result(res5, "arr5 (empty)")
print_kadane_result(res6, "arr6 (large random)")
cat(
  "Elapsed time (seconds):",
  as.numeric(end_time - start_time, units = "secs"),
  "\n\n"
)
siriak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # Optional: function to get maximum circular subarray (Kadane + total sum trick) | ||
#' Maximum subarray sum for a circular array (wrap-around allowed)
#'
#' Compares the best non-wrapping segment (plain Kadane) with the best
#' wrapping segment, whose sum is `sum(arr)` minus the minimum subarray sum.
#'
#' @param arr Numeric vector.
#' @return A list with fields `max_sum`, `start`, `end` and `subarray`.
#'   * Empty input: `max_sum = -Inf`, NA indices, empty `subarray`.
#'   * No positive element: the largest single element and its index.
#'   * Wrap-around segment wins: only `max_sum` is filled in; `start`, `end`
#'     and `subarray` are `NA` because wrap-around indices are not computed.
#'   * Otherwise: the fields of `kadane(arr)`.
kadane_circular <- function(arr) {
  n <- length(arr)
  if (n == 0) {
    return(list(
      max_sum = -Inf,
      start = NA_integer_,
      end = NA_integer_,
      subarray = numeric(0)
    ))
  }

  # Best non-wrapping segment; computed once and reused below.
  linear <- kadane(arr)

  # With no positive element the minimum subarray is the whole array, so the
  # total-minus-minimum trick would describe an empty segment. The answer is
  # simply the largest single element.
  if (all(arr <= 0)) {
    top <- which.max(arr)
    return(list(
      max_sum = linear$max_sum,
      start = top,
      end = top,
      subarray = arr[top]
    ))
  }

  # kadane(-arr)$max_sum is the negated minimum subarray sum, so adding it to
  # the total yields total - min_subarray_sum, the best wrapping sum.
  max_wrap <- sum(arr) + kadane(-arr)$max_sum

  if (max_wrap > linear$max_sum) {
    # Indices of the wrapping segment are not computed here.
    return(list(
      max_sum = max_wrap,
      start = NA_integer_,
      end = NA_integer_,
      subarray = NA
    ))
  }

  list(
    max_sum = linear$max_sum,
    start = linear$start,
    end = linear$end,
    subarray = linear$subarray
  )
}
|
|
||
| # Example for circular | ||
# Demo input: wrapping around lets the segment 3, 4, 8 skip the lone -1.
arrc <- c(8, -1, 3, 4)
res_circ <- kadane_circular(arrc)

cat("=== Circular Kadane Example ===\n")
cat("Input:", toString(arrc), "\n")
cat("Max circular subarray sum:", res_circ$max_sum, "\n\n")
|
|
||
| # End of script | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,61 @@ | ||
| # ============================================== | ||
| # Convolutional Neural Network (CNN) | ||
| # ============================================== | ||
| # Algorithm: Deep learning model using convolutional, pooling, and dense layers. | ||
| # Framework: Keras (TensorFlow backend) | ||
| # | ||
| # Purpose: | ||
| # - Automatically extract spatial and hierarchical features from image data. | ||
| # - Commonly used for image classification, object detection, and visual recognition. | ||
| # | ||
| # Architecture Steps: | ||
| # 1. Convolution Layer: Extracts local spatial patterns using learnable filters. | ||
| # 2. Activation (ReLU): Adds non-linearity by thresholding at zero. | ||
| # 3. Pooling Layer: Reduces spatial dimensions (downsampling) while preserving features. | ||
| # 4. Flatten Layer: Converts 2D feature maps into 1D vector. | ||
| # 5. Dense Layers: Combines extracted features for classification. | ||
| # 6. Output Layer: Uses Softmax activation for class probabilities. | ||
| # | ||
| # Complexity: | ||
| # - Time: O(E × N × F × K²) where E=epochs, N=samples, F=filters, K=kernel size | ||
| # - Space: O(parameters + feature maps) | ||
| # | ||
| # Reference: | ||
| # LeCun et al., "Gradient-based learning applied to document recognition" (1998) | ||
| # https://yann.lecun.com/exdb/lenet/ | ||
| # | ||
| # ============================================== | ||
|
|
||
| # Load Required Library | ||
| suppressPackageStartupMessages(library(keras)) | ||
|
|
||
| # Define CNN Architecture as a Function (Reusable) | ||
#' Build an (uncompiled) sequential CNN
#'
#' Stacks two convolution + max-pooling stages, then flattens and feeds a
#' dense hidden layer followed by a softmax output layer.
#'
#' @param input_shape Shape passed as `input_shape` to the first convolution
#'   layer (default `c(28, 28, 1)`).
#' @param num_classes Number of units in the softmax output layer.
#' @return The value of the final `layer_dense()` call on the sequential model.
build_cnn_model <- function(input_shape = c(28, 28, 1), num_classes = 10) {
  net <- keras_model_sequential()

  # Stage 1: 32 filters of 3x3, then 2x2 max pooling.
  net <- layer_conv_2d(
    net,
    filters = 32, kernel_size = c(3, 3), activation = "relu",
    input_shape = input_shape, padding = "same"
  )
  net <- layer_max_pooling_2d(net, pool_size = c(2, 2))

  # Stage 2: 64 filters of 3x3, then 2x2 max pooling.
  net <- layer_conv_2d(
    net,
    filters = 64, kernel_size = c(3, 3),
    activation = "relu", padding = "same"
  )
  net <- layer_max_pooling_2d(net, pool_size = c(2, 2))

  # Classifier head.
  net <- layer_flatten(net)
  net <- layer_dense(net, units = 128, activation = "relu")
  layer_dense(net, units = num_classes, activation = "softmax")
}
|
|
||
| # Example: Display Model Summary (only in interactive sessions) | ||
# Build the model with its default arguments spelled out and show its
# summary; skipped when the file is sourced non-interactively.
if (interactive()) {
  model <- build_cnn_model(input_shape = c(28, 28, 1), num_classes = 10)
  summary(model)
}
|
|
||
| # ============================================== | ||
| # Note: | ||
| # - This script defines the CNN algorithm structure only. | ||
| # - You can compile and train it using model %>% compile() and model %>% fit() | ||
| # with any dataset (e.g., MNIST, CIFAR-10). | ||
| # ============================================== |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| # ============================================== | ||
|
||
| # Gaussian Process Regression (GP) | ||
| # ============================================== | ||
AtharvaPatange marked this conversation as resolved.
Show resolved
Hide resolved
AtharvaPatange marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # Algorithm: Non-parametric Bayesian regression using Gaussian Processes. | ||
| # Framework: R (kernlab package) | ||
| # | ||
| # Purpose: | ||
| # - Perform regression while providing uncertainty estimates. | ||
| # - Useful for small datasets and Bayesian optimization. | ||
| # | ||
| # Core Idea: | ||
| # - Define a prior over functions using a kernel (covariance) function. | ||
| # - Update the posterior distribution using observed data. | ||
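| # - Predictions give the posterior mean; a per-point standard deviation (uncertainty) is also available when the model is fit with variance.model = TRUE and predicted with type = "sdeviation". | ||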
siriak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # | ||
AtharvaPatange marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| # Complexity: | ||
| # - Time: O(n^3) due to inversion of the kernel matrix | ||
| # - Space: O(n^2) for storing the kernel matrix | ||
| # | ||
| # Edge Cases / Notes: | ||
| # - Choice of kernel is critical for good performance. | ||
| # - Computationally heavy for large datasets; sparse approximations exist. | ||
| # - Great for uncertainty quantification in predictions. | ||
| # | ||
| # Typical Applications: | ||
| # - Bayesian optimization | ||
| # - Small-data regression tasks | ||
| # - Time-series forecasting with uncertainty estimates | ||
| # | ||
| # Reference: | ||
| # Rasmussen, C. E., & Williams, C. K. I. (2006). Gaussian Processes for Machine Learning. | ||
| # ============================================== | ||
|
|
||
| # Load required library | ||
| suppressPackageStartupMessages(library(kernlab)) | ||
|
|
||
| # ---- Core Functions ---- | ||
|
|
||
#' Fit a Gaussian Process Regression model
#'
#' Coerces the inputs to a matrix and delegates to `gausspr()`.
#'
#' @param x Numeric vector or matrix of input features; converted with
#'   `as.matrix()` before fitting.
#' @param y Numeric vector of target values.
#' @param kernel Kernel forwarded to `gausspr()` (default: "rbfdot").
#' @param ... Further arguments forwarded unchanged to `gausspr()`.
#' @return The fitted model object returned by `gausspr()`.
gp_train <- function(x, y, kernel = "rbfdot", ...) {
  features <- as.matrix(x)
  gausspr(x = features, y = y, kernel = kernel, ...)
}
|
|
||
#' Predict from a fitted Gaussian Process Regression model
#'
#' Coerces the test inputs to a matrix and delegates to `predict()`.
#'
#' @param model Fitted model, e.g. the value returned by `gp_train()`.
#' @param x_test Numeric vector or matrix of test inputs; converted with
#'   `as.matrix()` before prediction.
#' @param type Prediction type forwarded to `predict()` (default: "response").
#' @param ... Further arguments forwarded unchanged to `predict()`.
#' @return Whatever `predict()` returns for `model`.
gp_predict <- function(model, x_test, type = "response", ...) {
  new_inputs <- as.matrix(x_test)
  predict(model, new_inputs, type = type, ...)
}
siriak marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| # ---- Example Usage (runs only in interactive sessions) ---- | ||
# ---- Example Usage (runs only in interactive sessions) ----
if (interactive()) {
  # Synthetic data: a noisy sine wave sampled on [-5, 5].
  set.seed(42)
  x <- seq(-5, 5, length.out = 50)
  y <- sin(x) + rnorm(length(x), sd = 0.2)

  # Fit on the observations, then predict on a grid that extends one unit
  # beyond the training range on each side.
  gp_model <- gp_train(x, y)
  x_test <- seq(-6, 6, length.out = 100)
  y_pred <- gp_predict(gp_model, x_test)

  # Observations as points, GP prediction as a line.
  plot(
    x, y,
    main = "Gaussian Process Regression",
    xlab = "X", ylab = "Y",
    pch = 19
  )
  lines(x_test, y_pred, col = "blue", lwd = 2)
  legend(
    "topright",
    legend = c("Observations", "GP Prediction"),
    col = c("black", "blue"),
    pch = c(19, NA),
    lty = c(NA, 1)
  )
}
| # ============================================== | ||
| # Note: | ||
| # - This script defines a Gaussian Process Regression model in R. | ||
| # - Can be applied to other regression datasets by replacing x and y. | ||
| # - For large datasets, consider sparse GP approximations. | ||
| # ============================================== | ||
Uh oh!
There was an error while loading. Please reload this page.