Skip to content

Commit fb72bda

Browse files
authored
Merge branch 'master' into add-linked-list-data-structure
2 parents 9d30fcf + 05f442d commit fb72bda

File tree

23 files changed

+4931
-2
lines changed

23 files changed

+4931
-2
lines changed

.github/copilot-instructions.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ Before submitting a pull request, verify that your code:
5252

5353
When reviewing a pull request:
5454
- Verify that any added algorithms or data structures aren't already implemented elsewhere in the repository (including under a different name)
55-
- Confirm that the proposed algorithm is a recognized computer-science algorithm, not a problem-specific adaptation of a general technique (e.g., tuned for LeetCode or other competitive-programming problems)
55+
- Confirm that the proposed algorithm is a recognized computer-science algorithm, not a problem-specific adaptation of a general technique (e.g., tuned for LeetCode or other competitive-programming problems). It is prohibited to add LeetCode problems.
5656
- Check that the extension of all code file names is a lowercase `.r`
57-
- Check that DIRECTORY.md was updated correctly
57+
- Check that the newly added algorithm is also added to DIRECTORY.md file
5858
- Verify that the code includes appropriate documentation and examples
5959
- Ensure that variable naming follows repository conventions

data_preprocessing/pca.r

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# ==============================================
2+
# Principal Component Analysis (PCA)
3+
# ==============================================
4+
# Algorithm: Dimensionality reduction using orthogonal linear transformation
5+
# Framework: Base R
6+
#
7+
# Purpose:
8+
# - Reduce high-dimensional data into a smaller set of uncorrelated variables (principal components)
9+
# - Preserve as much variance as possible
10+
#
11+
# Steps:
12+
# 1. Standardize the dataset (zero mean, unit variance)
13+
# 2. Compute the covariance matrix of the standardized data
14+
# 3. Compute eigenvalues and eigenvectors of the covariance matrix
15+
# 4. Sort eigenvectors by decreasing eigenvalues (most variance first)
16+
# 5. Project the standardized data onto the top k eigenvectors to get reduced data
17+
#
18+
# Complexity:
19+
# - Time: O(n * d^2 + d^3) where n = samples, d = features
20+
# - Space: O(d^2 + n * d)
21+
#
22+
# Applications:
23+
# - Data visualization, noise reduction, feature extraction
24+
# - Preprocessing for machine learning models
25+
# ==============================================
26+
27+
# PCA Algorithm Implementation (Algorithm only)
28+
#' Principal component analysis via eigen decomposition of the covariance
#' matrix of the standardized data.
#'
#' @param X Numeric matrix (rows = samples, columns = features) or a numeric
#'   vector, which is treated as a single-column matrix.
#' @param k Number of leading principal components to keep; a single number
#'   between 1 and `ncol(X)`.
#' @return A list with:
#'   `reduced_data` - n x k matrix, the standardized data projected onto the
#'     top k components;
#'   `components`   - d x k matrix whose columns are the top k eigenvectors;
#'   `eigenvalues`  - all d eigenvalues, in decreasing order.
pca_algorithm <- function(X, k) {
  # Basic input validation
  if (is.vector(X)) {
    X <- matrix(X, ncol = 1)
  }
  if (!is.matrix(X) || !is.numeric(X)) {
    stop("Input 'X' must be a numeric matrix or vector")
  }
  d <- ncol(X)
  if (!is.numeric(k) || length(k) != 1 || is.na(k)) {
    stop("'k' must be a single non-missing number")
  }
  if (k <= 0 || k > d) {
    stop("'k' must be between 1 and the number of columns of X")
  }

  # The checks below turn what used to be an opaque failure inside eigen()
  # (NaN/NA in the covariance matrix) into explicit, actionable errors.
  if (!all(is.finite(X))) {
    stop("Input 'X' must not contain missing or non-finite values")
  }
  if (nrow(X) < 2) {
    stop("Input 'X' must have at least two rows")
  }
  is_constant <- vapply(
    seq_len(d),
    function(j) min(X[, j]) == max(X[, j]),
    logical(1)
  )
  if (any(is_constant)) {
    stop("Input 'X' has zero variance column(s); they cannot be standardized")
  }

  # Step 1: Standardize the data (zero mean, unit variance per feature)
  X_std <- scale(X)

  # Step 2: Compute covariance matrix of standardized data
  cov_matrix <- cov(X_std)

  # Step 3: Eigen decomposition. A covariance matrix is symmetric, so say so
  # explicitly: eigenvalues are then real and returned in decreasing order.
  eig <- eigen(cov_matrix, symmetric = TRUE)
  eig_values <- eig$values
  eig_vectors <- eig$vectors

  # Step 4: Select top k principal components (eigenvectors).
  # k >= 1 is guaranteed by the validation above, so 1:k is never c(1, 0).
  top_vectors <- eig_vectors[, 1:k, drop = FALSE]

  # Step 5: Project standardized data onto top k components
  X_reduced <- X_std %*% top_vectors

  list(
    reduced_data = X_reduced,
    components = top_vectors,
    eigenvalues = eig_values
  )
}
64+
65+
# Example usage (algorithm only)
66+
# set.seed(42)
67+
# X <- matrix(rnorm(50 * 5), nrow = 50, ncol = 5)
68+
# pca_result <- pca_algorithm(X, k = 2)
69+
# head(pca_result$reduced_data)

dynamic_programming/kadanes_algo.r

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Kadane's Algorithm in R
2+
#
3+
# Finds the contiguous subarray with the largest sum.
4+
# Time Complexity: O(n)
5+
# Space Complexity: O(1) (not counting output subarray)
6+
#
7+
# Applications:
8+
# - Financial time series (max profit window)
9+
# - Signal processing (max energy segment)
10+
# - Pattern detection in sequences
11+
# - As a subroutine in more complex DP/optimization tasks
12+
13+
#' Kadane's algorithm: maximum-sum contiguous subarray and its indices
#'
#' @param arr Numeric vector (may contain negatives and positives). Must not
#'   contain NA/NaN.
#' @return A list with fields:
#'   max_sum  - numeric: maximum subarray sum (-Inf for empty input)
#'   start    - integer: 1-based start index, NA if input is empty
#'   end      - integer: 1-based end index, NA if input is empty
#'   subarray - the slice arr[start:end] (numeric(0) if input is empty)
kadane <- function(arr) {
  n <- length(arr)

  # Edge case: nothing to scan
  if (n == 0) {
    return(list(
      max_sum = -Inf,
      start = NA_integer_,
      end = NA_integer_,
      subarray = numeric(0)
    ))
  }

  if (!is.numeric(arr) && !is.logical(arr)) {
    stop("'arr' must be a numeric vector")
  }
  if (anyNA(arr)) {
    stop("'arr' must not contain NA or NaN values")
  }

  # Accumulate in double precision: integer addition in R overflows to NA,
  # which would make the comparisons below fail on large integer inputs.
  vals <- as.numeric(arr)

  # Initialize with first element (handles all-negative arrays correctly)
  max_ending_here <- vals[1]
  max_so_far <- vals[1]
  s <- 1L      # start of the segment currently being extended
  start <- 1L  # bounds of the best segment found so far
  end <- 1L

  # seq_len(n)[-1] is 2..n and is empty when n == 1
  for (i in seq_len(n)[-1]) {
    # If extending the current segment is worse than restarting at vals[i]
    if (max_ending_here + vals[i] < vals[i]) {
      max_ending_here <- vals[i]
      s <- i
    } else {
      max_ending_here <- max_ending_here + vals[i]
    }

    # Strict '>' keeps the earliest best segment on ties
    if (max_ending_here > max_so_far) {
      max_so_far <- max_ending_here
      start <- s
      end <- i
    }
  }

  list(
    max_sum = max_so_far,
    start = as.integer(start),
    end = as.integer(end),
    subarray = arr[start:end]
  )
}
67+
68+
# Helper to pretty-print results
69+
#' Pretty-print a maximum-subarray result list
#'
#' @param res List with fields max_sum, start, end and subarray, as returned
#'   by kadane() or kadane_circular().
#' @param arr_name Label printed on the "Input:" line.
#' @return NULL, invisibly; called for its printed output.
print_kadane_result <- function(res, arr_name = "Array") {
  cat("Input:", arr_name, "\n")

  has_indices <- !is.na(res$start)

  # Only a missing start index together with an empty subarray means the
  # input was empty. A result can also lack indices while still carrying a
  # valid max_sum; that must not be reported as "empty input".
  if (!has_indices && length(res$subarray) == 0) {
    cat("Result: empty input\n\n")
    return(invisible(NULL))
  }

  cat("Max Subarray Sum:", res$max_sum, "\n")
  if (has_indices) {
    cat("Start Index:", res$start, " End Index:", res$end, "\n")
    cat("Subarray:", paste(res$subarray, collapse = ", "), "\n\n")
  } else {
    cat("Start/End Index: not available\n\n")
  }
  invisible(NULL)
}
79+
80+
# ===========================
81+
# Example Usage & Testing
82+
# ===========================
83+
# Demonstration script: runs kadane() on representative inputs and prints
# each result with print_kadane_result(). Executed whenever the file is run.
cat("=== Kadane's Algorithm Tests ===\n\n")

# Test 1: Mixed positive and negative
arr1 <- c(-2, 1, -3, 4, -1, 2, 1, -5, 4)
res1 <- kadane(arr1)
print_kadane_result(res1, "arr1 (mixed)")

# Test 2: All positive
arr2 <- c(2, 3, 1, 4)
res2 <- kadane(arr2)
print_kadane_result(res2, "arr2 (all positive)")

# Test 3: All negative
arr3 <- c(-8, -3, -6, -2, -5, -4)
res3 <- kadane(arr3)
print_kadane_result(res3, "arr3 (all negative)")

# Test 4: Single element
arr4 <- c(5)
res4 <- kadane(arr4)
print_kadane_result(res4, "arr4 (single element)")

# Test 5: Empty array
arr5 <- numeric(0)
res5 <- kadane(arr5)
print_kadane_result(res5, "arr5 (empty)")

# Test 6: Random large array - timing example
# Fixed seed so the sampled array (and therefore the result) is reproducible.
set.seed(123)
arr6 <- sample(-100:100, 100000, replace = TRUE)
start_time <- Sys.time()
res6 <- kadane(arr6)
end_time <- Sys.time()
print_kadane_result(res6, "arr6 (large random)")
cat("Elapsed time (seconds):", as.numeric(end_time - start_time, units = "secs"), "\n\n")
118+
119+
# Optional: function to get maximum circular subarray (Kadane + total sum trick)
120+
#' Maximum subarray sum for a circular array (wrap-around allowed)
#'
#' Uses kadane() twice: once for the best non-wrapping segment and once on
#' the negated input to find the minimum-sum segment. The best wrapping
#' segment is everything outside that minimum-sum segment.
#'
#' @param arr Numeric vector.
#' @return A list with fields max_sum, start, end and subarray.
#'   - Empty input: max_sum = -Inf, start/end = NA, subarray = numeric(0).
#'   - All elements <= 0: the single largest element (no wrap).
#'   - Wrap-around optimum: start > end, and subarray is arr[start:n]
#'     followed by arr[1:end].
#'   - Otherwise: the same fields as kadane(arr).
kadane_circular <- function(arr) {
  n <- length(arr)
  if (n == 0) {
    return(list(
      max_sum = -Inf,
      start = NA_integer_,
      end = NA_integer_,
      subarray = numeric(0)
    ))
  }

  # Best non-wrapping segment (computed once and reused below)
  best <- kadane(arr)

  # If nothing is positive, "total minus minimum segment" would describe an
  # empty selection with sum 0, so return the largest single element instead.
  if (all(arr <= 0)) {
    peak <- which.max(arr)
    return(list(
      max_sum = best$max_sum,
      start = peak,
      end = peak,
      subarray = arr[peak]
    ))
  }

  # kadane() on the negated array finds the minimum-sum segment of arr;
  # its max_sum is the NEGATIVE of that minimum sum.
  min_seg <- kadane(-arr)
  neg_min_sum <- min_seg$max_sum

  # Max wrap = total_sum - min_subarray_sum = total_sum + neg_min_sum.
  # Sum in double precision so integer input cannot overflow to NA.
  max_wrap <- sum(as.numeric(arr)) + neg_min_sum

  # The wrapping segment is the complement of the minimum segment:
  # the tail after it, followed by the head before it.
  wrap_idx <- c(
    seq_len(n - min_seg$end) + min_seg$end,
    seq_len(min_seg$start - 1L)
  )

  if (max_wrap > best$max_sum && length(wrap_idx) > 0) {
    return(list(
      max_sum = max_wrap,
      start = wrap_idx[1],
      end = wrap_idx[length(wrap_idx)],
      subarray = arr[wrap_idx]
    ))
  }

  list(
    max_sum = best$max_sum,
    start = best$start,
    end = best$end,
    subarray = best$subarray
  )
}
149+
150+
# Example for circular
151+
# Demonstration: for c(8, -1, 3, 4) the best circular segment skips only the
# -1 (3 + 4 + 8 = 15), which beats the best non-wrapping sum of 14.
cat("=== Circular Kadane Example ===\n")
arrc <- c(8, -1, 3, 4)
res_circ <- kadane_circular(arrc)
cat("Input:", paste(arrc, collapse = ", "), "\n")
cat("Max circular subarray sum:", res_circ$max_sum, "\n\n")
156+
157+
# End of script

0 commit comments

Comments
 (0)