From 8b30acf58c4636541e76801c4f9f22dfcd53d85b Mon Sep 17 00:00:00 2001 From: Prathamesh Kalshetti Date: Sat, 18 Oct 2025 11:44:39 +0530 Subject: [PATCH 01/10] bidirectional_bfs --- graph_algorithms/bidirectional_bfs.r | 153 +++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 graph_algorithms/bidirectional_bfs.r diff --git a/graph_algorithms/bidirectional_bfs.r b/graph_algorithms/bidirectional_bfs.r new file mode 100644 index 00000000..8daf6f38 --- /dev/null +++ b/graph_algorithms/bidirectional_bfs.r @@ -0,0 +1,153 @@ +# ============================================================== +# Bidirectional Breadth-First Search (BFS) Shortest Path Algorithm +# ============================================================== +# +# Description: +# Finds the shortest path between a source and target in an +# unweighted graph using Bidirectional BFS. +# +# Time Complexity: O(b^(d/2)) — much faster than normal BFS O(b^d) +# Space Complexity: O(V) +# +# Input: +# graph - adjacency list (list of integer vectors) +# source - integer (starting vertex) +# target - integer (destination vertex) +# +# Output: +# A list containing: +# path - vector of vertices representing the path +# distance - number of edges in the shortest path +# found - logical flag (TRUE if path found, else FALSE) +# +# Example usage at bottom of file. 
+# ============================================================== + +bidirectional_bfs <- function(graph, source, target) { + if (source == target) { + return(list(path = c(source), distance = 0, found = TRUE)) + } + + # Initialize BFS from both ends + visited_from_source <- setNames(rep(FALSE, length(graph)), names(graph)) + visited_from_target <- setNames(rep(FALSE, length(graph)), names(graph)) + + parent_from_source <- rep(NA, length(graph)) + parent_from_target <- rep(NA, length(graph)) + + queue_source <- c(source) + queue_target <- c(target) + + visited_from_source[source] <- TRUE + visited_from_target[target] <- TRUE + + meeting_node <- NA + + # Function to check intersection + get_intersection <- function() { + common <- which(visited_from_source & visited_from_target) + if (length(common) > 0) return(common[1]) + return(NA) + } + + # Main loop + while (length(queue_source) > 0 && length(queue_target) > 0) { + # Expand one level from source side + next_queue <- c() + for (u in queue_source) { + for (v in graph[[as.character(u)]]) { + if (!visited_from_source[v]) { + visited_from_source[v] <- TRUE + parent_from_source[v] <- u + next_queue <- c(next_queue, v) + } + } + } + queue_source <- next_queue + + # Check intersection + meeting_node <- get_intersection() + if (!is.na(meeting_node)) break + + # Expand one level from target side + next_queue <- c() + for (u in queue_target) { + for (v in graph[[as.character(u)]]) { + if (!visited_from_target[v]) { + visited_from_target[v] <- TRUE + parent_from_target[v] <- u + next_queue <- c(next_queue, v) + } + } + } + queue_target <- next_queue + + # Check intersection again + meeting_node <- get_intersection() + if (!is.na(meeting_node)) break + } + + if (is.na(meeting_node)) { + return(list(path = NULL, distance = Inf, found = FALSE)) + } + + # Reconstruct path from source → meeting_node + path1 <- c() + node <- meeting_node + while (!is.na(node)) { + path1 <- c(node, path1) + node <- parent_from_source[node] + } + 
+  # Reconstruct path from meeting_node → target
+  path2 <- c()
+  node <- parent_from_target[meeting_node]
+  while (!is.na(node)) {
+    path2 <- c(path2, node)
+    node <- parent_from_target[node]
+  }
+
+  full_path <- c(path1, path2)
+  return(list(path = full_path, distance = length(full_path) - 1, found = TRUE))
+}
+
+# ==============================================================
+# Example Usage and Test
+# ==============================================================
+
+cat("=== Bidirectional BFS Shortest Path ===\n")
+
+# Example Graph (Unweighted)
+# 1 -- 2 -- 3
+# |    |
+# 4 -- 5 -- 6
+
+graph <- list(
+  "1" = c(2, 4),
+  "2" = c(1, 3, 5),
+  "3" = c(2, 6),
+  "4" = c(1, 5),
+  "5" = c(2, 4, 6),
+  "6" = c(3, 5)
+)
+
+cat("Graph adjacency list:\n")
+for (v in names(graph)) {
+  cat("Vertex", v, "-> [", paste(graph[[v]], collapse = ", "), "]\n")
+}
+
+cat("\nRunning Bidirectional BFS from 1 to 6...\n")
+result <- bidirectional_bfs(graph, 1, 6)
+
+if (result$found) {
+  cat("Shortest Path Found!\n")
+  cat("Path:", paste(result$path, collapse = " -> "), "\n")
+  cat("Distance:", result$distance, "\n")
+} else {
+  cat("No path found between source and target.\n")
+}
\ No newline at end of file

From 7a501168347c694b757fbd9ff0840f5126657463 Mon Sep 17 00:00:00 2001
From: Prathamesh Kalshetti
Date: Sun, 19 Oct 2025 14:21:44 +0530
Subject: [PATCH 02/10] feat-graph_colouring

---
 dynamic_programming/viterbi.r     | 113 ++++++++
 et --soft HEAD~1                  | 461 ++++++++++++++++++++++++++++++
 graph_algorithms/graph_coloring.r | 252 ++++++++++++++++
 3 files changed, 826 insertions(+)
 create mode 100644 dynamic_programming/viterbi.r
 create mode 100644 et --soft HEAD~1
 create mode 100644 graph_algorithms/graph_coloring.r

diff --git a/dynamic_programming/viterbi.r b/dynamic_programming/viterbi.r
new file mode 100644
index 00000000..7d656562
--- /dev/null
+++ b/dynamic_programming/viterbi.r
@@ -0,0 +1,113 @@
+#
==============================================================
+# Viterbi Algorithm — Hidden Markov Model (HMM) Decoding
+# ==============================================================
+#
+# Description:
+# The Viterbi algorithm finds the most probable sequence of
+# hidden states (state path) that results in a given sequence of
+# observed events in a Hidden Markov Model.
+#
+# Time Complexity: O(N^2 * T)
+# (each step scores all N predecessor states for each of the
+# N current states, as the double loop below shows)
+# - N = number of hidden states
+# - T = length of observation sequence
+#
+# Space Complexity: O(N * T)
+#
+# Input:
+# states - vector of hidden states
+# observations - vector of observed symbols
+# start_prob - named vector of initial probabilities (state → prob)
+# trans_prob - matrix of transition probabilities (from_state → to_state)
+# emit_prob - matrix of emission probabilities (state → observation)
+#
+# Output:
+# A list containing:
+# best_path - most probable state sequence
+# best_prob - probability of the best path
+#
+# Example usage provided at bottom of file.
+# ==============================================================
+
+viterbi <- function(states, observations, start_prob, trans_prob, emit_prob) {
+  N <- length(states)
+  T_len <- length(observations)
+
+  # Initialize matrices
+  V <- matrix(0, nrow = N, ncol = T_len)     # probability table
+  path <- matrix(NA, nrow = N, ncol = T_len) # backpointer table
+
+  # Initialization step
+  for (i in 1:N) {
+    V[i, 1] <- start_prob[states[i]] * emit_prob[states[i], observations[1]]
+    path[i, 1] <- 0
+  }
+
+  # Recursion step.
+  # Guarded for T_len == 1: an unguarded `for (t in 2:T_len)` counts
+  # DOWN (2, 1) when there is a single observation and indexes column 2
+  # of a one-column matrix, raising a subscript error.
+  if (T_len > 1) {
+    for (t in 2:T_len) {
+      for (j in 1:N) {
+        probs <- V[, t - 1] * trans_prob[, states[j]] * emit_prob[states[j], observations[t]]
+        V[j, t] <- max(probs)
+        path[j, t] <- which.max(probs)
+      }
+    }
+  }
+
+  # Termination step
+  best_last_state <- which.max(V[, T_len])
+  best_prob <- V[best_last_state, T_len]
+
+  # Backtrack the best path (same T_len == 1 guard: `(T_len - 1):1`
+  # would iterate 0, 1 and index out of bounds otherwise)
+  best_path <- rep(NA, T_len)
+  best_path[T_len] <- best_last_state
+
+  if (T_len > 1) {
+    for (t in (T_len - 1):1) {
+      best_path[t] <- path[best_path[t + 1], t + 1]
+    }
+  }
+
best_state_sequence <- states[best_path] + + return(list( + best_path = best_state_sequence, + best_prob = best_prob + )) +} + +# ============================================================== +# Example Usage and Test +# ============================================================== + +cat("=== Viterbi Algorithm — Hidden Markov Model ===\n") + +# Example: Weather HMM +# States: Rainy, Sunny +# Observations: walk, shop, clean +states <- c("Rainy", "Sunny") +observations <- c("walk", "shop", "clean") + +# Start probabilities +start_prob <- c(Rainy = 0.6, Sunny = 0.4) + +# Transition probabilities +trans_prob <- matrix(c( + 0.7, 0.3, # from Rainy to (Rainy, Sunny) + 0.4, 0.6 # from Sunny to (Rainy, Sunny) +), nrow = 2, byrow = TRUE) +rownames(trans_prob) <- states +colnames(trans_prob) <- states + +# Emission probabilities +emit_prob <- matrix(c( + 0.1, 0.4, 0.5, # Rainy emits (walk, shop, clean) + 0.6, 0.3, 0.1 # Sunny emits (walk, shop, clean) +), nrow = 2, byrow = TRUE) +rownames(emit_prob) <- states +colnames(emit_prob) <- observations + +# Observed sequence +obs_seq <- c("walk", "shop", "clean") + +cat("Observation sequence:", paste(obs_seq, collapse = ", "), "\n") +result <- viterbi(states, obs_seq, start_prob, trans_prob, emit_prob) + +cat("Most probable state sequence:\n") +cat(paste(result$best_path, collapse = " -> "), "\n") +cat("Probability of this sequence:", result$best_prob, "\n") diff --git a/et --soft HEAD~1 b/et --soft HEAD~1 new file mode 100644 index 00000000..e9c4e0df --- /dev/null +++ b/et --soft HEAD~1 @@ -0,0 +1,461 @@ +commit 7d4b7af52036b21abf54435f14250ef170351389 (HEAD -> Graph_colouring) +Author: Prathamesh Kalshetti +Date: Sun Oct 19 13:56:38 2025 +0530 + + graph_colouring + +commit 4921341b2921457245427d4b465e0eb1478f28e6 (origin/feat-viterbi, feat-viterbi, feat-bidirectional_bfs) +Author: Prathamesh Kalshetti +Date: Sat Oct 18 12:35:09 2025 +0530 + + viterbi + +commit 8b30acf58c4636541e76801c4f9f22dfcd53d85b 
(origin/feat-bidirectional_bfs, master) +Author: Prathamesh Kalshetti +Date: Sat Oct 18 11:44:39 2025 +0530 + + bidirectional_bfs + +commit b083bc9dcffa63af4210cb5b8664246a4a0cf941 (origin/master, origin/HEAD) +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Thu Oct 16 20:02:54 2025 +0530 + + Add Knuth-Morris-Pratt (KMP) string matching algorithm [HACKTOBERFEST 2025] (#157) + +commit 1ae8bda8c22c4d580f7c6e7cdc457125328acadb +Author: Pratik +Date: Wed Oct 15 03:15:51 2025 +0530 + + [FEATURE] Add Jump Search Algorithm Implementation in R (#216) + +commit 65e82bffad452bcc0237df9551acfc90e3d69243 +Author: Pratik +Date: Sun Oct 12 19:04:20 2025 +0530 + + Add comprehensive Value at Risk (VaR) and Expected Shortfall (ES) calculator (#201) + +commit a5cbeecd63b6efedd7fc9a94d61143e73115a0dc +Author: Pratik +Date: Sun Oct 12 19:00:13 2025 +0530 + + Add Time Series Analysis and ARIMA Modeling Implementation in R (#202) + +commit d711ac1f0db4909653825ae531968c4393e27fff +Author: Andrii Siriak +Date: Sun Oct 12 16:26:48 2025 +0300 + + Create stale.yml + +commit 69a0c3045cade6b7c8409f55a0d4b5ae29a1b083 +Author: Pratik +Date: Sun Oct 12 18:53:20 2025 +0530 + + Add Floyd–Warshall All-Pairs Shortest Path Algorithm Implementation in R (#203) + +commit 6d15d42ac89877ab1a54cf5707c03cc3a659947e +Author: Pratik +Date: Sun Oct 12 15:18:30 2025 +0530 + + created a comprehensive Black-Scholes option pricing algorithm (#200) + +commit 79ca778cf37b2fca295f893ed00785e61f2396a8 +Author: Pratik +Date: Sun Oct 12 15:18:20 2025 +0530 + + Implement Gradient Boosting Regressor with Decision Trees in R (#199) + +commit e6c0b52d8e1300b88c6971e45f87745425bcf4a4 +Author: Pratik +Date: Sun Oct 12 15:18:11 2025 +0530 + + Tarjan's Bridge Finding Algorithm (#198) + +commit b811a36c4f33cf4551f3196a1735319f861a9bb5 +Author: Pratik +Date: Sun Oct 12 13:42:20 2025 +0530 + + feat : Add the Bellman-Ford Shortest Path Algorithm in R (#192) + +commit 47ff5ed6db5ae97765218b89073c2ea96b8cc5e7 +Author: 
Srishti Soni <92056170+shimmer12@users.noreply.github.com> +Date: Sun Oct 12 02:41:13 2025 +0530 + + Add modular exponentiation function (#194) + +commit b5d1199c77508d1bb99c4c7bda1cb44a19570556 +Author: Srishti Soni <92056170+shimmer12@users.noreply.github.com> +Date: Sun Oct 12 02:40:30 2025 +0530 + + Implement Newton-Raphson method in R (#196) + +commit b90abace62e50712a58eba9ff717f89fe87434de +Author: Srishti Soni <92056170+shimmer12@users.noreply.github.com> +Date: Sun Oct 12 02:38:56 2025 +0530 + + Add one-way ANOVA function implementation (#195) + +commit 7a5ea9c1aa0c3e79557515e083b646c408a84d48 +Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> +Date: Sun Oct 12 02:21:20 2025 +0530 + + Implemented the Subset Sum Problem algorithm in R. (#171) + +commit 52f65cfbc046862917dc33a28cff489cca0fdfbb +Author: Pratik +Date: Sun Oct 12 02:19:57 2025 +0530 + + Add Catalan Numbers algorithm implementation (#191) + +commit aa686ce907b27380ca30a23afa1f6d06a12766e4 +Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> +Date: Sat Oct 11 16:25:05 2025 +0530 + + Implemented the Minimum Path Sum algorithm in R. (#172) + +commit e33a9c68b50d209d76c019a0456c2422664ee535 +Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> +Date: Sat Oct 11 16:23:24 2025 +0530 + + feat: added ternary search (#177) + +commit 4c17308965c2067587ca2b4c70b23ed632fda5f9 +Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> +Date: Sat Oct 11 16:19:47 2025 +0530 + + Implemented the Matrix Chain Multiplication algorithm in R. 
(#173) + +commit 66bc83f2bf75054b3f732036600966aa5c1022d8 +Author: Copilot <198982749+Copilot@users.noreply.github.com> +Date: Sat Oct 11 13:13:01 2025 +0300 + + Move misplaced algorithms to correct folders (#188) + +commit ad44b946ce0e0f47a94719f0f11b4eaaa1631661 +Author: Copilot <198982749+Copilot@users.noreply.github.com> +Date: Sat Oct 11 13:05:11 2025 +0300 + + ✨ Enhance Copilot instructions with comprehensive contribution guidelines (#190) + +commit 2c17bf7f083504f12a9aacdeae5b31d412585030 +Author: Andrii Siriak +Date: Sat Oct 11 11:48:57 2025 +0200 + + Create copilot-instructions.md (#186) + +commit b397c7d15dea0030aee1a454cd376537126c7386 +Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> +Date: Sat Oct 11 14:58:38 2025 +0530 + + Implemented the Coin Change algorithm in R. (#170) + +commit 39b9816674da70109816746f9537790f56288d51 +Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> +Date: Sat Oct 11 14:57:26 2025 +0530 + + added shortest common super sequence (#180) + +commit eb97f01a3a3dcc7fe28383bc5e9bfcc654527b48 +Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> +Date: Sat Oct 11 14:55:38 2025 +0530 + + feat:added minimum palindromic insertion (#181) + +commit 7ca7e9555d8646666fd3ffd41bd2c527bb74b6ef +Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> +Date: Sat Oct 11 14:42:34 2025 +0530 + + added manacher algorithm (#178) + +commit f9ad73230a7102c1fd7e9ad016b36b3efcfe39ea +Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> +Date: Sat Oct 11 14:19:39 2025 +0530 + + add levenshtein (#179) + +commit 5bb715513cfdd49a4ffa987033dcf507bd890752 +Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> +Date: Sat Oct 11 14:18:59 2025 +0530 + + feat: Add Z Algorithm string search in R (#176) + +commit e92a3f21a8cbae8d4828cd4f16b77adbdb64e076 +Author: Supratim <109270340+sgindeed@users.noreply.github.com> +Date: Sat Oct 11 14:18:34 2025 +0530 + + Add Burrows-Wheeler 
Transform (BWT) implementation in R (#184) + +commit 1887b9346a4e043b0e821c34fb4963fc0327bea6 +Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> +Date: Thu Oct 9 03:32:13 2025 +0530 + + feat: implement Longest Increasing Subsequence algorithm in R (#169) + +commit e5f601222adb584472c8f5ccf9c1e7baebf0c6b2 +Author: Supratim <109270340+sgindeed@users.noreply.github.com> +Date: Thu Oct 9 03:01:56 2025 +0530 + + feat: Add Rabin–Karp string search algorithm in R (#166) + +commit 96737fe963113a4caa1945e0048592bc0bbf8b9e +Author: Supratim <109270340+sgindeed@users.noreply.github.com> +Date: Thu Oct 9 02:34:51 2025 +0530 + + feat: Add Minimum Window Substring algorithm in R (#165) + +commit 8c18f07eb90e5b1bd5ba595c5cbcc490e64c615c +Author: Supratim <109270340+sgindeed@users.noreply.github.com> +Date: Thu Oct 9 02:30:19 2025 +0530 + + feat: Add Longest Palindromic Subsequence algorithm in R (#164) + +commit 9e23362e429f1350e5b1a17668e3fbb7d6cfd777 +Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> +Date: Thu Oct 9 02:23:38 2025 +0530 + + feat- 0/1 Knapsack Problem (Dynamic Programming) in R (#167) + +commit 72ad1cc59b8e2e3e0971ea8bf9264b3979a195d3 +Author: Supratim <109270340+sgindeed@users.noreply.github.com> +Date: Wed Oct 8 17:40:31 2025 +0530 + + feat: Add R program to find longest substring without repeating characters (#163) + +commit d408fea8e7a33d5ef39c5d2907b3d3f926a2684c +Author: Supratim <109270340+sgindeed@users.noreply.github.com> +Date: Wed Oct 8 13:06:04 2025 +0530 + + Create unique.letters.count.R (#162) + +commit cd836f75a18150fdba2ab6c92a762918679bddb8 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Wed Oct 8 03:22:27 2025 +0530 + + Add Longest Common Subsequence (LCS) dynamic programming algorithm (#158) + +commit 365ec08676862369c69b077473d1b5ba6a1b8322 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Mon Oct 6 22:09:05 2025 +0530 + + Add Binary Search Tree (BST) (#159) + +commit 
68bc3c84a2328baa65030f3474ec168740d9e9e7 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Mon Oct 6 02:36:34 2025 +0530 + + Add Sieve of Eratosthenes algorithm (#154) + +commit df205e73e11c94e4a2aa33db83085a9c033f5aa4 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Mon Oct 6 02:36:00 2025 +0530 + + Add Dijkstra's shortest path algorithm (#153) + +commit b0f8ccf076a8b6c1e9e15904b6ecda9b4e684099 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Mon Oct 6 02:34:00 2025 +0530 + + Add Breadth-First Search (BFS) (#152) + +commit b3d0b78f302dc66864af73c66a9ebdcb14b32412 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Mon Oct 6 02:26:51 2025 +0530 + + Add Depth-First Search (DFS) algorithm (#151) + +commit 6e76d3422e584ecb1159a08eb12d7a2bdcff96b7 +Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> +Date: Sat Oct 4 23:43:58 2025 +0530 + + Add Extended Euclidean Algorithm (#155) + +commit 6b0fd3b5e79a4f2706fb579e391b25ba711131a1 +Author: Pradnya Ingle <146155532+Pradnyaa05@users.noreply.github.com> +Date: Sat Apr 19 18:42:09 2025 +0530 + + Update contribution guidelines (#147) + +commit 74547073b5776205d9522f776c833df9954b2684 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Tue Nov 26 17:10:10 2024 +0100 + + Add maskWords.R (#144) + +commit b46d3ac3be75d6e13ee72d507924e3b10bc7ed0a +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Nov 13 22:18:47 2024 +0100 + + Add shorten.vector.R (#143) + +commit 191af3f0e4f2f799ca095f68a91ab94cefe631cd +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Nov 6 22:08:53 2024 +0100 + + Move findPalindrome.R to string_manipulation folder (#139) + +commit 27e2420f557d394d2679e8f8163fdfd31f7ec133 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Nov 6 22:08:36 2024 +0100 + + Move is.anagram.R to string_manipulation folder (#140) + +commit 
91525faf60f3004170f8493226e6ce7bca327e37 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Nov 6 22:08:17 2024 +0100 + + Move is.lower.R to string_manipulation folder (#141) + +commit 37eb076a00448137a9f0ea42c41bb2eabfa01126 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Nov 6 22:07:29 2024 +0100 + + Move is.upper.R to string_manipulation folder (#142) + +commit 7ab44f65f543ef05bbccb95f4cefa21e3ae6a568 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Mon Nov 4 20:41:49 2024 +0100 + + Change folder name (#138) + +commit 2a7072558072e0223f4c29651aa9075cf7c0b7b1 +Author: Simon Waldherr +Date: Tue Oct 29 21:24:58 2024 +0100 + + Add Patience Sort (#137) + +commit 5da148ecd1ab8d7416d1d15e835553211a546464 +Author: Simon Waldherr +Date: Tue Oct 29 21:24:15 2024 +0100 + + Add strand sort (#136) + +commit ad2f0963b20a8304e157d726eaf717bcc116b26a +Author: Simon Waldherr +Date: Tue Oct 29 21:23:46 2024 +0100 + + Add common divisor and common multiple (#135) + +commit acbb8d8766a9bd771129c660a3b782461a522a43 +Author: Simon Waldherr +Date: Tue Oct 29 21:23:18 2024 +0100 + + Add permutation calculation example (#134) + +commit 92324e2ed5cc174854e21d5a527632e32ba5da53 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Tue Oct 29 18:38:38 2024 +0100 + + Add an algorithm for checking if a string is in lowercase (#131) + +commit c2dcaff0007bfc8f9086c2f3c00cb69b9fc473b8 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Tue Oct 29 18:38:22 2024 +0100 + + Add an algorithm for checking if a string is in uppercase (#130) + +commit ce8a6948838c725d123825872ef9b2df74fe1330 +Author: Simon Waldherr +Date: Tue Oct 29 18:37:12 2024 +0100 + + Add gnome sort (#133) + +commit 64b7b70be500795a4e9b52cf72a0283cf972ae6a +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Fri Oct 25 15:57:35 2024 +0100 + + Add an algorithm for 
determining the number of possible rearrangements of a string (#129) + +commit 7f007339efcf9d01ced45960dd1a4e086e607467 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Oct 23 18:52:10 2024 +0100 + + Add an algorithm for checking anagrams (#128) + +commit 61eed3860d479581e3aa18abb6032538ad596c37 +Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> +Date: Wed Oct 23 11:35:50 2024 +0100 + + Add an algorithm that finds palindromes in text (#127) + +commit 898a0a1fda67fec17f579bbb7df0ebf5caf301ef +Author: Vineet Kumar <108144301+whyvineet@users.noreply.github.com> +Date: Mon Oct 7 18:29:38 2024 +0530 + + Add Tim Sort (#125) + +commit a1aeafc84a375243befd6839621fa6a392e33ec1 +Author: Akshat Sharma <111536616+Akshat111111@users.noreply.github.com> +Date: Tue Mar 12 14:35:04 2024 +0530 + + Add Monte Carlo simulation, Markowitz portfolio optimization, and Kalman filter (#123) + +commit 3ddddeec99dcffe5ae577747e6c831b2388178c1 +Author: Andrii Siriak +Date: Tue Mar 12 10:17:13 2024 +0200 + + Remove outdated documentation by fixing documentation_workflow.yml (#121) + +commit 8381a8d6a06ae9d4f48cd69f8d3ab11607893772 +Author: Rohit Chaudhari <100275369+rohitkbc@users.noreply.github.com> +Date: Sun Nov 19 15:12:46 2023 +0530 + + Add Gitpod (#119) + +commit 348c099e5b9d1a4245e8d33655af212b694af04d +Author: Saurav <91366385+Saurav-IIITU@users.noreply.github.com> +Date: Fri Nov 3 18:37:43 2023 +0530 + + Add topological sort (#117) + +commit 1c7e4a23522149e74bc45971f074849f451b496e +Author: Riya Khandelwal <114740796+Khandelwal05@users.noreply.github.com> +Date: Sat Oct 14 18:48:58 2023 +0530 + + Add cocktail_sort.r (#114) + +commit 8beb87a6c3c0251083c63b1553d8de749d2feaf3 +Author: paras-2407 <131509174+paras-2407@users.noreply.github.com> +Date: Wed Oct 11 01:47:32 2023 +0530 + + Update directory.md (#112) + +commit f5056677ece79ee39be8c14b3b20d5fa6a9a4ef9 +Author: paras-2407 <131509174+paras-2407@users.noreply.github.com> +Date: Mon 
Oct 9 21:02:46 2023 +0530 + + Add binary_search.r (fixes #107) (#108) + +commit 4f35b315fa16c10fe93246aa1385913e9f66ff84 +Author: paras-2407 <131509174+paras-2407@users.noreply.github.com> +Date: Sun Oct 8 00:45:13 2023 +0530 + + Add linear_search.r (#106) + +commit 74b05246256de287702e5ea2385fc7c877bd24de +Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> +Date: Fri Oct 6 22:28:14 2023 +0530 + + Add pancake_sort (#101) + +commit bd0cb3348d3d1ea46e8aedd8bd2fb4dedab44848 +Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> +Date: Fri Oct 6 22:26:00 2023 +0530 + + Add shell_sort (#102) + +commit 07a1c231cb5a2e1cdc9b2d6940d68598b3fc5e0f +Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> +Date: Thu Oct 5 19:08:27 2023 +0530 + + Add wiggle sort (#97) + +commit d73ef9dfc5a891af14b10ce7ee7b057938b051bb +Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> +Date: Thu Oct 5 19:05:40 2023 +0530 + + Add binary_insertion_sort.r (#95) + +commit bc04a424bdf93b96feb0a0a8c94f70c2205eb9ca +Author: BSzmolke <40246238+BSzmolke@users.noreply.github.com> +Date: Mon May 29 13:25:30 2023 +0200 + + Add amicable number checker (#92) diff --git a/graph_algorithms/graph_coloring.r b/graph_algorithms/graph_coloring.r new file mode 100644 index 00000000..afee9c20 --- /dev/null +++ b/graph_algorithms/graph_coloring.r @@ -0,0 +1,252 @@ +# Graph Coloring Algorithm using Backtracking +# +# The graph coloring problem involves assigning colors to vertices of a graph such that +# no two adjacent vertices share the same color. This implementation uses backtracking +# to find a valid coloring with a given number of colors (chromatic number). 
+# +# Time Complexity: O(m^V) where m is number of colors and V is number of vertices +# Space Complexity: O(V) for recursion stack and color assignment array +# +# Input: graph as adjacency matrix (n x n), number of colors +# Output: color assignment for each vertex, or NULL if no valid coloring exists + +graph_coloring <- function(graph, num_colors) { + n <- nrow(graph) + colors <- rep(0, n) + + # Check if color assignment is safe for vertex v + is_safe <- function(v, c) { + for (i in 1:n) { + if (graph[v, i] == 1 && colors[i] == c) { + return(FALSE) + } + } + return(TRUE) + } + + # Backtracking function to color vertices + color_vertex <- function(v) { + if (v > n) { + return(TRUE) + } + + for (c in 1:num_colors) { + if (is_safe(v, c)) { + colors[v] <<- c + + if (color_vertex(v + 1)) { + return(TRUE) + } + + colors[v] <<- 0 + } + } + + return(FALSE) + } + + if (color_vertex(1)) { + return(list( + success = TRUE, + colors = colors, + num_colors_used = num_colors + )) + } else { + return(list( + success = FALSE, + colors = NULL, + num_colors_used = NULL + )) + } +} + +# Find chromatic number (minimum colors needed) +find_chromatic_number <- function(graph) { + n <- nrow(graph) + + for (num_colors in 1:n) { + result <- graph_coloring(graph, num_colors) + if (result$success) { + return(num_colors) + } + } + + return(n) +} + +# Greedy graph coloring (faster but not always optimal) +greedy_coloring <- function(graph) { + n <- nrow(graph) + colors <- rep(0, n) + + for (v in 1:n) { + available <- rep(TRUE, n) + + for (i in 1:n) { + if (graph[v, i] == 1 && colors[i] != 0) { + available[colors[i]] <- FALSE + } + } + + for (c in 1:n) { + if (available[c]) { + colors[v] <- c + break + } + } + } + + num_colors_used <- max(colors) + + return(list( + colors = colors, + num_colors_used = num_colors_used + )) +} + +# Welsh-Powell algorithm (colors vertices in descending degree order) +welsh_powell_coloring <- function(graph) { + n <- nrow(graph) + degrees <- rowSums(graph) + 
vertex_order <- order(degrees, decreasing = TRUE)
+
+  colors <- rep(0, n)
+
+  for (v in vertex_order) {
+    available <- rep(TRUE, n + 1)
+
+    for (i in 1:n) {
+      if (graph[v, i] == 1 && colors[i] != 0) {
+        available[colors[i]] <- FALSE
+      }
+    }
+
+    for (c in 1:(n + 1)) {
+      if (available[c]) {
+        colors[v] <- c
+        break
+      }
+    }
+  }
+
+  num_colors_used <- max(colors)
+
+  return(list(
+    colors = colors,
+    num_colors_used = num_colors_used
+  ))
+}
+
+# Validate coloring solution
+# Returns TRUE when no edge joins two vertices of the same color.
+# Robustness fix: a NULL or wrong-length assignment (e.g. the `colors`
+# slot of a failed graph_coloring() run) is reported as FALSE instead
+# of raising a zero-length-condition error inside the loop below.
+validate_coloring <- function(graph, colors) {
+  n <- nrow(graph)
+
+  if (is.null(colors) || length(colors) != n) {
+    return(FALSE)
+  }
+
+  for (i in 1:n) {
+    for (j in 1:n) {
+      if (graph[i, j] == 1 && colors[i] == colors[j]) {
+        return(FALSE)
+      }
+    }
+  }
+
+  return(TRUE)
+}
+
+# Example usage and tests
+cat("=== Graph Coloring Algorithm ===\n\n")
+
+# Example 1: Simple triangle graph (needs 3 colors)
+cat("Example 1: Triangle Graph\n")
+triangle <- matrix(c(
+  0, 1, 1,
+  1, 0, 1,
+  1, 1, 0
+), nrow = 3, byrow = TRUE)
+
+cat("Adjacency Matrix:\n")
+print(triangle)
+
+result1 <- graph_coloring(triangle, 3)
+cat("\nBacktracking with 3 colors:\n")
+cat("Success:", result1$success, "\n")
+cat("Color assignment:", result1$colors, "\n")
+cat("Valid:", validate_coloring(triangle, result1$colors), "\n")
+
+result1_fail <- graph_coloring(triangle, 2)
+cat("\nBacktracking with 2 colors:\n")
+cat("Success:", result1_fail$success, "\n")
+
+# Example 2: Petersen graph (chromatic number = 3)
+cat("\n\nExample 2: Petersen Graph (10 vertices)\n")
+petersen <- matrix(0, nrow = 10, ncol = 10)
+edges <- list(
+  c(1, 2), c(2, 3), c(3, 4), c(4, 5), c(5, 1),
+  c(1, 6), c(2, 7), c(3, 8), c(4, 9), c(5, 10),
+  c(6, 8), c(8, 10), c(10, 7), c(7, 9), c(9, 6)
+)
+for (edge in edges) {
+  petersen[edge[1], edge[2]] <- 1
+  petersen[edge[2], edge[1]] <- 1
+}
+
+cat("Finding chromatic number...\n")
+chromatic_num <- find_chromatic_number(petersen)
+cat("Chromatic number:", chromatic_num, "\n")
+
+result2 <- graph_coloring(petersen, chromatic_num)
+cat("Color assignment:", result2$colors, "\n")
+cat("Valid:", validate_coloring(petersen, result2$colors), "\n") + +# Example 3: Bipartite graph (needs 2 colors) +cat("\n\nExample 3: Bipartite Graph K(3,3)\n") +bipartite <- matrix(c( + 0, 0, 0, 1, 1, 1, + 0, 0, 0, 1, 1, 1, + 0, 0, 0, 1, 1, 1, + 1, 1, 1, 0, 0, 0, + 1, 1, 1, 0, 0, 0, + 1, 1, 1, 0, 0, 0 +), nrow = 6, byrow = TRUE) + +result3 <- graph_coloring(bipartite, 2) +cat("Backtracking with 2 colors:\n") +cat("Success:", result3$success, "\n") +cat("Color assignment:", result3$colors, "\n") +cat("Valid:", validate_coloring(bipartite, result3$colors), "\n") + +# Example 4: Compare algorithms +cat("\n\nExample 4: Algorithm Comparison on Random Graph\n") +set.seed(42) +n <- 8 +random_graph <- matrix(0, nrow = n, ncol = n) +for (i in 1:(n-1)) { + for (j in (i+1):n) { + if (runif(1) < 0.3) { + random_graph[i, j] <- 1 + random_graph[j, i] <- 1 + } + } +} + +cat("Graph size:", n, "vertices\n") +cat("Number of edges:", sum(random_graph) / 2, "\n\n") + +greedy_result <- greedy_coloring(random_graph) +cat("Greedy Coloring:\n") +cat("Colors used:", greedy_result$num_colors_used, "\n") +cat("Color assignment:", greedy_result$colors, "\n") +cat("Valid:", validate_coloring(random_graph, greedy_result$colors), "\n\n") + +wp_result <- welsh_powell_coloring(random_graph) +cat("Welsh-Powell Coloring:\n") +cat("Colors used:", wp_result$num_colors_used, "\n") +cat("Color assignment:", wp_result$colors, "\n") +cat("Valid:", validate_coloring(random_graph, wp_result$colors), "\n\n") + +chromatic <- find_chromatic_number(random_graph) +cat("Optimal (Backtracking):\n") +cat("Chromatic number:", chromatic, "\n") + +optimal_result <- graph_coloring(random_graph, chromatic) +cat("Color assignment:", optimal_result$colors, "\n") +cat("Valid:", validate_coloring(random_graph, optimal_result$colors), "\n") From c79833715103d9a3bd30eee05f261ba98fcf87f9 Mon Sep 17 00:00:00 2001 From: Prathamesh Kalshetti Date: Mon, 20 Oct 2025 08:00:46 +0530 Subject: [PATCH 03/10] feat_dinics_algo --- 
# ==============================================================
# Dinic's Maximum Flow Algorithm
# ==============================================================
# Builds level graphs with BFS and pushes blocking flows with DFS.
#
# Time Complexity: O(V^2 * E)
# Space Complexity: O(V + E)
#
# NOTE(review): the previous version stored per-edge flow in nested lists and
# updated it from a helper with `network$...$flow <<- ...`. Because R passes
# arguments by value and `<<-` resolves lexically (here: the global env, where
# no `network` exists), the first augmenting push raised "object 'network' not
# found" — and even with a global present, the caller's copy would never see
# the updates, and `iter[u] <- iter[u] + 1` never persisted across recursion.
# Mutable solver state now lives in an environment local to dinic_run().

# Create one directed edge record.
#   to       - head vertex
#   capacity - edge capacity
#   flow     - current flow (0 initially)
#   rev_idx  - index of the paired reverse edge in graph[[to]]
create_edge <- function(to, capacity, flow = 0, rev_idx = NULL) {
  list(to = to, capacity = capacity, flow = flow, rev_idx = rev_idx)
}

# Create an empty flow network with n vertices (adjacency lists of edges).
create_flow_network <- function(n) {
  graph <- vector("list", n)
  for (i in seq_len(n)) {
    graph[[i]] <- list()
  }
  list(graph = graph, n = n)
}

# Add a directed edge u -> v with the given capacity, plus the residual
# reverse edge v -> u with capacity 0. Returns the updated network
# (value semantics: callers must reassign the result).
add_edge <- function(network, u, v, capacity) {
  forward_idx <- length(network$graph[[u]]) + 1
  reverse_idx <- length(network$graph[[v]]) + 1
  network$graph[[u]][[forward_idx]] <- create_edge(v, capacity, 0, reverse_idx)
  network$graph[[v]][[reverse_idx]] <- create_edge(u, 0, 0, forward_idx)
  network
}

# BFS over the residual graph. Returns level[v] = residual-graph distance
# from source, or -1 when unreachable. Stops early once the sink is dequeued
# (all vertices at the sink's level are already labelled by then).
bfs_level_graph <- function(network, source, sink) {
  level <- rep(-1, network$n)
  level[source] <- 0
  queue <- c(source)
  head_idx <- 1
  while (head_idx <= length(queue)) {
    u <- queue[head_idx]
    head_idx <- head_idx + 1
    if (u == sink) break
    for (edge in network$graph[[u]]) {
      if (level[edge$to] == -1 && edge$capacity > edge$flow) {
        level[edge$to] <- level[u] + 1
        queue <- c(queue, edge$to)
      }
    }
  }
  level
}

# Run Dinic's algorithm. Returns list(max_flow = <numeric>,
# network = <network with final flows filled in>). The returned network is
# what get_min_cut() / get_min_cut_edges() must be called on — the input
# network is NOT modified (R value semantics).
dinic_run <- function(network, source, sink) {
  if (source == sink) {
    return(list(max_flow = 0, network = network))
  }

  # Mutable state shared by the recursive DFS: flows and per-phase iterators.
  st <- new.env(parent = emptyenv())
  st$graph <- network$graph
  n <- network$n

  # DFS one augmenting path along the level graph; returns the flow pushed
  # (0 when no path exists from u). Persists updates through `st`.
  push_flow <- function(u, pushed, level) {
    if (u == sink) return(pushed)
    while (st$iter[u] <= length(st$graph[[u]])) {
      idx <- st$iter[u]
      edge <- st$graph[[u]][[idx]]
      v <- edge$to
      if (level[v] == level[u] + 1 && edge$capacity > edge$flow) {
        sent <- push_flow(v, min(pushed, edge$capacity - edge$flow), level)
        if (sent > 0) {
          st$graph[[u]][[idx]]$flow <- st$graph[[u]][[idx]]$flow + sent
          st$graph[[v]][[edge$rev_idx]]$flow <-
            st$graph[[v]][[edge$rev_idx]]$flow - sent
          return(sent)
        }
      }
      # Edge exhausted for this phase; skip it on every later visit to u.
      st$iter[u] <- st$iter[u] + 1
    }
    0
  }

  max_flow <- 0
  repeat {
    level <- bfs_level_graph(list(graph = st$graph, n = n), source, sink)
    if (level[sink] == -1) break  # sink unreachable: maximum flow reached
    st$iter <- rep(1, n)          # fresh per-phase edge iterators
    repeat {
      pushed <- push_flow(source, Inf, level)
      if (pushed == 0) break
      max_flow <- max_flow + pushed
    }
  }

  list(max_flow = max_flow, network = list(graph = st$graph, n = n))
}

# Backward-compatible wrapper: maximum flow value only.
dinic_max_flow <- function(network, source, sink) {
  dinic_run(network, source, sink)$max_flow
}

# Vertices reachable from source in the residual graph — the source side of
# the minimum cut. Pass the network returned by dinic_run().
get_min_cut <- function(network, source) {
  visited <- rep(FALSE, network$n)
  visited[source] <- TRUE
  queue <- c(source)
  head_idx <- 1
  while (head_idx <= length(queue)) {
    u <- queue[head_idx]
    head_idx <- head_idx + 1
    for (edge in network$graph[[u]]) {
      if (!visited[edge$to] && edge$capacity > edge$flow) {
        visited[edge$to] <- TRUE
        queue <- c(queue, edge$to)
      }
    }
  }
  which(visited)
}

# Original (capacity > 0) edges crossing from the source side to the sink
# side of the minimum cut. Pass the network returned by dinic_run().
get_min_cut_edges <- function(network, source) {
  reachable <- get_min_cut(network, source)
  reachable_set <- rep(FALSE, network$n)
  reachable_set[reachable] <- TRUE
  cut_edges <- list()
  for (u in reachable) {
    for (edge in network$graph[[u]]) {
      if (!reachable_set[edge$to] && edge$capacity > 0) {
        cut_edges[[length(cut_edges) + 1]] <-
          list(from = u, to = edge$to, capacity = edge$capacity)
      }
    }
  }
  cut_edges
}

# ---------------- Example usage and tests ----------------
cat("=== Dinic's Maximum Flow Algorithm ===\n\n")

cat("Example 1: Simple Flow Network (6 vertices)\n")
network1 <- create_flow_network(6)
network1 <- add_edge(network1, 1, 2, 10)
network1 <- add_edge(network1, 1, 3, 10)
network1 <- add_edge(network1, 2, 4, 2)
network1 <- add_edge(network1, 2, 5, 10)
network1 <- add_edge(network1, 3, 4, 4)
network1 <- add_edge(network1, 3, 5, 10)
network1 <- add_edge(network1, 4, 5, 10)

res1 <- dinic_run(network1, 1, 5)
cat("Maximum flow from vertex 1 to vertex 5:", res1$max_flow, "\n")
min_cut1 <- get_min_cut(res1$network, 1)
cat("Minimum cut (source side vertices):", paste(min_cut1, collapse = ", "), "\n")
cut_edges1 <- get_min_cut_edges(res1$network, 1)
cat("Cut edges:\n")
for (edge in cut_edges1) {
  cat(sprintf("  %d -> %d (capacity: %d)\n", edge$from, edge$to, edge$capacity))
}

# Example 2: Bipartite matching via max flow
cat("\n\nExample 2: Bipartite Matching Problem\n")
cat("Left set: {1, 2, 3}, Right set: {4, 5, 6}\n")
cat("Edges: 1-4, 1-5, 2-5, 2-6, 3-4, 3-6\n")
network2 <- create_flow_network(8)
network2 <- add_edge(network2, 7, 1, 1)  # super-source 7 -> left side
network2 <- add_edge(network2, 7, 2, 1)
network2 <- add_edge(network2, 7, 3, 1)
network2 <- add_edge(network2, 1, 4, 1)
network2 <- add_edge(network2, 1, 5, 1)
network2 <- add_edge(network2, 2, 5, 1)
network2 <- add_edge(network2, 2, 6, 1)
network2 <- add_edge(network2, 3, 4, 1)
network2 <- add_edge(network2, 3, 6, 1)
network2 <- add_edge(network2, 4, 8, 1)  # right side -> super-sink 8
network2 <- add_edge(network2, 5, 8, 1)
network2 <- add_edge(network2, 6, 8, 1)
max_flow2 <- dinic_max_flow(network2, 7, 8)
cat("Maximum matching size:", max_flow2, "\n")

# Example 3: Classic network with multiple paths (max flow 23)
cat("\n\nExample 3: Complex Network with Multiple Paths\n")
network3 <- create_flow_network(6)
network3 <- add_edge(network3, 1, 2, 16)
network3 <- add_edge(network3, 1, 3, 13)
network3 <- add_edge(network3, 2, 3, 10)
network3 <- add_edge(network3, 2, 4, 12)
network3 <- add_edge(network3, 3, 2, 4)
network3 <- add_edge(network3, 3, 5, 14)
network3 <- add_edge(network3, 4, 3, 9)
network3 <- add_edge(network3, 4, 6, 20)
network3 <- add_edge(network3, 5, 4, 7)
network3 <- add_edge(network3, 5, 6, 4)
res3 <- dinic_run(network3, 1, 6)
cat("Maximum flow from vertex 1 to vertex 6:", res3$max_flow, "\n")
min_cut3 <- get_min_cut(res3$network, 1)
cat("Minimum cut (source side vertices):", paste(min_cut3, collapse = ", "), "\n")

# Example 4: Bottleneck edges cap the flow at 10
cat("\n\nExample 4: Network with Bottleneck Edge\n")
network4 <- create_flow_network(5)
network4 <- add_edge(network4, 1, 2, 100)
network4 <- add_edge(network4, 1, 3, 100)
network4 <- add_edge(network4, 2, 4, 5)   # bottleneck
network4 <- add_edge(network4, 3, 4, 5)   # bottleneck
network4 <- add_edge(network4, 4, 5, 100)
max_flow4 <- dinic_max_flow(network4, 1, 5)
cat("Maximum flow from vertex 1 to vertex 5:", max_flow4, "\n")
cat("Note: Flow limited by bottleneck edges (total capacity = 10)\n")

# Example 5: Multi-source multi-sink via super-source/super-sink reduction
cat("\n\nExample 5: Multi-Source Multi-Sink Problem\n")
cat("Sources: {2, 3}, Sinks: {5, 6}\n")
network5 <- create_flow_network(8)
network5 <- add_edge(network5, 1, 2, 15)  # super-source 1
network5 <- add_edge(network5, 1, 3, 15)
network5 <- add_edge(network5, 2, 4, 10)
network5 <- add_edge(network5, 3, 4, 10)
network5 <- add_edge(network5, 4, 5, 8)
network5 <- add_edge(network5, 4, 6, 8)
network5 <- add_edge(network5, 5, 7, 10)  # super-sink 7
network5 <- add_edge(network5, 6, 7, 10)
max_flow5 <- dinic_max_flow(network5, 1, 7)
cat("Maximum flow from super-source to super-sink:", max_flow5, "\n")

cat("\n=== All tests completed successfully ===\n")
# Wildcard Pattern Matching using Dynamic Programming
#
# Matches a text string against a pattern containing wildcards:
#   '*' matches any sequence of characters (including the empty sequence)
#   '?' matches exactly one character
#
# Time Complexity: O(m * n), m = text length, n = pattern length
# Space Complexity: O(m * n) for the DP table (O(n) for the optimized form)
#
# NOTE(review): the previous version iterated `for (j in 2:(n + 1))` and
# `for (i in 2:(m + 1))`; when the text or pattern was empty these sequences
# ran BACKWARDS (2:1), producing out-of-bounds writes — the file's own test
# case ("", "?") and edge case ("", "") crashed the script. All loops now use
# seq_len(), which is empty when the length is 0.

# TRUE if `pattern` matches all of `text`.
# dp[i + 1, j + 1] == TRUE  <=>  first i chars of text match first j chars
# of pattern (row/col 1 represent the empty prefixes).
isMatch <- function(text, pattern) {
  m <- nchar(text)
  n <- nchar(pattern)
  dp <- matrix(FALSE, nrow = m + 1, ncol = n + 1)
  dp[1, 1] <- TRUE  # empty pattern matches empty text

  # A leading run of '*' can still match the empty text.
  for (j in seq_len(n)) {
    if (substr(pattern, j, j) == "*") {
      dp[1, j + 1] <- dp[1, j]
    }
  }

  for (i in seq_len(m)) {
    text_char <- substr(text, i, i)
    for (j in seq_len(n)) {
      pattern_char <- substr(pattern, j, j)
      if (pattern_char == "*") {
        # '*' matches empty (drop '*') or swallows one more text char.
        dp[i + 1, j + 1] <- dp[i + 1, j] || dp[i, j + 1]
      } else if (pattern_char == "?" || pattern_char == text_char) {
        dp[i + 1, j + 1] <- dp[i, j]
      }
    }
  }

  dp[m + 1, n + 1]
}

# Space-optimized O(n) variant: keeps only the previous and current DP rows.
# Returns prev[n + 1] so the m == 0 case (loop never runs) is still correct —
# the old version returned an unassigned `curr` for empty text.
isMatch_optimized <- function(text, pattern) {
  m <- nchar(text)
  n <- nchar(pattern)
  prev <- rep(FALSE, n + 1)
  prev[1] <- TRUE
  for (j in seq_len(n)) {
    if (substr(pattern, j, j) == "*") {
      prev[j + 1] <- prev[j]
    }
  }

  for (i in seq_len(m)) {
    curr <- rep(FALSE, n + 1)
    text_char <- substr(text, i, i)
    for (j in seq_len(n)) {
      pattern_char <- substr(pattern, j, j)
      if (pattern_char == "*") {
        curr[j + 1] <- curr[j] || prev[j + 1]
      } else if (pattern_char == "?" || pattern_char == text_char) {
        curr[j + 1] <- prev[j]
      }
    }
    prev <- curr
  }

  prev[n + 1]
}

# Recursive backtracking alternative (exponential worst case; for reference).
isMatch_backtrack <- function(text, pattern) {
  match_helper <- function(t_idx, p_idx) {
    if (p_idx > nchar(pattern)) {
      return(t_idx > nchar(text))
    }
    if (t_idx > nchar(text)) {
      # Remaining pattern must be all '*' to match the exhausted text.
      while (p_idx <= nchar(pattern)) {
        if (substr(pattern, p_idx, p_idx) != "*") return(FALSE)
        p_idx <- p_idx + 1
      }
      return(TRUE)
    }
    pattern_char <- substr(pattern, p_idx, p_idx)
    if (pattern_char == "*") {
      # '*' matches empty, or consumes one text char and stays.
      return(match_helper(t_idx, p_idx + 1) || match_helper(t_idx + 1, p_idx))
    }
    if (pattern_char == "?" || pattern_char == substr(text, t_idx, t_idx)) {
      return(match_helper(t_idx + 1, p_idx + 1))
    }
    FALSE
  }
  match_helper(1, 1)
}

# All substrings of `text` that match `pattern`; each hit is
# list(start, end, text). O(n^2) substrings, each checked with isMatch.
find_matches <- function(text, pattern) {
  matches <- list()
  n <- nchar(text)
  for (start in seq_len(n)) {
    for (end in start:n) {  # start <= n here, so start:n is always forward
      candidate <- substr(text, start, end)
      if (isMatch(candidate, pattern)) {
        matches[[length(matches) + 1]] <-
          list(start = start, end = end, text = candidate)
      }
    }
  }
  matches
}

# Number of strings in `texts` fully matched by `pattern`.
count_matches <- function(texts, pattern) {
  sum(vapply(texts, function(text) isMatch(text, pattern), logical(1)))
}

# ---------------- Example usage and tests ----------------
cat("=== Wildcard Pattern Matching ===\n\n")

test_cases <- list(
  list(text = "aa", pattern = "a", expected = FALSE),
  list(text = "aa", pattern = "*", expected = TRUE),
  list(text = "cb", pattern = "?a", expected = FALSE),
  list(text = "adceb", pattern = "*a*b", expected = TRUE),
  list(text = "acdcb", pattern = "a*c?b", expected = FALSE),
  list(text = "abc", pattern = "abc", expected = TRUE),
  list(text = "abc", pattern = "a?c", expected = TRUE),
  list(text = "abc", pattern = "a*c", expected = TRUE),
  list(text = "", pattern = "*", expected = TRUE),
  list(text = "", pattern = "?", expected = FALSE),
  list(text = "mississippi", pattern = "m*iss*p*", expected = TRUE),
  list(text = "hello", pattern = "h*o", expected = TRUE),
  list(text = "world", pattern = "w?r*", expected = TRUE),
  list(text = "test", pattern = "t??t", expected = TRUE),
  list(text = "abcdef", pattern = "a*f", expected = TRUE)
)

cat("Running test cases:\n\n")
passed <- 0
failed <- 0
for (i in seq_along(test_cases)) {
  tc <- test_cases[[i]]
  result <- isMatch(tc$text, tc$pattern)
  status <- if (result == tc$expected) "PASS" else "FAIL"
  if (result == tc$expected) passed <- passed + 1 else failed <- failed + 1
  cat(sprintf("Test %d: text='%s', pattern='%s' => %s [%s]\n",
              i, tc$text, tc$pattern, result, status))
}
cat(sprintf("\nResults: %d passed, %d failed out of %d tests\n\n",
            passed, failed, length(test_cases)))

cat("Complex Pattern Examples:\n")
examples <- list(
  list(text = "programming", pattern = "pro*ing"),
  list(text = "dynamic", pattern = "d?n?m?c"),
  list(text = "algorithm", pattern = "*gor*"),
  list(text = "computer", pattern = "c*t*r"),
  list(text = "science", pattern = "s*e*e")
)
for (ex in examples) {
  cat(sprintf("  '%s' matches '%s': %s\n",
              ex$text, ex$pattern, isMatch(ex$text, ex$pattern)))
}

cat("\nSpace-Optimized Version Test:\n")
text1 <- "abcdefghij"
pattern1 <- "a*f*j"
cat(sprintf("Text: '%s', Pattern: '%s'\n", text1, pattern1))
cat(sprintf("Normal DP: %s, Optimized: %s\n",
            isMatch(text1, pattern1), isMatch_optimized(text1, pattern1)))

cat("\nMatching Multiple Texts:\n")
texts <- c("cat", "bat", "rat", "hat", "mat", "sat")
pattern2 <- "?at"
cat(sprintf("Pattern: '%s'\n", pattern2))
cat("Matching texts:\n")
for (text in texts) {
  if (isMatch(text, pattern2)) cat(sprintf("  - %s\n", text))
}

cat("\nWildcard Star Patterns:\n")
files <- c("document.txt", "image.png", "script.r", "data.csv", "report.pdf")
for (pat in c("*.txt", "*.r")) {
  cat(sprintf("Pattern: '%s'\n", pat))
  cat("Matching files:\n")
  for (file in files) {
    if (isMatch(file, pat)) cat(sprintf("  - %s\n", file))
  }
  cat("\n")
}

# These edge cases crashed the old implementation; they now run cleanly.
cat("Edge Cases:\n")
edge_cases <- list(
  list(text = "", pattern = ""),
  list(text = "a", pattern = ""),
  list(text = "", pattern = "a"),
  list(text = "***", pattern = "*"),
  list(text = "aaa", pattern = "a*a")
)
for (ec in edge_cases) {
  cat(sprintf("  text='%s', pattern='%s' => %s\n",
              ec$text, ec$pattern, isMatch(ec$text, ec$pattern)))
}

cat("\n=== All tests completed ===\n")
# OPTICS (Ordering Points To Identify the Clustering Structure)
#
# Density-based clustering that extends DBSCAN by producing an ordering of
# the points together with reachability distances, from which clusters can
# be extracted at different density thresholds.
#
# Time Complexity: O(n^2) with a dense distance matrix
# Space Complexity: O(n^2)
#
# Input:  data matrix/data.frame, eps (max radius), minPts (min neighbors)
# Output: list(order, reachability, core_distance, eps, minPts), where
#         reachability/core_distance are aligned with `order`.

optics <- function(data, eps = 0.5, minPts = 5) {
  if (is.data.frame(data)) {
    data <- as.matrix(data)
  }
  n <- nrow(data)
  dist_matrix <- as.matrix(dist(data, method = "euclidean"))

  reachability <- rep(Inf, n)
  core_dist <- rep(Inf, n)
  processed <- rep(FALSE, n)
  ordered_points <- integer(0)

  # Core distance: distance to the minPts-th neighbor, Inf for non-core points.
  for (i in seq_len(n)) {
    neighbors <- which(dist_matrix[i, ] <= eps)
    neighbors <- neighbors[neighbors != i]
    if (length(neighbors) >= minPts) {
      core_dist[i] <- sort(dist_matrix[i, neighbors])[minPts]
    }
  }

  # Insert/relax unprocessed neighbors of `center_idx` into the seed list,
  # kept sorted by reachability (a simple priority queue). `<<-` is safe
  # here: reachability is a local of optics(), the lexical parent.
  update_seeds <- function(seeds, neighbors, center_idx) {
    c_dist <- core_dist[center_idx]
    for (neighbor in neighbors) {
      if (processed[neighbor]) next
      new_reach_dist <- max(c_dist, dist_matrix[center_idx, neighbor])
      if (reachability[neighbor] == Inf) {
        reachability[neighbor] <<- new_reach_dist
        seeds <- c(seeds, neighbor)
      } else if (new_reach_dist < reachability[neighbor]) {
        reachability[neighbor] <<- new_reach_dist
      }
    }
    if (length(seeds) > 0) {
      seeds <- seeds[order(reachability[seeds])]
    }
    seeds
  }

  # Main loop: expand each unprocessed point; core points grow a cluster
  # region by repeatedly popping the seed with smallest reachability.
  for (i in seq_len(n)) {
    if (processed[i]) next
    neighbors <- which(dist_matrix[i, ] <= eps)
    neighbors <- neighbors[neighbors != i]
    processed[i] <- TRUE
    ordered_points <- c(ordered_points, i)

    if (core_dist[i] != Inf) {
      seeds <- update_seeds(integer(0), neighbors, i)
      while (length(seeds) > 0) {
        current <- seeds[1]
        seeds <- seeds[-1]
        if (processed[current]) next
        neighbors_current <- which(dist_matrix[current, ] <= eps)
        neighbors_current <- neighbors_current[neighbors_current != current]
        processed[current] <- TRUE
        ordered_points <- c(ordered_points, current)
        if (core_dist[current] != Inf) {
          seeds <- update_seeds(seeds, neighbors_current, current)
        }
      }
    }
  }

  list(
    order = ordered_points,
    reachability = reachability[ordered_points],
    core_distance = core_dist[ordered_points],
    eps = eps,
    minPts = minPts
  )
}

# DBSCAN-equivalent cluster extraction at threshold eps_cluster.
# Returns a cluster id per ORIGINAL point index (0 = noise).
extract_dbscan_clusters <- function(optics_result, eps_cluster) {
  n <- length(optics_result$order)
  cluster_id <- rep(0, n)
  current_cluster <- 0

  for (i in seq_len(n)) {
    if (optics_result$reachability[i] > eps_cluster) {
      if (optics_result$core_distance[i] <= eps_cluster) {
        current_cluster <- current_cluster + 1  # start of a new cluster
        cluster_id[i] <- current_cluster
      } else {
        cluster_id[i] <- 0                      # noise
      }
    } else {
      if (current_cluster == 0) {
        current_cluster <- current_cluster + 1
      }
      cluster_id[i] <- current_cluster
    }
  }

  result <- rep(0, n)
  result[optics_result$order] <- cluster_id
  result
}

# Xi-method extraction: pair steep-down with steep-up areas of the
# reachability plot. Guarded against n < 2 and all-Inf reachability
# (the old version looped 2:n backwards and took max() of an empty vector).
extract_xi_clusters <- function(optics_result, xi = 0.05, min_cluster_size = 5) {
  n <- length(optics_result$order)
  reach <- optics_result$reachability

  finite_reach <- reach[is.finite(reach)]
  max_reach <- if (length(finite_reach) > 0) max(finite_reach) else 1
  reach[is.infinite(reach)] <- max_reach * 2

  steep_up <- integer(0)
  steep_down <- integer(0)
  if (n >= 2) {
    for (i in 2:n) {
      if (reach[i] > reach[i - 1] * (1 + xi)) steep_up <- c(steep_up, i)
      if (reach[i] < reach[i - 1] * (1 - xi)) steep_down <- c(steep_down, i)
    }
  }

  cluster_id <- rep(0, n)
  current_cluster <- 0
  if (length(steep_down) > 0 && length(steep_up) > 0) {
    for (sd_idx in steep_down) {
      matching_up <- steep_up[steep_up > sd_idx]
      if (length(matching_up) > 0) {
        su <- matching_up[1]
        if (su - sd_idx >= min_cluster_size) {
          current_cluster <- current_cluster + 1
          cluster_id[sd_idx:su] <- current_cluster
        }
      }
    }
  }

  result <- rep(0, n)
  result[optics_result$order] <- cluster_id
  result
}

# Bar plot of reachability distances in cluster order.
plot_reachability <- function(optics_result, main = "OPTICS Reachability Plot") {
  reach <- optics_result$reachability
  finite_reach <- reach[is.finite(reach)]
  max_reach <- if (length(finite_reach) > 0) max(finite_reach) else 1
  reach[is.infinite(reach)] <- max_reach * 1.2
  plot(seq_along(reach), reach, type = "h", lwd = 2,
       xlab = "Cluster Order", ylab = "Reachability Distance",
       main = main, col = "steelblue")
  grid()
}

# Mean silhouette over clustered (non-noise) points; NA if < 2 clusters.
calculate_silhouette <- function(data, clusters) {
  if (is.data.frame(data)) {
    data <- as.matrix(data)
  }
  unique_clusters <- unique(clusters[clusters != 0])
  if (length(unique_clusters) < 2) {
    return(NA)
  }
  dist_matrix <- as.matrix(dist(data))
  n <- nrow(data)
  silhouette_scores <- rep(0, n)

  for (i in seq_len(n)) {
    if (clusters[i] == 0) next
    same_cluster <- which(clusters == clusters[i])
    same_cluster <- same_cluster[same_cluster != i]
    if (length(same_cluster) == 0) {
      silhouette_scores[i] <- 0
      next
    }
    a <- mean(dist_matrix[i, same_cluster])  # mean intra-cluster distance
    b <- Inf                                 # nearest other-cluster distance
    for (other_cluster in unique_clusters) {
      if (other_cluster == clusters[i]) next
      other_points <- which(clusters == other_cluster)
      if (length(other_points) > 0) {
        b <- min(b, mean(dist_matrix[i, other_points]))
      }
    }
    silhouette_scores[i] <- (b - a) / max(a, b)
  }

  mean(silhouette_scores[clusters != 0])
}

# ---------------- Example usage and tests ----------------
cat("=== OPTICS Clustering Algorithm ===\n\n")

cat("Example 1: Simple 2D Dataset\n")
set.seed(42)
cluster1 <- matrix(rnorm(100, mean = 0, sd = 0.5), ncol = 2)
cluster2 <- matrix(rnorm(100, mean = 3, sd = 0.5), ncol = 2)
cluster3 <- matrix(rnorm(100, mean = c(1.5, 3), sd = 0.5), ncol = 2)
data1 <- rbind(cluster1, cluster2, cluster3)

cat("Running OPTICS...\n")
result1 <- optics(data1, eps = 1.0, minPts = 5)
cat("Processed", length(result1$order), "points\n")
cat("First 10 reachability distances:", head(result1$reachability, 10), "\n")

clusters1 <- extract_dbscan_clusters(result1, eps_cluster = 0.8)
cat("Clusters found:", length(unique(clusters1[clusters1 != 0])), "\n")
cat("Noise points:", sum(clusters1 == 0), "\n")
if (length(unique(clusters1[clusters1 != 0])) >= 2) {
  cat("Silhouette score:", round(calculate_silhouette(data1, clusters1), 4), "\n")
}

cat("\n\nExample 2: Varying Density Dataset\n")
set.seed(123)
dense_cluster <- matrix(rnorm(200, mean = 0, sd = 0.3), ncol = 2)
sparse_cluster <- matrix(rnorm(100, mean = 4, sd = 1.0), ncol = 2)
data2 <- rbind(dense_cluster, sparse_cluster)
result2 <- optics(data2, eps = 2.0, minPts = 5)
cat("Processed", length(result2$order), "points\n")
clusters2_dbscan <- extract_dbscan_clusters(result2, eps_cluster = 1.0)
cat("DBSCAN-style extraction - Clusters:",
    length(unique(clusters2_dbscan[clusters2_dbscan != 0])), "\n")
clusters2_xi <- extract_xi_clusters(result2, xi = 0.05, min_cluster_size = 10)
cat("Xi-method extraction - Clusters:",
    length(unique(clusters2_xi[clusters2_xi != 0])), "\n")

cat("\n\nExample 3: Iris Dataset\n")
data(iris)
iris_data <- iris[, 1:4]
result3 <- optics(iris_data, eps = 2.0, minPts = 5)
cat("Processed", length(result3$order), "points\n")
clusters3 <- extract_dbscan_clusters(result3, eps_cluster = 0.5)
cat("Clusters found:", length(unique(clusters3[clusters3 != 0])), "\n")
cat("Noise points:", sum(clusters3 == 0), "\n")
if (length(unique(clusters3[clusters3 != 0])) >= 2) {
  cat("Silhouette score:", round(calculate_silhouette(iris_data, clusters3), 4), "\n")
}

cat("\n\nExample 4: Two Moons Dataset (Non-convex)\n")
set.seed(456)
n_points <- 100
t1 <- seq(0, pi, length.out = n_points)
moon1 <- cbind(cos(t1), sin(t1)) +
  matrix(rnorm(n_points * 2, sd = 0.1), ncol = 2)
moon2 <- cbind(1 - cos(t1), 1 - sin(t1) - 0.5) +
  matrix(rnorm(n_points * 2, sd = 0.1), ncol = 2)
moons_data <- rbind(moon1, moon2)
result4 <- optics(moons_data, eps = 0.5, minPts = 5)
clusters4 <- extract_dbscan_clusters(result4, eps_cluster = 0.3)
cat("Clusters found:", length(unique(clusters4[clusters4 != 0])), "\n")
cat("Noise points:", sum(clusters4 == 0), "\n")
if (length(unique(clusters4[clusters4 != 0])) >= 2) {
  cat("Silhouette score:", round(calculate_silhouette(moons_data, clusters4), 4), "\n")
}

cat("\n\nExample 5: Performance Test\n")
sizes <- c(100, 200, 500)
cat("Dataset size vs execution time:\n")
for (size in sizes) {
  test_data <- matrix(rnorm(size * 2), ncol = 2)
  start_time <- Sys.time()
  optics(test_data, eps = 1.0, minPts = 5)
  elapsed <- as.numeric(difftime(Sys.time(), start_time, units = "secs"))
  cat(sprintf("  %d points: %.3f seconds\n", size, elapsed))
}

cat("\n=== All tests completed successfully ===\n")
# join_multiple_datasets.r
#
# Sequentially inner-joins a list of datasets (data frames or CSV file
# paths) on their common columns. CSV paths are read first, empty/invalid
# entries are dropped, and the survivors are merged pairwise with dplyr's
# inner_join. Missing values in the result are replaced with empty strings.
# Complexity is O(n * m): n datasets, m average rows per dataset.
#
# Example:
#   result <- join_multiple_datasets(list("data1.csv", "data2.csv", df3))
#   head(result)

library(dplyr)
library(purrr)

#' Join Multiple Datasets by Common Columns
#'
#' Takes a list of data frames and/or CSV file paths and joins them
#' sequentially on their common columns using inner joins. CSV files are
#' read when paths are provided; empty or NULL datasets are removed before
#' joining. NAs in the merged result are replaced with empty strings (note:
#' this coerces columns that contained NA to character).
#'
#' @param inputs A list of data frames and/or character CSV file paths.
#' @return A data frame: the inner join of all valid inputs on their
#'   common columns.
#' @examples
#' df1 <- data.frame(id = 1:3, val1 = c("A", "B", "C"))
#' df2 <- data.frame(id = 2:3, val2 = c("X", "Y"))
#' df3 <- data.frame(id = 3, val3 = "Z")
#' result <- join_multiple_datasets(list(df1, df2, df3))
#' print(result)
join_multiple_datasets <- function(inputs) {
  # Read CSV files when character paths are provided.
  datasets <- lapply(inputs, function(x) {
    if (is.character(x)) read.csv(x, stringsAsFactors = FALSE) else x
  })

  # Drop NULL or zero-row datasets. vapply (not sapply) so the result is
  # guaranteed logical even for empty/degenerate input lists.
  is_invalid <- vapply(
    datasets,
    function(df) is.null(df) || nrow(df) == 0,
    logical(1)
  )
  datasets <- datasets[!is_invalid]
  if (length(datasets) < 2) {
    stop(sprintf(
      "At least two valid datasets are required, but only %d valid dataset(s) found.",
      length(datasets)
    ))
  }

  # Columns shared by a pair of datasets (the join keys).
  get_common_cols <- function(df1, df2) intersect(names(df1), names(df2))

  # Fold the list down with pairwise inner joins on the shared columns.
  # across(everything()) replaces the superseded mutate_all().
  reduce(datasets, function(df1, df2) {
    common_cols <- get_common_cols(df1, df2)
    if (length(common_cols) == 0) {
      stop("No common columns found for joining.")
    }
    suppressWarnings(
      inner_join(df1, df2, by = common_cols) %>%
        mutate(across(everything(), ~ ifelse(is.na(.x), "", .x)))
    )
  })
}
Implementation in R (#216) - -commit 65e82bffad452bcc0237df9551acfc90e3d69243 -Author: Pratik -Date: Sun Oct 12 19:04:20 2025 +0530 - - Add comprehensive Value at Risk (VaR) and Expected Shortfall (ES) calculator (#201) - -commit a5cbeecd63b6efedd7fc9a94d61143e73115a0dc -Author: Pratik -Date: Sun Oct 12 19:00:13 2025 +0530 - - Add Time Series Analysis and ARIMA Modeling Implementation in R (#202) - -commit d711ac1f0db4909653825ae531968c4393e27fff -Author: Andrii Siriak -Date: Sun Oct 12 16:26:48 2025 +0300 - - Create stale.yml - -commit 69a0c3045cade6b7c8409f55a0d4b5ae29a1b083 -Author: Pratik -Date: Sun Oct 12 18:53:20 2025 +0530 - - Add Floyd–Warshall All-Pairs Shortest Path Algorithm Implementation in R (#203) - -commit 6d15d42ac89877ab1a54cf5707c03cc3a659947e -Author: Pratik -Date: Sun Oct 12 15:18:30 2025 +0530 - - created a comprehensive Black-Scholes option pricing algorithm (#200) - -commit 79ca778cf37b2fca295f893ed00785e61f2396a8 -Author: Pratik -Date: Sun Oct 12 15:18:20 2025 +0530 - - Implement Gradient Boosting Regressor with Decision Trees in R (#199) - -commit e6c0b52d8e1300b88c6971e45f87745425bcf4a4 -Author: Pratik -Date: Sun Oct 12 15:18:11 2025 +0530 - - Tarjan's Bridge Finding Algorithm (#198) - -commit b811a36c4f33cf4551f3196a1735319f861a9bb5 -Author: Pratik -Date: Sun Oct 12 13:42:20 2025 +0530 - - feat : Add the Bellman-Ford Shortest Path Algorithm in R (#192) - -commit 47ff5ed6db5ae97765218b89073c2ea96b8cc5e7 -Author: Srishti Soni <92056170+shimmer12@users.noreply.github.com> -Date: Sun Oct 12 02:41:13 2025 +0530 - - Add modular exponentiation function (#194) - -commit b5d1199c77508d1bb99c4c7bda1cb44a19570556 -Author: Srishti Soni <92056170+shimmer12@users.noreply.github.com> -Date: Sun Oct 12 02:40:30 2025 +0530 - - Implement Newton-Raphson method in R (#196) - -commit b90abace62e50712a58eba9ff717f89fe87434de -Author: Srishti Soni <92056170+shimmer12@users.noreply.github.com> -Date: Sun Oct 12 02:38:56 2025 +0530 - - Add one-way ANOVA function 
implementation (#195) - -commit 7a5ea9c1aa0c3e79557515e083b646c408a84d48 -Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> -Date: Sun Oct 12 02:21:20 2025 +0530 - - Implemented the Subset Sum Problem algorithm in R. (#171) - -commit 52f65cfbc046862917dc33a28cff489cca0fdfbb -Author: Pratik -Date: Sun Oct 12 02:19:57 2025 +0530 - - Add Catalan Numbers algorithm implementation (#191) - -commit aa686ce907b27380ca30a23afa1f6d06a12766e4 -Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> -Date: Sat Oct 11 16:25:05 2025 +0530 - - Implemented the Minimum Path Sum algorithm in R. (#172) - -commit e33a9c68b50d209d76c019a0456c2422664ee535 -Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> -Date: Sat Oct 11 16:23:24 2025 +0530 - - feat: added ternary search (#177) - -commit 4c17308965c2067587ca2b4c70b23ed632fda5f9 -Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> -Date: Sat Oct 11 16:19:47 2025 +0530 - - Implemented the Matrix Chain Multiplication algorithm in R. (#173) - -commit 66bc83f2bf75054b3f732036600966aa5c1022d8 -Author: Copilot <198982749+Copilot@users.noreply.github.com> -Date: Sat Oct 11 13:13:01 2025 +0300 - - Move misplaced algorithms to correct folders (#188) - -commit ad44b946ce0e0f47a94719f0f11b4eaaa1631661 -Author: Copilot <198982749+Copilot@users.noreply.github.com> -Date: Sat Oct 11 13:05:11 2025 +0300 - - ✨ Enhance Copilot instructions with comprehensive contribution guidelines (#190) - -commit 2c17bf7f083504f12a9aacdeae5b31d412585030 -Author: Andrii Siriak -Date: Sat Oct 11 11:48:57 2025 +0200 - - Create copilot-instructions.md (#186) - -commit b397c7d15dea0030aee1a454cd376537126c7386 -Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> -Date: Sat Oct 11 14:58:38 2025 +0530 - - Implemented the Coin Change algorithm in R. 
(#170) - -commit 39b9816674da70109816746f9537790f56288d51 -Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> -Date: Sat Oct 11 14:57:26 2025 +0530 - - added shortest common super sequence (#180) - -commit eb97f01a3a3dcc7fe28383bc5e9bfcc654527b48 -Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> -Date: Sat Oct 11 14:55:38 2025 +0530 - - feat:added minimum palindromic insertion (#181) - -commit 7ca7e9555d8646666fd3ffd41bd2c527bb74b6ef -Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> -Date: Sat Oct 11 14:42:34 2025 +0530 - - added manacher algorithm (#178) - -commit f9ad73230a7102c1fd7e9ad016b36b3efcfe39ea -Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> -Date: Sat Oct 11 14:19:39 2025 +0530 - - add levenshtein (#179) - -commit 5bb715513cfdd49a4ffa987033dcf507bd890752 -Author: Arpita Roy <100989922+Arpita23r@users.noreply.github.com> -Date: Sat Oct 11 14:18:59 2025 +0530 - - feat: Add Z Algorithm string search in R (#176) - -commit e92a3f21a8cbae8d4828cd4f16b77adbdb64e076 -Author: Supratim <109270340+sgindeed@users.noreply.github.com> -Date: Sat Oct 11 14:18:34 2025 +0530 - - Add Burrows-Wheeler Transform (BWT) implementation in R (#184) - -commit 1887b9346a4e043b0e821c34fb4963fc0327bea6 -Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> -Date: Thu Oct 9 03:32:13 2025 +0530 - - feat: implement Longest Increasing Subsequence algorithm in R (#169) - -commit e5f601222adb584472c8f5ccf9c1e7baebf0c6b2 -Author: Supratim <109270340+sgindeed@users.noreply.github.com> -Date: Thu Oct 9 03:01:56 2025 +0530 - - feat: Add Rabin–Karp string search algorithm in R (#166) - -commit 96737fe963113a4caa1945e0048592bc0bbf8b9e -Author: Supratim <109270340+sgindeed@users.noreply.github.com> -Date: Thu Oct 9 02:34:51 2025 +0530 - - feat: Add Minimum Window Substring algorithm in R (#165) - -commit 8c18f07eb90e5b1bd5ba595c5cbcc490e64c615c -Author: Supratim <109270340+sgindeed@users.noreply.github.com> 
-Date: Thu Oct 9 02:30:19 2025 +0530 - - feat: Add Longest Palindromic Subsequence algorithm in R (#164) - -commit 9e23362e429f1350e5b1a17668e3fbb7d6cfd777 -Author: Sachin Pangal <151670745+Orthodox-64@users.noreply.github.com> -Date: Thu Oct 9 02:23:38 2025 +0530 - - feat- 0/1 Knapsack Problem (Dynamic Programming) in R (#167) - -commit 72ad1cc59b8e2e3e0971ea8bf9264b3979a195d3 -Author: Supratim <109270340+sgindeed@users.noreply.github.com> -Date: Wed Oct 8 17:40:31 2025 +0530 - - feat: Add R program to find longest substring without repeating characters (#163) - -commit d408fea8e7a33d5ef39c5d2907b3d3f926a2684c -Author: Supratim <109270340+sgindeed@users.noreply.github.com> -Date: Wed Oct 8 13:06:04 2025 +0530 - - Create unique.letters.count.R (#162) - -commit cd836f75a18150fdba2ab6c92a762918679bddb8 -Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Wed Oct 8 03:22:27 2025 +0530 - - Add Longest Common Subsequence (LCS) dynamic programming algorithm (#158) - -commit 365ec08676862369c69b077473d1b5ba6a1b8322 -Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Mon Oct 6 22:09:05 2025 +0530 - - Add Binary Search Tree (BST) (#159) - -commit 68bc3c84a2328baa65030f3474ec168740d9e9e7 -Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Mon Oct 6 02:36:34 2025 +0530 - - Add Sieve of Eratosthenes algorithm (#154) - -commit df205e73e11c94e4a2aa33db83085a9c033f5aa4 -Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Mon Oct 6 02:36:00 2025 +0530 - - Add Dijkstra's shortest path algorithm (#153) - -commit b0f8ccf076a8b6c1e9e15904b6ecda9b4e684099 -Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Mon Oct 6 02:34:00 2025 +0530 - - Add Breadth-First Search (BFS) (#152) - -commit b3d0b78f302dc66864af73c66a9ebdcb14b32412 -Author: PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Mon Oct 6 02:26:51 2025 +0530 - - Add Depth-First Search (DFS) algorithm (#151) - -commit 6e76d3422e584ecb1159a08eb12d7a2bdcff96b7 -Author: 
PIYUSH KUMAR SINGH <121piyush466mits@gmail.com> -Date: Sat Oct 4 23:43:58 2025 +0530 - - Add Extended Euclidean Algorithm (#155) - -commit 6b0fd3b5e79a4f2706fb579e391b25ba711131a1 -Author: Pradnya Ingle <146155532+Pradnyaa05@users.noreply.github.com> -Date: Sat Apr 19 18:42:09 2025 +0530 - - Update contribution guidelines (#147) - -commit 74547073b5776205d9522f776c833df9954b2684 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Tue Nov 26 17:10:10 2024 +0100 - - Add maskWords.R (#144) - -commit b46d3ac3be75d6e13ee72d507924e3b10bc7ed0a -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Nov 13 22:18:47 2024 +0100 - - Add shorten.vector.R (#143) - -commit 191af3f0e4f2f799ca095f68a91ab94cefe631cd -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Nov 6 22:08:53 2024 +0100 - - Move findPalindrome.R to string_manipulation folder (#139) - -commit 27e2420f557d394d2679e8f8163fdfd31f7ec133 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Nov 6 22:08:36 2024 +0100 - - Move is.anagram.R to string_manipulation folder (#140) - -commit 91525faf60f3004170f8493226e6ce7bca327e37 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Nov 6 22:08:17 2024 +0100 - - Move is.lower.R to string_manipulation folder (#141) - -commit 37eb076a00448137a9f0ea42c41bb2eabfa01126 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Nov 6 22:07:29 2024 +0100 - - Move is.upper.R to string_manipulation folder (#142) - -commit 7ab44f65f543ef05bbccb95f4cefa21e3ae6a568 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Mon Nov 4 20:41:49 2024 +0100 - - Change folder name (#138) - -commit 2a7072558072e0223f4c29651aa9075cf7c0b7b1 -Author: Simon Waldherr -Date: Tue Oct 29 21:24:58 2024 +0100 - - Add Patience Sort (#137) - -commit 5da148ecd1ab8d7416d1d15e835553211a546464 -Author: Simon Waldherr -Date: Tue Oct 29 
21:24:15 2024 +0100 - - Add strand sort (#136) - -commit ad2f0963b20a8304e157d726eaf717bcc116b26a -Author: Simon Waldherr -Date: Tue Oct 29 21:23:46 2024 +0100 - - Add common divisor and common multiple (#135) - -commit acbb8d8766a9bd771129c660a3b782461a522a43 -Author: Simon Waldherr -Date: Tue Oct 29 21:23:18 2024 +0100 - - Add permutation calculation example (#134) - -commit 92324e2ed5cc174854e21d5a527632e32ba5da53 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Tue Oct 29 18:38:38 2024 +0100 - - Add an algorithm for checking if a string is in lowercase (#131) - -commit c2dcaff0007bfc8f9086c2f3c00cb69b9fc473b8 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Tue Oct 29 18:38:22 2024 +0100 - - Add an algorithm for checking if a string is in uppercase (#130) - -commit ce8a6948838c725d123825872ef9b2df74fe1330 -Author: Simon Waldherr -Date: Tue Oct 29 18:37:12 2024 +0100 - - Add gnome sort (#133) - -commit 64b7b70be500795a4e9b52cf72a0283cf972ae6a -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Fri Oct 25 15:57:35 2024 +0100 - - Add an algorithm for determining the number of possible rearrangements of a string (#129) - -commit 7f007339efcf9d01ced45960dd1a4e086e607467 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Oct 23 18:52:10 2024 +0100 - - Add an algorithm for checking anagrams (#128) - -commit 61eed3860d479581e3aa18abb6032538ad596c37 -Author: Ifeanyi Idiaye <72707830+Ifeanyi55@users.noreply.github.com> -Date: Wed Oct 23 11:35:50 2024 +0100 - - Add an algorithm that finds palindromes in text (#127) - -commit 898a0a1fda67fec17f579bbb7df0ebf5caf301ef -Author: Vineet Kumar <108144301+whyvineet@users.noreply.github.com> -Date: Mon Oct 7 18:29:38 2024 +0530 - - Add Tim Sort (#125) - -commit a1aeafc84a375243befd6839621fa6a392e33ec1 -Author: Akshat Sharma <111536616+Akshat111111@users.noreply.github.com> -Date: Tue Mar 12 14:35:04 2024 +0530 - - Add 
Monte Carlo simulation, Markowitz portfolio optimization, and Kalman filter (#123) - -commit 3ddddeec99dcffe5ae577747e6c831b2388178c1 -Author: Andrii Siriak -Date: Tue Mar 12 10:17:13 2024 +0200 - - Remove outdated documentation by fixing documentation_workflow.yml (#121) - -commit 8381a8d6a06ae9d4f48cd69f8d3ab11607893772 -Author: Rohit Chaudhari <100275369+rohitkbc@users.noreply.github.com> -Date: Sun Nov 19 15:12:46 2023 +0530 - - Add Gitpod (#119) - -commit 348c099e5b9d1a4245e8d33655af212b694af04d -Author: Saurav <91366385+Saurav-IIITU@users.noreply.github.com> -Date: Fri Nov 3 18:37:43 2023 +0530 - - Add topological sort (#117) - -commit 1c7e4a23522149e74bc45971f074849f451b496e -Author: Riya Khandelwal <114740796+Khandelwal05@users.noreply.github.com> -Date: Sat Oct 14 18:48:58 2023 +0530 - - Add cocktail_sort.r (#114) - -commit 8beb87a6c3c0251083c63b1553d8de749d2feaf3 -Author: paras-2407 <131509174+paras-2407@users.noreply.github.com> -Date: Wed Oct 11 01:47:32 2023 +0530 - - Update directory.md (#112) - -commit f5056677ece79ee39be8c14b3b20d5fa6a9a4ef9 -Author: paras-2407 <131509174+paras-2407@users.noreply.github.com> -Date: Mon Oct 9 21:02:46 2023 +0530 - - Add binary_search.r (fixes #107) (#108) - -commit 4f35b315fa16c10fe93246aa1385913e9f66ff84 -Author: paras-2407 <131509174+paras-2407@users.noreply.github.com> -Date: Sun Oct 8 00:45:13 2023 +0530 - - Add linear_search.r (#106) - -commit 74b05246256de287702e5ea2385fc7c877bd24de -Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> -Date: Fri Oct 6 22:28:14 2023 +0530 - - Add pancake_sort (#101) - -commit bd0cb3348d3d1ea46e8aedd8bd2fb4dedab44848 -Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> -Date: Fri Oct 6 22:26:00 2023 +0530 - - Add shell_sort (#102) - -commit 07a1c231cb5a2e1cdc9b2d6940d68598b3fc5e0f -Author: iMeet07 <97329296+iMeet07@users.noreply.github.com> -Date: Thu Oct 5 19:08:27 2023 +0530 - - Add wiggle sort (#97) - -commit d73ef9dfc5a891af14b10ce7ee7b057938b051bb -Author: 
iMeet07 <97329296+iMeet07@users.noreply.github.com> -Date: Thu Oct 5 19:05:40 2023 +0530 - - Add binary_insertion_sort.r (#95) - -commit bc04a424bdf93b96feb0a0a8c94f70c2205eb9ca -Author: BSzmolke <40246238+BSzmolke@users.noreply.github.com> -Date: Mon May 29 13:25:30 2023 +0200 - - Add amicable number checker (#92) diff --git a/graph_algorithms/bidirectional_bfs.r b/graph_algorithms/bidirectional_bfs.r index 8daf6f38..f01cbc10 100644 --- a/graph_algorithms/bidirectional_bfs.r +++ b/graph_algorithms/bidirectional_bfs.r @@ -10,7 +10,8 @@ # Space Complexity: O(V) # # Input: -# graph - adjacency list (list of integer vectors) +# graph - adjacency list (named list with character keys, values are integer vectors) +# Note: Graph keys must be character strings (e.g., "1", "2", etc.) # source - integer (starting vertex) # target - integer (destination vertex) #