# Readable reconstruction of the Python sources carried by this
# whitespace-collapsed patch.  The same patch also appends
# "Arvinder Singh Dhoul" to the AUTHORS file.

import unittest

# ---------------------------------------------------------------------------
# pydatastructs/trees/fenwich_tree.py
# ---------------------------------------------------------------------------

__all__ = [
    'fenwich_tree',
    'FenwickTree',
]


class fenwich_tree:
    """
    Fenwick tree (binary indexed tree) over a fixed-length list of numbers.

    The public API is 0-based.  ``update`` assigns a new value to one
    position; ``prefix_sum`` and ``range_sum`` return inclusive sums.  Each of
    those walks at most one tree slot per bit of the index.

    Attributes:
        size: Number of elements represented by the tree.
        tree: 1-indexed array of partial sums (``tree[0]`` is never used).
        original_array: Current value of every element.
    """

    def __init__(self, size_or_array):
        """
        Initializes the Fenwick tree.

        Args:
            size_or_array: Either a non-negative ``int`` (the tree then
                represents that many zeros) or a ``list`` of initial values
                (copied, the caller's list is not modified).

        Raises:
            ValueError: If ``size_or_array`` is neither an ``int`` nor a
                ``list``, is a ``bool``, or is a negative integer.
        """
        # bool is a subclass of int, so True/False would otherwise be
        # accepted as sizes 1/0; that is never what the caller meant.
        if isinstance(size_or_array, bool):
            raise ValueError("size_or_array must be an integer or a list.")

        if isinstance(size_or_array, int):
            if size_or_array < 0:
                raise ValueError("size must be a non-negative integer.")
            self.size = size_or_array
            self.tree = [0] * (self.size + 1)
            self.original_array = [0] * self.size
        elif isinstance(size_or_array, list):
            self.original_array = list(size_or_array)
            self.size = len(self.original_array)
            # Linear-time build: seed every slot with its own element, then
            # push each slot's partial sum into the next slot that covers it.
            self.tree = [0] + self.original_array
            for index in range(1, self.size + 1):
                parent = index + (index & -index)
                if parent <= self.size:
                    self.tree[parent] += self.tree[index]
        else:
            raise ValueError("size_or_array must be an integer or a list.")

    def _update_tree(self, index, delta):
        """
        Adds ``delta`` to every tree slot whose range covers ``index``.

        Args:
            index: Position in the original array (0-based).
            delta: Amount by which the element at ``index`` changed.
        """
        index += 1  # the internal array is 1-indexed
        while index <= self.size:
            self.tree[index] += delta
            index += index & (-index)  # jump to the next covering slot

    def update(self, index, value):
        """
        Sets the element at ``index`` to ``value``.

        Args:
            index: The index to update (0-based).
            value: The new value (it replaces the old one, it is not added).

        Raises:
            IndexError: If ``index`` is outside ``[0, size)``.
        """
        if not (0 <= index < self.size):
            raise IndexError("Index out of bounds")
        delta = value - self.original_array[index]
        self.original_array[index] = value
        self._update_tree(index, delta)

    def prefix_sum(self, index):
        """
        Calculates the sum of the elements at positions ``0..index``.

        Args:
            index: Last position to include (0-based, inclusive).

        Returns:
            The prefix sum.

        Raises:
            IndexError: If ``index`` is outside ``[0, size)``; this includes
                every index when the tree is empty.
        """
        if not (0 <= index < self.size):
            raise IndexError("Index out of bounds")
        index += 1  # the internal array is 1-indexed
        sum_val = 0
        while index > 0:
            sum_val += self.tree[index]
            index -= index & (-index)  # drop the lowest set bit
        return sum_val

    def range_sum(self, start_index, end_index):
        """
        Calculates the sum of the elements at ``start_index..end_index``.

        Args:
            start_index: First position of the range (0-based, inclusive).
            end_index: Last position of the range (0-based, inclusive).

        Returns:
            The sum of the elements in the range.

        Raises:
            IndexError: If either index is outside ``[0, size)`` or
                ``start_index > end_index``.
        """
        if not (0 <= start_index <= end_index < self.size):
            raise IndexError("Indices out of bounds")
        total = self.prefix_sum(end_index)
        if start_index > 0:
            total -= self.prefix_sum(start_index - 1)
        return total


# Correctly spelled, PascalCase alias; the original name stays available.
FenwickTree = fenwich_tree


# ---------------------------------------------------------------------------
# pydatastructs/trees/tests/test_fenwich_tree.py
# ---------------------------------------------------------------------------

class TestFenwickTree(unittest.TestCase):

    def test_initialization_with_size(self):
        ft = fenwich_tree(5)
        self.assertEqual(ft.size, 5)
        self.assertEqual(ft.tree, [0, 0, 0, 0, 0, 0])
        self.assertEqual(ft.original_array, [0, 0, 0, 0, 0])

    def test_initialization_with_array(self):
        arr = [1, 2, 3, 4, 5]
        ft = fenwich_tree(arr)
        self.assertEqual(ft.size, 5)
        self.assertEqual(ft.original_array, arr)
        # Manually calculate prefix sums and check the tree structure
        expected_tree = [0, 1, 3, 3, 10, 5]
        self.assertEqual(ft.tree, expected_tree)

    def test_initialization_with_empty_array(self):
        arr = []
        ft = fenwich_tree(arr)
        self.assertEqual(ft.size, 0)
        self.assertEqual(ft.tree, [0])
        self.assertEqual(ft.original_array, [])

    def test_initialization_with_invalid_input(self):
        with self.assertRaises(ValueError):
            fenwich_tree("invalid")

    def test_initialization_with_negative_or_bool_size(self):
        for bad in (-1, True, False):
            with self.assertRaises(ValueError):
                fenwich_tree(bad)

    def test_update_single_element(self):
        ft = fenwich_tree([1, 2, 3, 4, 5])
        ft.update(1, 10)
        self.assertEqual(ft.original_array, [1, 10, 3, 4, 5])
        expected_tree = [0, 1, 11, 3, 18, 5]
        self.assertEqual(ft.tree, expected_tree)

    def test_update_out_of_bounds(self):
        ft = fenwich_tree(5)
        with self.assertRaises(IndexError):
            ft.update(5, 10)
        with self.assertRaises(IndexError):
            ft.update(-1, 10)

    def test_prefix_sum_positive_indices(self):
        arr = [1, 2, 3, 4, 5]
        ft = fenwich_tree(arr)
        self.assertEqual(ft.prefix_sum(0), 1)
        self.assertEqual(ft.prefix_sum(1), 3)
        self.assertEqual(ft.prefix_sum(2), 6)
        self.assertEqual(ft.prefix_sum(3), 10)
        self.assertEqual(ft.prefix_sum(4), 15)

    def test_prefix_sum_out_of_bounds(self):
        ft = fenwich_tree(5)
        with self.assertRaises(IndexError):
            ft.prefix_sum(5)
        with self.assertRaises(IndexError):
            ft.prefix_sum(-1)

    def test_prefix_sum_empty_array(self):
        ft = fenwich_tree([])
        with self.assertRaises(IndexError):
            ft.prefix_sum(0)  # Should raise IndexError as size is 0

    def test_range_sum_valid_range(self):
        arr = [1, 2, 3, 4, 5]
        ft = fenwich_tree(arr)
        self.assertEqual(ft.range_sum(0, 0), 1)
        self.assertEqual(ft.range_sum(0, 1), 3)
        self.assertEqual(ft.range_sum(1, 3), 2 + 3 + 4)
        self.assertEqual(ft.range_sum(2, 4), 3 + 4 + 5)
        self.assertEqual(ft.range_sum(0, 4), 1 + 2 + 3 + 4 + 5)

    def test_range_sum_out_of_bounds(self):
        ft = fenwich_tree(5)
        with self.assertRaises(IndexError):
            ft.range_sum(0, 5)
        with self.assertRaises(IndexError):
            ft.range_sum(-1, 2)
        with self.assertRaises(IndexError):
            ft.range_sum(1, 5)
        with self.assertRaises(IndexError):
            ft.range_sum(-1, -1)

    def test_range_sum_invalid_range(self):
        ft = fenwich_tree(5)
        with self.assertRaises(IndexError):
            ft.range_sum(3, 1)

    def test_range_sum_single_element(self):
        arr = [10, 20, 30]
        ft = fenwich_tree(arr)
        self.assertEqual(ft.range_sum(0, 0), 10)
        self.assertEqual(ft.range_sum(1, 1), 20)
        self.assertEqual(ft.range_sum(2, 2), 30)

    def test_range_sum_entire_array(self):
        arr = [1, 2, 3, 4, 5]
        ft = fenwich_tree(arr)
        self.assertEqual(ft.range_sum(0, ft.size - 1), 15)

    def test_update_and_query_sequence(self):
        ft = fenwich_tree([2, 5, 1, 8, 3])
        self.assertEqual(ft.prefix_sum(3), 2 + 5 + 1 + 8)  # 16
        ft.update(1, 10)
        self.assertEqual(ft.prefix_sum(3), 2 + 10 + 1 + 8)  # 21
        self.assertEqual(ft.range_sum(0, 2), 2 + 10 + 1)  # 13
        ft.update(4, 0)
        self.assertEqual(ft.prefix_sum(4), 2 + 10 + 1 + 8 + 0)  # 21
        self.assertEqual(ft.range_sum(3, 4), 8 + 0)  # 8


# ---------------------------------------------------------------------------
# pydatastructs/trees/tests/test_trie.py
#
# These tests exercise the ``Trie`` class of pydatastructs/trees/trie.py.
# ---------------------------------------------------------------------------

def test_trie_insert_search():
    trie = Trie()
    trie.insert("apple")
    assert trie.search("apple")
    assert not trie.search("app")
    trie.insert("app")
    assert trie.search("app")


def test_trie_starts_with():
    trie = Trie()
    trie.insert("apple")
    assert trie.starts_with("app")
    assert trie.starts_with("a")
    assert not trie.starts_with("b")
    assert not trie.starts_with("applxyz")


def test_trie_empty():
    trie = Trie()
    assert not trie.search("apple")
    assert not trie.starts_with("app")


def test_trie_multiple_words():
    trie = Trie()
    trie.insert("apple")
    trie.insert("application")
    trie.insert("banana")
    assert trie.search("apple")
    assert trie.search("application")
    assert trie.search("banana")
    assert not trie.search("app")
    assert trie.starts_with("app")
    assert trie.starts_with("ban")
    assert not trie.starts_with("aplx")


def test_trie_case_sensitive():
    trie = Trie()
    trie.insert("Apple")
    assert trie.search("Apple")
    assert not trie.search("apple")


def test_count_words():
    trie = Trie()
    assert trie.count_words() == 0
    trie.insert("apple")
    assert trie.count_words() == 1
    trie.insert("app")
    assert trie.count_words() == 2
    trie.insert("apple")
    assert trie.count_words() == 2


def test_longest_common_prefix():
    trie = Trie()
    assert trie.longest_common_prefix() == ""
    trie.insert("apple")
    assert trie.longest_common_prefix() == "apple"
    trie.insert("application")
    assert trie.longest_common_prefix() == "appl"
    trie.insert("banana")
    assert trie.longest_common_prefix() == ""


def test_autocomplete():
    trie = Trie()
    trie.insert("apple")
    trie.insert("application")
    trie.insert("app")
    assert trie.autocomplete("app") == ["app", "apple", "application"]
    assert trie.autocomplete("appl") == ["apple", "application"]
    assert trie.autocomplete("b") == []


def test_bulk_insert():
    trie = Trie()
    trie.bulk_insert(["apple", "banana", "orange"])
    assert trie.search("apple")
    assert trie.search("banana")
    assert trie.search("orange")
    assert trie.count_words() == 3


def test_clear():
    trie = Trie()
    trie.insert("apple")
    trie.clear()
    assert trie.is_empty()
    assert trie.count_words() == 0
    assert not trie.search("apple")


def test_is_empty():
    trie = Trie()
    assert trie.is_empty()
    trie.insert("apple")
    assert not trie.is_empty()
    trie.clear()
    assert trie.is_empty()


def test_find_all_words():
    trie = Trie()
    trie.bulk_insert(["apple", "banana", "orange"])
    assert sorted(trie.find_all_words()) == sorted(["apple", "banana", "orange"])
    trie.clear()
    assert trie.find_all_words() == []


def test_longest_word():
    trie = Trie()
    assert trie.longest_word() is None
    trie.bulk_insert(["apple", "banana", "application"])
    assert trie.longest_word() == "application"
    trie.insert("a")
    assert trie.longest_word() == "application"
# ---------------------------------------------------------------------------
# pydatastructs/trees/trie.py
# ---------------------------------------------------------------------------

class TrieNode:
    """
    Represents a node in the Trie data structure.

    Attributes:
        children: Dict mapping a character to the child ``TrieNode`` reached
            through it; iteration follows insertion order.
        is_end_of_word: True when an inserted word ends at this node.
        word: The inserted word that ends at this node, otherwise ``None``.
    """

    # A trie allocates one node per stored character, so avoid a per-node
    # ``__dict__``.
    __slots__ = ('children', 'is_end_of_word', 'word')

    def __init__(self):
        """Initializes a TrieNode with no children that ends no word."""
        self.children = {}
        self.is_end_of_word = False
        self.word = None


class Trie:
    """
    Represents the Trie (prefix tree) data structure.

    Attributes:
        root: The ``TrieNode`` standing for the empty prefix.
        word_count: Number of distinct words currently stored.
    """

    def __init__(self):
        """Initializes an empty Trie with a root TrieNode."""
        self.root = TrieNode()
        self.word_count = 0

    def _find_node(self, prefix):
        """Returns the node reached by following ``prefix``, or ``None``."""
        node = self.root
        for char in prefix:
            node = node.children.get(char)
            if node is None:
                return None
        return node

    def insert(self, word):
        """
        Inserts a word into the Trie.

        Inserting a word that is already present changes nothing, so
        ``word_count`` counts distinct words.
        """
        node = self.root
        for char in word:
            if char not in node.children:
                node.children[char] = TrieNode()
            node = node.children[char]
        if not node.is_end_of_word:
            node.is_end_of_word = True
            node.word = word
            self.word_count += 1

    def search(self, word):
        """Returns True if exactly ``word`` was inserted, otherwise False."""
        node = self._find_node(word)
        return node is not None and node.is_end_of_word

    def starts_with(self, prefix):
        """Checks if any word in the Trie starts with the given prefix."""
        return self._find_node(prefix) is not None

    def count_words(self):
        """Returns the total number of words stored in the Trie."""
        return self.word_count

    def longest_common_prefix(self):
        """
        Finds the longest common prefix among all words in the Trie.

        Returns:
            The shared prefix, or ``""`` when the Trie is empty or the words
            have no common first character.
        """
        chars = []
        node = self.root
        # Follow the single path until it branches or a word ends on it.
        while len(node.children) == 1 and not node.is_end_of_word:
            char, node = next(iter(node.children.items()))
            chars.append(char)
        return "".join(chars)

    def autocomplete(self, prefix):
        """
        Provides a list of words that match a given prefix.

        Returns:
            Every stored word starting with ``prefix``, in depth-first
            pre-order with children visited in insertion order; ``[]`` when
            nothing matches.
        """
        start = self._find_node(prefix)
        if start is None:
            return []

        # Explicit stack instead of recursion, so a stored word longer than
        # the interpreter's recursion limit cannot raise RecursionError.
        words = []
        stack = [(start, prefix)]
        while stack:
            node, path = stack.pop()
            if node.is_end_of_word:
                words.append(path)
            # Push in reverse so the first-inserted child is visited first.
            for char, child in reversed(list(node.children.items())):
                stack.append((child, path + char))
        return words

    def bulk_insert(self, words):
        """Inserts every word of the iterable ``words`` into the Trie."""
        for word in words:
            self.insert(word)

    def clear(self):
        """Removes all words from the Trie, resetting it."""
        self.root = TrieNode()
        self.word_count = 0

    def is_empty(self):
        """Returns True if the Trie is empty, otherwise False."""
        return self.word_count == 0

    def find_all_words(self):
        """
        Retrieves all words currently stored in the Trie.

        Returns:
            The words in depth-first pre-order, children visited in
            insertion order.
        """
        # Iterative for the same reason as ``autocomplete``.
        words = []
        stack = [self.root]
        while stack:
            node = stack.pop()
            if node.is_end_of_word:
                words.append(node.word)
            stack.extend(reversed(list(node.children.values())))
        return words

    def longest_word(self):
        """
        Finds and returns the longest word in the Trie.

        Returns:
            The longest stored word (the first one found on ties), or
            ``None`` when the Trie is empty.
        """
        all_words = self.find_all_words()
        if not all_words:
            return None
        return max(all_words, key=len)