From 4c8f4727cdccd08e0a7d06e41147b500e30248c3 Mon Sep 17 00:00:00 2001 From: Zafer Balkan Date: Fri, 6 Oct 2023 23:06:08 +0300 Subject: [PATCH] Added typing to improve the use of the library (#1) These changes require moving to Python 3.9 for `pymerkle` library. The `profiler` already required Python 3.10. For details, see the vermin results: ```bash $ vermin --target=3.7 --violations --backport argparse --backport typing --eval-annotations --no-parse-comments .\ Detecting python files.. Analyzing 28 files using 8 processes.. !2, 3.8 D:\Repos\pymerkle\benchmarks\conftest.py 'typing.Final' member requires 2.7, 3.8 final variable annotations require !2, 3.8 !2, 3.9 D:\Repos\pymerkle\benchmarks\init_db.py 'typing.Final' member requires 2.7, 3.8 builtin generic type annotation (dict[..]) requires !2, 3.9 builtin generic type annotation (list[..]) requires !2, 3.9 final variable annotations require !2, 3.8 !2, 3.9 D:\Repos\pymerkle\benchmarks\test_perf.py builtin generic type annotation (dict[..]) requires !2, 3.9 !2, 3.10 D:\Repos\pymerkle\profiler\__main__.py 'typing.Final' member requires 2.7, 3.8 builtin generic type annotation (dict[..]) requires !2, 3.9 builtin generic type annotation (tuple[..]) requires !2, 3.9 final variable annotations require !2, 3.8 pattern matching requires !2, 3.10 !2, 3.9 D:\Repos\pymerkle\pymerkle\concrete\inmemory.py 'typing.Literal' member requires 2.7, 3.8 builtin generic type annotation (list[..]) requires !2, 3.9 builtin generic type annotation (tuple[..]) requires !2, 3.9 literal variable annotations require !2, 3.8 !2, 3.9 D:\Repos\pymerkle\pymerkle\concrete\sqlite.py builtin generic type annotation (list[..]) requires !2, 3.9 builtin generic type annotation (tuple[..]) requires !2, 3.9 !2, 3.9 D:\Repos\pymerkle\pymerkle\constants.py builtin generic type annotation (list[..]) requires !2, 3.9 !2, 3.9 D:\Repos\pymerkle\pymerkle\core.py builtin generic type annotation (collections.deque[..]) requires !2, 3.9 builtin generic type 
annotation (list[..]) requires !2, 3.9 builtin generic type annotation (tuple[..]) requires !2, 3.9 !2, 3.8 D:\Repos\pymerkle\pymerkle\hasher.py 'typing.Literal' member requires 2.7, 3.8 literal variable annotations require !2, 3.8 !2, 3.9 D:\Repos\pymerkle\pymerkle\proof.py builtin generic type annotation (dict[..]) requires !2, 3.9 builtin generic type annotation (list[..]) requires !2, 3.9 builtin generic type annotation (tuple[..]) requires !2, 3.9 !2, 3.9 D:\Repos\pymerkle\pymerkle\utils.py builtin generic type annotation (list[..]) requires !2, 3.9 !2, 3.9 D:\Repos\pymerkle\tests\conftest.py 'typing.Final' member requires 2.7, 3.8 builtin generic type annotation (type[..]) requires !2, 3.9 final variable annotations require !2, 3.8 Tips: - You're using potentially backported modules: typing_extensions If so, try using the following for better results: --backport typing_extensions (disable using: --no-tips) Minimum required versions: 3.10 Incompatible versions: 2 Target versions not met: 3.7 ``` --- benchmarks/conftest.py | 51 ++++--- benchmarks/init_db.py | 67 ++++----- benchmarks/test_perf.py | 23 +-- demo.py | 24 ++-- profiler/__main__.py | 117 ++++++++-------- pymerkle/__init__.py | 3 +- pymerkle/concrete/inmemory.py | 184 +++++++++++++----------- pymerkle/concrete/sqlite.py | 80 +++++------ pymerkle/constants.py | 9 +- pymerkle/core.py | 255 ++++++++++++++++------------------ pymerkle/hasher.py | 49 ++++--- pymerkle/proof.py | 48 ++++--- pymerkle/utils.py | 21 ++- requirements-dev.txt | 5 + tests/conftest.py | 59 ++++---- 15 files changed, 521 insertions(+), 474 deletions(-) diff --git a/benchmarks/conftest.py b/benchmarks/conftest.py index 99f17b56..c7cce833 100644 --- a/benchmarks/conftest.py +++ b/benchmarks/conftest.py @@ -1,46 +1,51 @@ -import os import math +import os +from typing import Final + import pytest -from pymerkle import SqliteTree as MerkleTree, constants +from pymerkle import SqliteTree as MerkleTree +from pymerkle import constants 
-current_dir = os.path.dirname(os.path.abspath(__file__)) +current_dir: str = os.path.dirname(os.path.abspath(__file__)) -DEFAULT_DB = os.path.join(current_dir, 'merkle.db') -DEFAULT_SIZE = 10 ** 6 -DEFAULT_INDEX = math.ceil(DEFAULT_SIZE / 2) -DEFAULT_ROUNDS = 100 -DEFAULT_THRESHOLD = 128 -DEFAULT_CAPACITY = 1024 ** 3 +DEFAULT_DB: Final[str] = os.path.join(current_dir, 'merkle.db') +DEFAULT_SIZE: Final[int] = 10 ** 6 +DEFAULT_INDEX: Final[int] = math.ceil(DEFAULT_SIZE / 2) +DEFAULT_ROUNDS: Final[int] = 100 +DEFAULT_THRESHOLD: Final[int] = 128 +DEFAULT_CAPACITY: Final[int] = 1024 ** 3 def pytest_addoption(parser): parser.addoption('--dbfile', type=str, default=DEFAULT_DB, - help='Database filepath') + help='Database filepath') parser.addoption('--size', type=int, default=DEFAULT_SIZE, - help='Nr entries to consider') + help='Nr entries to consider') parser.addoption('--index', type=int, default=DEFAULT_INDEX, - help='Base index for proof operations') + help='Base index for proof operations') parser.addoption('--rounds', type=int, default=DEFAULT_ROUNDS, - help='Nr rounds per benchmark') + help='Nr rounds per benchmark') parser.addoption('--algorithm', default='sha256', - choices=constants.ALGORITHMS, - help='Hash algorithm used by the tree') + choices=constants.ALGORITHMS, + help='Hash algorithm used by the tree') parser.addoption('--randomize', action='store_true', default=False, - help='Randomize function input per round') + help='Randomize function input per round') parser.addoption('--disable-optimizations', action='store_true', default=False, - help='Use unoptimized versions of core operations') + help='Use unoptimized versions of core operations') parser.addoption('--disable-cache', action='store_true', default=False, - help='Disable subroot caching') + help='Disable subroot caching') parser.addoption('--threshold', type=int, metavar='WIDTH', - default=DEFAULT_THRESHOLD, - help='Subroot cache threshold') + default=DEFAULT_THRESHOLD, + help='Subroot cache 
threshold') parser.addoption('--capacity', type=int, metavar='BYTES', - default=DEFAULT_CAPACITY, - help='Subroot cache capacity in bytes') + default=DEFAULT_CAPACITY, + help='Subroot cache capacity in bytes') + option = None -def pytest_configure(config): + +def pytest_configure(config) -> None: global option option = config.option diff --git a/benchmarks/init_db.py b/benchmarks/init_db.py index 8f8a4aba..b9abddc1 100644 --- a/benchmarks/init_db.py +++ b/benchmarks/init_db.py @@ -3,47 +3,48 @@ the database file will be overwritten if it already exists. """ +import argparse import os import sys -import argparse import time +from typing import Any, Final from pymerkle import SqliteTree, constants -current_dir = os.path.dirname(os.path.abspath(__file__)) +current_dir: str = os.path.dirname(os.path.abspath(path=__file__)) -DEFAULT_DB = os.path.join(current_dir, 'merkle.db') -DEFAULT_ALGORITHM = 'sha256' -DEFAULT_SIZE = 10 ** 8 -DEFAULT_BATCHSIZE = 10 ** 7 +DEFAULT_DB: Final[str] = os.path.join(current_dir, 'merkle.db') +DEFAULT_ALGORITHM: Final[str] = 'sha256' +DEFAULT_SIZE: Final[int] = 10 ** 8 +DEFAULT_BATCHSIZE: Final[int] = 10 ** 7 -def parse_cli_args(): - config = {'prog': sys.argv[0], 'usage': 'python %s' % sys.argv[0], - 'description': __doc__, 'epilog': '\n', - 'formatter_class': argparse.ArgumentDefaultsHelpFormatter} +def parse_cli_args() -> argparse.Namespace: + config: dict[str, Any] = {'prog': sys.argv[0], 'usage': 'python %s' % sys.argv[0], + 'description': __doc__, 'epilog': '\n', + 'formatter_class': argparse.ArgumentDefaultsHelpFormatter} parser = argparse.ArgumentParser(**config) parser.add_argument('--dbfile', type=str, default=DEFAULT_DB, - help='Database filepath') + help='Database filepath') parser.add_argument('--algorithm', choices=constants.ALGORITHMS, - default=DEFAULT_ALGORITHM, help='Hashing algorithm') + default=DEFAULT_ALGORITHM, help='Hashing algorithm') parser.add_argument('--disable-security', action='store_true', default=False, - 
help='Disable resistance against 2nd-preimage attack') + help='Disable resistance against 2nd-preimage attack') parser.add_argument('--size', type=int, default=DEFAULT_SIZE, - help='Nr entries to append in total') + help='Nr entries to append in total') parser.add_argument('--batchsize', type=int, default=DEFAULT_BATCHSIZE, - help='Nr entries to append per bulk insertion') + help='Nr entries to append per bulk insertion') parser.add_argument('--preserve-database', action='store_true', default=False, - help='Append without overwriting if already existent') + help='Append without overwriting if already existent') return parser.parse_args() if __name__ == '__main__': - args = parse_cli_args() + args: argparse.Namespace = parse_cli_args() - batchsize = args.batchsize + batchsize = int(args.batchsize) size = args.size if batchsize > size: sys.stdout.write("[-] Batchsize exceeds size\n") @@ -51,29 +52,31 @@ def parse_cli_args(): if not args.preserve_database: try: - os.remove(args.dbfile) + os.remove(path=args.dbfile) except OSError: pass - opts = {'algorithm': args.algorithm, - 'disable_security': args.disable_security} + opts: dict[str, Any] = {'algorithm': args.algorithm, + 'disable_security': args.disable_security} - with SqliteTree(args.dbfile, **opts) as tree: - offset = 0 - count = 1 + with SqliteTree(dbfile=args.dbfile, **opts) as tree: + offset: int = 0 + count: int = 1 append_entries = tree.append_entries - chunksize = min(100_000, batchsize) - start_time = time.time() + chunksize: int = min(100_000, batchsize) + start_time: float = time.time() + currsize: int = 0 while offset < size: - limit = offset + batchsize + 1 + limit: int = offset + batchsize + 1 if limit > size + 1: limit = size + 1 print(f"\nCreating {batchsize} entries...") - entries = [f'entry-{i}'.encode('utf-8') for i in range(offset + 1, - limit)] + entries: list[bytes] = [f'entry-{i}'.encode(encoding='utf-8') for i in range(offset + 1, + limit)] - index = append_entries(entries, chunksize) + 
index: int = append_entries( + entries=entries, chunksize=chunksize) # type: ignore assert index == limit - 1 currsize = tree.get_size() @@ -83,8 +86,8 @@ def parse_cli_args(): count += 1 offset += batchsize - end_time = time.time() - elapsed_time = end_time - start_time + end_time: float = time.time() + elapsed_time: float = end_time - start_time assert currsize == args.size diff --git a/benchmarks/test_perf.py b/benchmarks/test_perf.py index e9caa6c3..5e09a816 100644 --- a/benchmarks/test_perf.py +++ b/benchmarks/test_perf.py @@ -1,21 +1,24 @@ from random import randint +from typing import Any + import pytest from pymerkle import SqliteTree as MerkleTree + from .conftest import option -defaults = {'warmup_rounds': 0, 'rounds': option.rounds} +defaults: dict[str, Any] = {'warmup_rounds': 0, 'rounds': option.rounds} -opts = {'disable_optimizations': option.disable_optimizations, - 'disable_cache': option.disable_cache, - 'threshold': option.threshold, - 'capacity': option.capacity} +opts: dict[str, Any] = {'disable_optimizations': option.disable_optimizations, + 'disable_cache': option.disable_cache, + 'threshold': option.threshold, + 'capacity': option.capacity} -tree = MerkleTree(option.dbfile, algorithm=option.algorithm, **opts) +tree = MerkleTree(dbfile=option.dbfile, algorithm=option.algorithm, **opts) -def test_root(benchmark): +def test_root(benchmark) -> None: def setup(): start = randint(0, option.size - 2) if option.randomize else 0 @@ -27,7 +30,7 @@ def setup(): benchmark.pedantic(tree._get_root, setup=setup, **defaults) -def test_state(benchmark): +def test_state(benchmark) -> None: def setup(): size = randint(1, option.size) if option.randomize \ @@ -38,7 +41,7 @@ def setup(): benchmark.pedantic(tree.get_state, setup=setup, **defaults) -def test_inclusion(benchmark): +def test_inclusion(benchmark) -> None: def setup(): size = option.size @@ -49,7 +52,7 @@ def setup(): benchmark.pedantic(tree.prove_inclusion, setup=setup, **defaults) -def 
test_consistency(benchmark): +def test_consistency(benchmark) -> None: def setup(): size2 = option.size diff --git a/demo.py b/demo.py index 6e7a74c7..97655728 100644 --- a/demo.py +++ b/demo.py @@ -29,19 +29,19 @@ def parse_cli_args(): parser = argparse.ArgumentParser(**config) parser.add_argument('--backend', choices=['inmemory', 'sqlite'], - default='inmemory', help='Storage backend') + default='inmemory', help='Storage backend') parser.add_argument('--algorithm', choices=constants.ALGORITHMS, - default='sha256', help='Hashing algorithm') + default='sha256', help='Hashing algorithm') parser.add_argument('--threshold', type=int, metavar='WIDTH', - default=128, help='Subroot cache threshold') + default=128, help='Subroot cache threshold') parser.add_argument('--capacity', type=int, metavar='MAXSIZE', - default=1024 ** 3, help='Subroot cache capacity in bytes') + default=1024 ** 3, help='Subroot cache capacity in bytes') parser.add_argument('--disable-security', action='store_true', - default=False, help='Disable resistance against second-preimage attack') + default=False, help='Disable resistance against second-preimage attack') parser.add_argument('--disable-optimizations', action='store_true', - default=False, help='Use unopmitized versions of core operations') + default=False, help='Use unopmitized versions of core operations') parser.add_argument('--disable-cache', action='store_true', - default=False, help='Disable subroot caching') + default=False, help='Disable subroot caching') return parser.parse_args() @@ -50,7 +50,7 @@ def order_of_magnitude(num): return int(log10(num)) if not num == 0 else 0 -def strpath(rule, path): +def strpath(rule, path) -> str: s2 = 3 * ' ' s3 = 3 * ' ' template = '\n{s1}[{index}]{s2}{bit}{s3}{value}' @@ -65,16 +65,16 @@ def strpath(rule, path): return ''.join(pairs) -def strtree(tree): +def strtree(tree) -> str: if isinstance(tree, SqliteTree): entries = [tree.get_entry(index) for index in range(1, tree.get_size() - + 1)] + + 1)] 
tree = InmemoryTree.init_from_entries(entries) return str(tree) -def strproof(proof): +def strproof(proof) -> str: template = """ algorithm : {algorithm} security : {security} @@ -98,7 +98,7 @@ def strproof(proof): if __name__ == '__main__': args = parse_cli_args() - MerkleTree = { 'inmemory': InmemoryTree, 'sqlite': SqliteTree }[ + MerkleTree = {'inmemory': InmemoryTree, 'sqlite': SqliteTree}[ args.backend] config = {'algorithm': args.algorithm, diff --git a/profiler/__main__.py b/profiler/__main__.py index 3a72a52f..63501766 100755 --- a/profiler/__main__.py +++ b/profiler/__main__.py @@ -2,112 +2,113 @@ Run tree operations for profiling purposes """ +import argparse import os import sys -import argparse from random import randint +from typing import Any, Final from pymerkle import SqliteTree as MerkleTree from pymerkle import constants -current_dir = os.path.dirname(os.path.abspath(__file__)) -parent_dir = os.path.dirname(current_dir) +current_dir: str = os.path.dirname(os.path.abspath(__file__)) +parent_dir: str = os.path.dirname(current_dir) -DEFAULT_DB = os.path.join(parent_dir, 'benchmarks', 'merkle.db') -DB_SIZE = 10 ** 6 -DEFAULT_ROUNDS = 1 -DEFAULT_THRESHOLD = 128 -DEFAULT_CAPACITY = 1024 ** 3 +DEFAULT_DB: Final[str] = os.path.join(parent_dir, 'benchmarks', 'merkle.db') +DB_SIZE: Final[int] = 10 ** 6 +DEFAULT_ROUNDS: Final[int] = 1 +DEFAULT_THRESHOLD: Final[int] = 128 +DEFAULT_CAPACITY: Final[int] = 1024 ** 3 -def parse_cli_args(): - config = {'prog': sys.argv[0], 'usage': 'python %s' % sys.argv[0], - 'description': __doc__, 'epilog': '\n', - 'formatter_class': argparse.ArgumentDefaultsHelpFormatter} +def parse_cli_args() -> argparse.Namespace: + config: dict[str, Any] = {'prog': sys.argv[0], 'usage': 'python %s' % sys.argv[0], + 'description': __doc__, 'epilog': '\n', + 'formatter_class': argparse.ArgumentDefaultsHelpFormatter} parser = argparse.ArgumentParser(**config) parser.add_argument('--dbfile', type=str, default=DEFAULT_DB, - help='Database 
filepath') + help='Database filepath') parser.add_argument('--algorithm', choices=constants.ALGORITHMS, - default='sha256', help='Hashing algorithm') + default='sha256', help='Hashing algorithm') parser.add_argument('--rounds', type=int, default=DEFAULT_ROUNDS, - help='Nr rounds') + help='Nr rounds') parser.add_argument('--randomize', action='store_true', default=False, - help='Randomize function input per round') + help='Randomize function input per round') parser.add_argument('--disable-optimizations', action='store_true', default=False, - help='Use unoptimized versions of core functionalities') + help='Use unoptimized versions of core functionalities') parser.add_argument('--disable-cache', action='store_true', default=False, - help='Disable subroot caching') + help='Disable subroot caching') parser.add_argument('--threshold', type=int, metavar='WIDTH', - default=DEFAULT_THRESHOLD, - help='Subroot cache threshold') + default=DEFAULT_THRESHOLD, + help='Subroot cache threshold') parser.add_argument('--capacity', type=int, metavar='BYTES', - default=DEFAULT_CAPACITY, - help='Subroot cache capacity in bytes') + default=DEFAULT_CAPACITY, + help='Subroot cache capacity in bytes') operation = parser.add_subparsers(dest='operation') - root = operation.add_parser('root', - help='Run `_get_root`') + root: argparse.ArgumentParser = operation.add_parser('root', + help='Run `_get_root`') root.add_argument('--start', type=int, default=0, - help='Starting position') + help='Starting position') root.add_argument('--limit', type=int, default=DB_SIZE, - help='Final position') + help='Final position') - state = operation.add_parser('state', - help='Run `get_state`') + state: argparse.ArgumentParser = operation.add_parser('state', + help='Run `get_state`') state.add_argument('--size', type=int, default=DB_SIZE, - help='Nr entries to consider') + help='Nr entries to consider') - inclusion = operation.add_parser('inclusion', - help='Run `prove_inclusion`') + inclusion: 
argparse.ArgumentParser = operation.add_parser('inclusion', + help='Run `prove_inclusion`') inclusion.add_argument('--index', type=int, required=True, - help='Leaf index') + help='Leaf index') inclusion.add_argument('--size', type=int, default=DB_SIZE, - help='Nr entries to consider') + help='Nr entries to consider') - consistency = operation.add_parser('consistency', - help='Run `prove_consistency`') + consistency: argparse.ArgumentParser = operation.add_parser('consistency', + help='Run `prove_consistency`') consistency.add_argument('--size1', type=int, required=True, - help='Size of prior state') + help='Size of prior state') consistency.add_argument('--size2', type=int, default=DB_SIZE, - help='Size of later state') + help='Size of later state') return parser.parse_args() if __name__ == '__main__': - cli = parse_cli_args() + cli: argparse.Namespace = parse_cli_args() - opts = {'disable_optimizations': cli.disable_optimizations, - 'disable_cache': cli.disable_cache, - 'threshold': cli.threshold, - 'capacity': cli.capacity} + opts: dict[str, Any] = {'disable_optimizations': cli.disable_optimizations, + 'disable_cache': cli.disable_cache, + 'threshold': cli.threshold, + 'capacity': cli.capacity} - tree = MerkleTree(cli.dbfile, algorithm=cli.algorithm, **opts) + tree = MerkleTree(dbfile=cli.dbfile, algorithm=cli.algorithm, **opts) match cli.operation: case 'root': func = tree._get_root - def get_args(): - return (cli.start, cli.limit) + def get_args() -> tuple[int, int]: # type: ignore + return (int(cli.start), int(cli.limit)) if cli.randomize: - def get_args(): - start = randint(0, cli.limit - 2) - limit = randint(start + 1, cli.limit) + def get_args() -> tuple[int, int]: # type: ignore + start: int = randint(0, cli.limit - 2) + limit: int = randint(start + 1, cli.limit) return (start, limit) case 'state': func = tree.get_state - def get_args(): + def get_args(): # type: ignore return (cli.size,) if cli.randomize: - def get_args(): + def get_args(): # type: ignore 
size = randint(1, cli.size) return (size,) @@ -115,24 +116,24 @@ def get_args(): case 'inclusion': func = tree.prove_inclusion - def get_args(): + def get_args(): # type: ignore return (cli.index, cli.size) if cli.randomize: - def get_args(): - size = cli.size - index = randint(1, size) + def get_args(): # type: ignore + size: int = cli.size + index: int = randint(1, size) return (index, size) case 'consistency': func = tree.prove_consistency - def get_args(): + def get_args(): # type: ignore return (cli.size1, cli.size2) if cli.randomize: - def get_args(): + def get_args(): # type: ignore size2 = cli.size2 size1 = randint(1, size2) @@ -140,9 +141,9 @@ def get_args(): count = 0 while count < cli.rounds: - args = get_args() + args = get_args() # type: ignore print('round %d:' % count, args) - func(*args) + func(*args) # type: ignore count += 1 print("\033[92m {}\033[00m".format(tree.get_cache_info())) diff --git a/pymerkle/__init__.py b/pymerkle/__init__.py index 61574031..9fbec2cc 100644 --- a/pymerkle/__init__.py +++ b/pymerkle/__init__.py @@ -1,8 +1,7 @@ from .concrete.inmemory import InmemoryTree from .concrete.sqlite import SqliteTree from .core import BaseMerkleTree, InvalidChallenge -from .proof import MerkleProof, verify_inclusion, verify_consistency, InvalidProof - +from .proof import InvalidProof, MerkleProof, verify_consistency, verify_inclusion __version__ = '6.1.0' diff --git a/pymerkle/concrete/inmemory.py b/pymerkle/concrete/inmemory.py index 9ebb3383..d65c8ac5 100644 --- a/pymerkle/concrete/inmemory.py +++ b/pymerkle/concrete/inmemory.py @@ -1,5 +1,7 @@ -from pymerkle.utils import decompose +from typing import Literal, Optional, Union + from pymerkle.core import BaseMerkleTree +from pymerkle.utils import decompose class Node: @@ -17,8 +19,12 @@ class Node: __slots__ = ('digest', 'left', 'right', 'parent') + digest: bytes + parent: Optional['Node'] + left: Optional['Node'] + right: Optional['Node'] - def __init__(self, digest, left=None, right=None): 
+ def __init__(self, digest: bytes, left: Optional['Node'] = None, right: Optional['Node'] = None) -> None: self.digest = digest self.left = left @@ -31,8 +37,7 @@ def __init__(self, digest, left=None, right=None): self.parent = None - - def is_root(self): + def is_root(self) -> bool: """ Returns *True* iff the node is currently root. @@ -40,8 +45,7 @@ def is_root(self): """ return not self.parent - - def is_leaf(self): + def is_leaf(self) -> bool: """ Returns *True* iff the node is leaf. @@ -49,34 +53,31 @@ def is_leaf(self): """ return not self.left and not self.right - - def is_left_child(self): + def is_left_child(self) -> bool: """ Returns *True* iff the node is currently left child. :rtype: bool """ - parent = self.parent + parent: Optional[Node] = self.parent if not parent: return False return self == parent.left - - def is_right_child(self): + def is_right_child(self) -> bool: """ Returns *True* iff the node is currently right child. :rtype: bool """ - parent = self.parent + parent: Optional[Node] = self.parent if not parent: return False return self == parent.right - - def get_ancestor(self, degree): + def get_ancestor(self, degree: int) -> 'Node': """ .. note:: Ancestor of degree 0 is the node itself, ancestor of degree 1 is the node's parent, etc. @@ -84,22 +85,24 @@ def get_ancestor(self, degree): :type degree: int :rtype: Node """ - curr = self + curr: 'Node' = self while degree > 0: + if curr.parent is None: + raise Exception( + 'If degree is greater than 0, parent cannot be None.') curr = curr.parent degree -= 1 return curr - - def expand(self, indent=2, trim=None, level=0, ignored=None): + def expand(self, indent: int = 2, trim: Optional[int] = None, level: int = 0, ignored: Optional[list[str]] = None) -> str: """ Returns a string representing the subtree rooted at the present node. 
:param indent: [optional] - :type indent: str + :type indent: int :param trim: [optional] - :type trim: str + :type trim: int :param level: [optional] :type level: str :param ignored: [optional] @@ -109,7 +112,7 @@ def expand(self, indent=2, trim=None, level=0, ignored=None): ignored = ignored or [] if level == 0: - out = 2 * '\n' + ' └─' if not self.parent else '' + out: str = 2 * '\n' + ' └─' if not self.parent else '' else: out = (indent + 1) * ' ' @@ -124,7 +127,7 @@ def expand(self, indent=2, trim=None, level=0, ignored=None): if self.is_right_child(): out += ' └──' - ignored += [level] + ignored += [str(level)] checksum = self.digest.hex() out += (checksum[:trim] + '...') if trim else checksum @@ -135,6 +138,10 @@ def expand(self, indent=2, trim=None, level=0, ignored=None): recursion = (indent, trim, level + 1, ignored[:]) + if self.left is None or self.right is None: + raise Exception( + 'Node cannot be None.') + out += self.left.expand(*recursion) out += self.right.expand(*recursion) @@ -151,10 +158,13 @@ class Leaf(Node): :type digest: bytes """ - def __init__(self, data, digest): + data: bytes + parent: Optional[Union['Node', 'Leaf']] + + def __init__(self, data: bytes, digest: bytes) -> None: self.data = data - super().__init__(digest, None, None) + super().__init__(digest=digest, left=None, right=None) class InmemoryTree(BaseMerkleTree): @@ -167,15 +177,16 @@ class InmemoryTree(BaseMerkleTree): .. warning:: This is a very memory inefficient implementation. Use it for debugging, testing and investigating the tree structure. 
""" + root: Optional[Node] + leaves: list[Union[Node, Leaf]] - def __init__(self, algorithm='sha256', **opts): + def __init__(self, algorithm: str = 'sha256', **opts) -> None: self.root = None self.leaves = [] - super().__init__(algorithm, **opts) - + super().__init__(algorithm=algorithm, **opts) - def __str__(self, indent=2, trim=8): + def __str__(self, indent: int = 2, trim: int = 8) -> str: """ :returns: visual representation of the tree :rtype: str @@ -183,10 +194,9 @@ def __str__(self, indent=2, trim=8): if not self.root: return '\n └─[None]\n' - return self.root.expand(indent, trim) + '\n' + return self.root.expand(indent=indent, trim=trim) + '\n' - - def _encode_entry(self, data): + def _encode_entry(self, data: bytes) -> bytes: """ Returns the binary format of the provided data entry. @@ -196,8 +206,7 @@ def _encode_entry(self, data): """ return data - - def _store_leaf(self, data, digest): + def _store_leaf(self, data: bytes, digest: bytes) -> int: """ Creates a new leaf storing the provided data entry along with its hash value. 
@@ -209,35 +218,42 @@ def _store_leaf(self, data, digest): :returns: index of newly appended leaf counting from one :rtype: int """ - tail = Leaf(data, digest) + tail = Leaf(data=data, digest=digest) if not self.leaves: self.leaves += [tail] self.root = tail return 1 - node = self._get_last_maximal_subroot() + node: Node = self._get_last_maximal_subroot() self.leaves += [tail] - digest = self._hash_nodes(node.digest, tail.digest) + digest = self._hash_nodes(lnode=node.digest, rnode=tail.digest) if node.is_root(): - self.root = Node(digest, node, tail) + self.root = Node(digest=digest, left=node, right=tail) index = self._get_size() return index - curr = node.parent - curr.right = Node(digest, node, tail) + curr: Optional[Node] = node.parent + if curr is None: + raise Exception( + 'Node cannot be None.') + + curr.right = Node(digest=digest, left=node, right=tail) curr.right.parent = curr while curr: + if curr.left is None or curr.right is None: + raise Exception( + 'Node cannot be None.') + curr.digest = self._hash_nodes( - curr.left.digest, curr.right.digest) + lnode=curr.left.digest, rnode=curr.right.digest) curr = curr.parent - index = self._get_size() + index: int = self._get_size() return index - - def _get_leaf(self, index): + def _get_leaf(self, index: int) -> bytes: """ Returns the hash stored at the specified leaf. @@ -250,8 +266,7 @@ def _get_leaf(self, index): return self.leaves[index - 1].digest - - def _get_leaves(self, offset, width): + def _get_leaves(self, offset: int, width: int) -> list[bytes]: """ Returns in respective order the hashes stored by the leaves in the specified range. 
@@ -263,17 +278,15 @@ def _get_leaves(self, offset, width): """ return [l.digest for l in self.leaves[offset: offset + width]] - - def _get_size(self): + def _get_size(self) -> int: """ :returns: current number of leaves :rtype: int """ return len(self.leaves) - @classmethod - def init_from_entries(cls, entries, algorithm='sha256', **opts): + def init_from_entries(cls, entries: list[bytes], algorithm: str = 'sha256', **opts) -> 'InmemoryTree': """ Create tree from initial data @@ -286,12 +299,11 @@ def init_from_entries(cls, entries, algorithm='sha256', **opts): append_entry = tree.append_entry for data in entries: - append_entry(data) + append_entry(data=data) return tree - - def get_state(self, size=None): + def get_state(self, size: Optional[int] = None) -> bytes: """ Computes the root-hash of the subtree corresponding to the provided size @@ -303,7 +315,7 @@ def get_state(self, size=None): :type size: int :rtype: bytes """ - currsize = self._get_size() + currsize: int = self._get_size() if size is None: size = currsize @@ -312,19 +324,23 @@ def get_state(self, size=None): return self.hash_empty() if size == currsize: + if self.root is None: + raise Exception( + 'Root cannot be None.') + return self.root.digest - subroots = self._get_subroots(size) + subroots: list[Node] = self._get_subroots(size=size) result = subroots[0].digest i = 0 while i < len(subroots) - 1: - result = self._hash_nodes(subroots[i + 1].digest, result) + result: bytes = self._hash_nodes( + lnode=subroots[i + 1].digest, rnode=result) i += 1 return result - - def _inclusion_path_fallback(self, offset): + def _inclusion_path_fallback(self, offset) -> tuple[list[int], list[bytes]]: """ Non-recursive utility using concrete traversals to compute the inclusion path against the current number of leaves. 
@@ -333,21 +349,21 @@ def _inclusion_path_fallback(self, offset): :type offset: int :rtype: (list[int], list[bytes]) """ - base = self.leaves[offset] - bit = 1 if base.is_right_child() else 0 + base: Union[Node, Leaf] = self.leaves[offset] + bit: Literal[1, 0] = 1 if base.is_right_child() else 0 - path = [base.digest] - rule = [bit] + path: list[bytes] = [base.digest] + rule: list[int] = [bit] - curr = base + curr: Union[Node, Leaf] = base while curr.parent: - parent = curr.parent + parent: Union[Node, Leaf] = curr.parent if curr.is_left_child(): - digest = parent.right.digest + digest: bytes = parent.right.digest # type: ignore bit = 0 if parent.is_left_child() else 1 else: - digest = parent.left.digest + digest = parent.left.digest # type: ignore bit = 1 if parent.is_right_child() else 0 rule += [bit] @@ -360,8 +376,7 @@ def _inclusion_path_fallback(self, offset): return rule, path - - def _inclusion_path(self, start, offset, limit, bit): + def _inclusion_path(self, start: int, offset: int, limit: int, bit: int) -> tuple[list[int], list[bytes]]: """ Computes the inclusion path for the leaf located at the provided offset against the specified leaf range @@ -380,12 +395,11 @@ def _inclusion_path(self, start, offset, limit, bit): :rtype: (list[int], list[bytes]) """ if start == 0 and limit == self._get_size(): - return self._inclusion_path_fallback(offset) + return self._inclusion_path_fallback(offset=offset) - return super()._inclusion_path(start, offset, limit, bit) + return super()._inclusion_path(start=start, offset=offset, limit=limit, bit=bit) - - def _get_subroot_node(self, index, height): + def _get_subroot_node(self, index: int, height: int) -> Optional[Node]: """ Returns the root node of the perfect subtree of the provided height whose leftmost leaf node is located at the provided position. 
@@ -399,14 +413,14 @@ def _get_subroot_node(self, index, height): :type height: int :rtype: Node """ - node = self.leaves[index - 1] + node: Union[Node, Leaf] = self.leaves[index - 1] if not node: return i = 0 while i < height: - curr = node.parent + curr: Optional[Union[Node, Leaf]] = node.parent if not curr: return @@ -414,35 +428,41 @@ def _get_subroot_node(self, index, height): if curr.left is not node: return - node = curr + node = curr # type: ignore i += 1 # Verify existence of perfect subtree rooted at the detected node curr = node + if curr is None: + raise Exception( + 'Node cannot be None.') + i = 0 while i < height: if curr.is_leaf(): return curr = curr.right + if curr is None: + raise Exception( + 'Node cannot be None.') + i += 1 return node - - def _get_last_maximal_subroot(self): + def _get_last_maximal_subroot(self) -> Node: """ Returns the root node of the perfect subtree of maximum possible size containing the currently last leaf. :rtype: Node """ - degree = decompose(len(self.leaves))[0] - - return self.leaves[-1].get_ancestor(degree) + degree: int = decompose(len(self.leaves))[0] + return self.leaves[-1].get_ancestor(degree=degree) - def _get_subroots(self, size): + def _get_subroots(self, size: int) -> list[Node]: """ Returns in respective order the root nodes of the successive perfect subtrees whose sizes sum up to the provided size. 
@@ -454,10 +474,10 @@ def _get_subroots(self, size): if size < 0 or size > self._get_size(): return [] - subroots = [] - offset = 0 + subroots: list[Node] = [] + offset: int = 0 for height in reversed(decompose(size)): - node = self._get_subroot_node(offset + 1, height) + node: Optional[Node] = self._get_subroot_node(offset + 1, height) if not node: return [] diff --git a/pymerkle/concrete/sqlite.py b/pymerkle/concrete/sqlite.py index 31fcd7b2..83195af0 100644 --- a/pymerkle/concrete/sqlite.py +++ b/pymerkle/concrete/sqlite.py @@ -1,4 +1,6 @@ import sqlite3 +from typing import Any, Generator, Iterator, Optional + from pymerkle.core import BaseMerkleTree @@ -19,14 +21,14 @@ class SqliteTree(BaseMerkleTree): :type algorithm: str """ - def __init__(self, dbfile, algorithm='sha256', **opts): - self.dbfile = dbfile - self.con = sqlite3.connect(self.dbfile) + def __init__(self, dbfile: str, algorithm: str = 'sha256', **opts) -> None: + self.dbfile: str = dbfile + self.con: sqlite3.Connection = sqlite3.connect(database=self.dbfile) self.con.row_factory = lambda cursor, row: row[0] - self.cur = self.con.cursor() + self.cur: sqlite3.Cursor = self.con.cursor() with self.con: - query = f''' + query: str = f''' CREATE TABLE IF NOT EXISTS leaf( id INTEGER PRIMARY KEY AUTOINCREMENT, entry BLOB, @@ -34,18 +36,15 @@ def __init__(self, dbfile, algorithm='sha256', **opts): );''' self.cur.execute(query) - super().__init__(algorithm, **opts) + super().__init__(algorithm=algorithm, **opts) - - def __enter__(self): + def __enter__(self) -> 'SqliteTree': return self - - def __exit__(self, *exc): + def __exit__(self, *exc) -> None: self.con.close() - - def _encode_entry(self, data): + def _encode_entry(self, data: bytes) -> bytes: """ Returns the binary format of the provided data entry. 
@@ -55,8 +54,7 @@ def _encode_entry(self, data): """ return data - - def _store_leaf(self, data, digest): + def _store_leaf(self, data: bytes, digest: bytes) -> Optional[int]: """ Creates a new leaf storing the provided data along with its hash value. @@ -71,7 +69,7 @@ def _store_leaf(self, data, digest): if not isinstance(data, bytes): raise ValueError('Provided data is not binary') - cur = self.cur + cur: sqlite3.Cursor = self.cur with self.con: query = f''' @@ -81,8 +79,7 @@ def _store_leaf(self, data, digest): return cur.lastrowid - - def _get_leaf(self, index): + def _get_leaf(self, index: int) -> bytes: """ Returns the hash stored at the specified leaf. @@ -90,17 +87,16 @@ def _get_leaf(self, index): :type index: int :rtype: bytes """ - cur = self.cur + cur: sqlite3.Cursor = self.cur - query = f''' + query: str = f''' SELECT hash FROM leaf WHERE id = ? ''' cur.execute(query, (index,)) - return cur.fetchone() + return bytes(cur.fetchone()) - - def _get_leaves(self, offset, width): + def _get_leaves(self, offset: int, width: int) -> list[bytes]: """ Returns in respective order the hashes stored by the leaves in the specified range. @@ -110,32 +106,30 @@ def _get_leaves(self, offset, width): :param width: number of leaves to consider :type width: int """ - cur = self.cur + cur: sqlite3.Cursor = self.cur - query = f''' + query: str = f''' SELECT hash FROM leaf WHERE id BETWEEN ? AND ? ''' cur.execute(query, (offset + 1, offset + width)) return cur.fetchall() - - def _get_size(self): + def _get_size(self) -> int: """ :returns: current number of leaves :rtype: int """ - cur = self.cur + cur: sqlite3.Cursor = self.cur - query = f''' + query: str = f''' SELECT COUNT(*) FROM leaf ''' cur.execute(query) return cur.fetchone() - - def get_entry(self, index): + def get_entry(self, index: int) -> bytes: """ Returns the unhashed data stored at the specified leaf. 
@@ -143,17 +137,16 @@ def get_entry(self, index): :type index: int :rtype: bytes """ - cur = self.cur + cur: sqlite3.Cursor = self.cur - query = f''' + query: str = f''' SELECT entry FROM leaf WHERE id = ? ''' cur.execute(query, (index,)) return cur.fetchone() - - def _hash_per_chunk(self, entries, chunksize): + def _hash_per_chunk(self, entries: list[bytes], chunksize: int) -> Generator[Iterator[tuple[bytes, bytes]], Any, None]: """ Generator yielding in chunks pairs of entry data and hash value. @@ -165,16 +158,15 @@ def _hash_per_chunk(self, entries, chunksize): _hash_entry = self.hash_buff offset = 0 - chunk = entries[offset: chunksize] + chunk: list[bytes] = entries[offset: chunksize] while chunk: - hashes = [_hash_entry(data) for data in chunk] + hashes: list[bytes] = [_hash_entry(data=data) for data in chunk] yield zip(chunk, hashes) offset += chunksize chunk = entries[offset: offset + chunksize] - - def append_entries(self, entries, chunksize=100_000): + def append_entries(self, entries: list[bytes], chunksize: int = 100_000) -> int: """ Bulk operation for appending a batch of entries. @@ -186,13 +178,13 @@ def append_entries(self, entries, chunksize=100_000): :returns: index of last appended entry :rtype: int """ - cur = self.cur + cur: sqlite3.Cursor = self.cur with self.con: - query = f''' + query: str = f''' INSERT INTO leaf(entry, hash) VALUES (?, ?) ''' - for chunk in self._hash_per_chunk(entries, chunksize): + for chunk in self._hash_per_chunk(entries=entries, chunksize=chunksize): cur.execute('BEGIN TRANSACTION') for (data, digest) in chunk: @@ -200,4 +192,8 @@ def append_entries(self, entries, chunksize=100_000): cur.execute('END TRANSACTION') - return cur.lastrowid + result = cur.lastrowid + if result is None: + raise Exception( + 'Query returned no result. 
Integrity of the database cannot be guaranteed.') + return result diff --git a/pymerkle/constants.py b/pymerkle/constants.py index 2b0b9b9a..0893612d 100644 --- a/pymerkle/constants.py +++ b/pymerkle/constants.py @@ -2,12 +2,13 @@ List of supported hash functions. """ -SHA2_ALGORITHMS = ['sha224', 'sha256', 'sha384', 'sha512'] -SHA3_ALGORITHMS = ['sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'] -KECCAK_ALGORITHMS = ['keccak_224', 'keccak_256', 'keccak_384', 'keccak_512'] +SHA2_ALGORITHMS: list[str] = ['sha224', 'sha256', 'sha384', 'sha512'] +SHA3_ALGORITHMS: list[str] = ['sha3_224', 'sha3_256', 'sha3_384', 'sha3_512'] +KECCAK_ALGORITHMS: list[str] = ['keccak_224', + 'keccak_256', 'keccak_384', 'keccak_512'] -ALGORITHMS = SHA2_ALGORITHMS + SHA3_ALGORITHMS +ALGORITHMS: list[str] = SHA2_ALGORITHMS + SHA3_ALGORITHMS try: import sha3 except ImportError: diff --git a/pymerkle/core.py b/pymerkle/core.py index bb8681ad..138f965e 100644 --- a/pymerkle/core.py +++ b/pymerkle/core.py @@ -2,25 +2,25 @@ Merkle-tree core functionality """ +import builtins from abc import ABCMeta, abstractmethod from collections import deque, namedtuple from threading import Lock -import builtins +from typing import Any, Callable, Optional, Sized from cachetools import LRUCache from pymerkle.hasher import MerkleHasher from pymerkle.proof import MerkleProof -from pymerkle.utils import log2, decompose - +from pymerkle.utils import decompose, log2 try: - builtins.profile + builtins.profile # type: ignore except AttributeError: def profile(func): return func - builtins.profile = profile + builtins.profile = profile # type: ignore class InvalidChallenge(Exception): @@ -30,8 +30,8 @@ class InvalidChallenge(Exception): pass - -_CacheInfo = namedtuple('CacheInfo', ['size', 'capacity', 'hits', 'misses']) +_CacheInfo = namedtuple(typename='CacheInfo', field_names=[ + 'size', 'capacity', 'hits', 'misses']) class BaseMerkleTree(MerkleHasher, metaclass=ABCMeta): @@ -65,7 +65,16 @@ class 
BaseMerkleTree(MerkleHasher, metaclass=ABCMeta): :type cache: boolean """ - def __init__(self, algorithm='sha256', **opts): + algorithm: str + security: bool + threshold: int + capacity: int + cache: LRUCache + hits: int + misses: int + lock: Lock + + def __init__(self, algorithm='sha256', **opts) -> None: self.algorithm = algorithm self.security = not opts.get('disable_security', False) self.threshold = opts.get('threshold', 128) @@ -83,18 +92,15 @@ def __init__(self, algorithm='sha256', **opts): if opts.get('disable_cache', False): self._get_subroot = self._get_subroot_uncached - super().__init__(self.algorithm, self.security) - - - def _hash_entry(self, data): - return self.hash_buff(data) - + super().__init__(algorithm=self.algorithm, security=self.security) - def _hash_nodes(self, lnode, rnode): - return self.hash_pair(lnode, rnode) + def _hash_entry(self, data: bytes) -> bytes: + return self.hash_buff(data=data) + def _hash_nodes(self, lnode: bytes, rnode: bytes) -> bytes: + return self.hash_pair(buff1=lnode, buff2=rnode) - def append_entry(self, data): + def append_entry(self, data: Any) -> int: """ Appends a new leaf storing the provided data entry. @@ -103,14 +109,13 @@ def append_entry(self, data): :returns: index of newly appended leaf counting from one :rtype: int """ - buffer = self._encode_entry(data) - digest = self._hash_entry(buffer) - index = self._store_leaf(data, digest) + buffer: bytes = self._encode_entry(data=data) + digest: bytes = self._hash_entry(data=buffer) + index: int = self._store_leaf(data=data, digest=digest) return index - - def get_leaf(self, index): + def get_leaf(self, index: int) -> bytes: """ Returns the leaf hash located at the provided position. @@ -118,10 +123,9 @@ def get_leaf(self, index): :type index: int :rtype: bytes """ - return self._get_leaf(index) - + return self._get_leaf(index=index) - def get_size(self): + def get_size(self) -> int: """ Returns the current number of leaves. 
@@ -129,8 +133,7 @@ def get_size(self): """ return self._get_size() - - def get_state(self, size=None): + def get_state(self, size: Optional[int] = None) -> bytes: """ Computes the root-hash of the tree corresponding to the provided number of leaves. @@ -143,10 +146,9 @@ def get_state(self, size=None): if size is None: size = self._get_size() - return self._get_root(0, size) - + return self._get_root(start=0, limit=size) - def prove_inclusion(self, index, size=None): + def prove_inclusion(self, index: int, size: Optional[int] = None) -> MerkleProof: """ Proves inclusion of the hash located at the provided index against the tree corresponding to the provided number of leaves. @@ -160,7 +162,7 @@ def prove_inclusion(self, index, size=None): :raises InvalidChallenge: if the provided parameters are invalid or incompatible with each other """ - currsize = self.get_size() + currsize: int = self.get_size() if size is None: size = currsize @@ -171,13 +173,13 @@ def prove_inclusion(self, index, size=None): if not (0 < index <= size): raise InvalidChallenge('Provided index is out of bounds') - rule, path = self._inclusion_path(0, index - 1, size, 0) + rule, path = self._inclusion_path( + start=0, offset=index - 1, limit=size, bit=0) - return MerkleProof(self.algorithm, self.security, size, rule, [], - path) + return MerkleProof(algorithm=self.algorithm, security=self.security, size=size, rule=rule, subset=[], + path=path) - - def prove_consistency(self, size1, size2=None): + def prove_consistency(self, size1: int, size2: Optional[int] = None) -> MerkleProof: """ Proves consistency between the states corresponding to the provided sizes. 
@@ -191,7 +193,7 @@ def prove_consistency(self, size1, size2=None): :raises InvalidChallenge: if the provided parameters are invalid or incompatible with each other """ - currsize = self.get_size() + currsize: int = self.get_size() if size2 is None: size2 = currsize @@ -202,21 +204,20 @@ def prove_consistency(self, size1, size2=None): if not (0 < size1 <= size2): raise InvalidChallenge('Provided prior size out of bounds') - rule, subset, path = self._consistency_path(0, size1, size2, 0) - - return MerkleProof(self.algorithm, self.security, size2, rule, - subset, path) + rule, subset, path = self._consistency_path( + start=0, offset=size1, limit=size2, bit=0) + return MerkleProof(algorithm=self.algorithm, security=self.security, size=size2, rule=rule, + subset=subset, path=path) - def get_cache_info(self): + def get_cache_info(self) -> '_CacheInfo': """ Returns subroot cache info. """ return _CacheInfo(self.cache.currsize, self.cache.maxsize, self.hits, - self.misses) - + self.misses) - def cache_clear(self): + def cache_clear(self) -> None: """ Clears the subroot cache. """ @@ -226,9 +227,8 @@ def cache_clear(self): self.hits = 0 self.misses = 0 - @abstractmethod - def _encode_entry(self, data): + def _encode_entry(self, data: Any) -> bytes: """ Should return the binary format of the provided data entry. @@ -237,9 +237,8 @@ def _encode_entry(self, data): :rtype: bytes """ - @abstractmethod - def _store_leaf(self, data, digest): + def _store_leaf(self, data: Any, digest: bytes) -> int: """ Should create a new leaf storing the provided data entry along with its hash value. @@ -252,9 +251,8 @@ def _store_leaf(self, data, digest): :rtype: int """ - @abstractmethod - def _get_leaf(self, index): + def _get_leaf(self, index: int) -> bytes: """ Should return the hash stored at the specified leaf. 
@@ -263,9 +261,8 @@ def _get_leaf(self, index): :rtype: bytes """ - @abstractmethod - def _get_leaves(self, offset, width): + def _get_leaves(self, offset: int, width: int) -> list[bytes]: """ Should return in respective order the hashes stored by the leaves in the specified range. @@ -277,18 +274,16 @@ def _get_leaves(self, offset, width): :rtype: iterable of bytes """ - @abstractmethod - def _get_size(self): + def _get_size(self) -> int: """ Should return the current number of leaves :rtype: int """ - - @profile - def _get_subroot(self, offset, width): + @profile # type: ignore + def _get_subroot(self, offset: int, width: int) -> bytes: """ Cached subroot computation. @@ -301,28 +296,27 @@ def _get_subroot(self, offset, width): :rtype: bytes """ if width < self.threshold: - return self._get_subroot_uncached(offset, width) + return self._get_subroot_uncached(offset=offset, width=width) - key = (offset, width) + key: tuple[int, int] = (offset, width) with self.lock: try: - value = self.cache[key] + value: Sized = self.cache[key] self.hits += 1 - return value + return bytes(value) # type: ignore except KeyError: pass self.misses += 1 - value = self._get_subroot_uncached(offset, width) + value = self._get_subroot_uncached(offset=offset, width=width) self.cache[key] = value return value - - @profile - def _get_subroot_uncached(self, offset, width): + @profile # type: ignore + def _get_subroot_uncached(self, offset: int, width: int) -> bytes: """ Uncached subroot computation. 
@@ -334,19 +328,19 @@ def _get_subroot_uncached(self, offset, width): :type width: int :rtype: bytes """ - level = deque(self._get_leaves(offset, width)) + level = deque(iterable=self._get_leaves(offset=offset, width=width)) popleft = level.popleft append = level.append hashfunc = self.hashfunc - prefx01 = self.prefx01 + prefx01: bytes = self.prefx01 while width > 1: count = 0 while count < width: - lnode = popleft() - rnode = popleft() - node = hashfunc(prefx01 + lnode + rnode).digest() + lnode: bytes = popleft() + rnode: bytes = popleft() + node: bytes = hashfunc(prefx01 + lnode + rnode).digest() append(node) count += 2 @@ -354,9 +348,8 @@ def _get_subroot_uncached(self, offset, width): return level[0] - - @profile - def _get_root(self, start, limit): + @profile # type: ignore + def _get_root(self, start: int, limit: int) -> bytes: """ Computes the root-hash for the provided leaf range. @@ -366,33 +359,32 @@ def _get_root(self, start, limit): :type limit: int :rtype: bytes """ - subroots = deque() - prepend = subroots.appendleft - append = subroots.append - pop = subroots.pop + subroots: deque[bytes] = deque() + prepend: Callable[[bytes], None] = subroots.appendleft + append: Callable[[bytes], None] = subroots.append + pop: Callable[[], bytes] = subroots.pop _get_subroot = self._get_subroot - exponents = decompose(limit - start) + exponents: list[int] = decompose(limit - start) for p in exponents: - width = 1 << p - offset = limit - width - node = _get_subroot(offset, width) + width: int = 1 << p + offset: int = limit - width + node: bytes = _get_subroot(offset=offset, width=width) prepend(node) limit = offset hashfunc = self.hashfunc prefx01 = self.prefx01 while len(subroots) > 1: - lnode = pop() - rnode = pop() + lnode: bytes = pop() + rnode: bytes = pop() node = hashfunc(prefx01 + rnode + lnode).digest() append(node) return subroots[0] - - @profile - def _inclusion_path(self, start, offset, limit, bit): + @profile # type: ignore + def _inclusion_path(self, 
start: int, offset: int, limit: int, bit: int) -> tuple[list[int], list[bytes]]: """ Computes the inclusion path for the leaf located at the provided offset against the specified leaf range. @@ -413,7 +405,7 @@ def _inclusion_path(self, start, offset, limit, bit): stack = deque() push = stack.append while limit > start + 1: - k = 1 << log2(limit - start) + k: int = 1 << log2(n=limit - start) if k == limit - start: k >>= 1 @@ -429,20 +421,19 @@ def _inclusion_path(self, start, offset, limit, bit): _get_root = self._get_root _get_leaf = self._get_leaf - rule = [bit] - base = _get_leaf(offset + 1) - path = [base] + rule: list[int] = [bit] + base: bytes = _get_leaf(index=offset + 1) + path: list[bytes] = [base] while stack: bit, args = stack.pop() rule += [bit] - node = _get_root(*args) + node: bytes = _get_root(*args) path += [node] return rule, path - - @profile - def _consistency_path(self, start, offset, limit, bit): + @profile # type: ignore + def _consistency_path(self, start: int, offset: int, limit: int, bit: int) -> tuple[list[int], list[int], list[bytes]]: """ Computes the consistency path for the state corresponding to the provided offset against the specified leaf range @@ -463,7 +454,7 @@ def _consistency_path(self, start, offset, limit, bit): stack = deque() push = stack.append while not offset == limit and not (offset == 0 and limit == 1): - k = 1 << log2(limit) + k: int = 1 << log2(n=limit) if k == limit: k >>= 1 @@ -473,7 +464,7 @@ def _consistency_path(self, start, offset, limit, bit): limit = k bit = 0 else: - mask = int(k == 1 << log2(k)) + mask = int(k == 1 << log2(n=k)) push((bit, mask, (start, start + k))) start += k offset -= k @@ -485,26 +476,25 @@ def _consistency_path(self, start, offset, limit, bit): if offset == limit: mask = 1 - base = _get_root(start, start + limit) + base = _get_root(start=start, limit=start + limit) else: mask = 0 - base = _get_leaf(start + offset + 1) + base: bytes = _get_leaf(index=start + offset + 1) - rule = [bit] - 
subset = [mask] - path = [base] + rule: list[int] = [bit] + subset: list[int] = [mask] + path: list[bytes] = [base] while stack: bit, mask, args = stack.pop() rule += [bit] subset += [mask] - node = _get_root(*args) + node: bytes = _get_root(*args) path += [node] return rule, subset, path - - @profile - def _get_root_naive(self, start, limit): + @profile # type: ignore + def _get_root_naive(self, start: int, limit: int) -> bytes: """ Computes the root-hash for the provided leaf range. @@ -523,20 +513,19 @@ def _get_root_naive(self, start, limit): return self.hash_empty() if limit == start + 1: - return self._get_leaf(limit) + return self._get_leaf(index=limit) - k = 1 << log2(limit - start) + k: int = 1 << log2(n=limit - start) if k == limit - start: k >>= 1 - lnode = self._get_root_naive(start, start + k) - rnode = self._get_root_naive(start + k, limit) - - return self._hash_nodes(lnode, rnode) + lnode: bytes = self._get_root_naive(start=start, limit=start + k) + rnode: bytes = self._get_root_naive(start=start + k, limit=limit) + return self._hash_nodes(lnode=lnode, rnode=rnode) - @profile - def _inclusion_path_naive(self, start, offset, limit, bit): + @profile # type: ignore + def _inclusion_path_naive(self, start: int, offset: int, limit: int, bit: int) -> tuple[list[int], list[bytes]]: """ Computes the inclusion path for the leaf located at the provided offset against the specified leaf range. 
@@ -557,25 +546,26 @@ def _inclusion_path_naive(self, start, offset, limit, bit): :rtype: (list[int], list[bytes]) """ if offset == start and start == limit - 1: - node = self._get_leaf(offset + 1) + node: bytes = self._get_leaf(index=offset + 1) return [bit], [node] - k = 1 << log2(limit - start) + k: int = 1 << log2(n=limit - start) if k == limit - start: k >>= 1 if offset < start + k: - rule, path = self._inclusion_path_naive(start, offset, start + k, 0) - node = self._get_root(start + k, limit) + rule, path = self._inclusion_path_naive( + start=start, offset=offset, limit=start + k, bit=0) + node = self._get_root(start=start + k, limit=limit) else: - rule, path = self._inclusion_path_naive(start + k, offset, limit, 1) - node = self._get_root(start, start + k) + rule, path = self._inclusion_path_naive( + start=start + k, offset=offset, limit=limit, bit=1) + node = self._get_root(start=start, limit=start + k) return rule + [bit], path + [node] - - @profile - def _consistency_path_naive(self, start, offset, limit, bit): + @profile # type: ignore + def _consistency_path_naive(self, start: int, offset: int, limit: int, bit: int) -> tuple[list[Any], list[int], list[bytes]]: """ Computes the consistency path for the state corresponding to the provided offset against the specified leaf range. 
@@ -596,25 +586,26 @@ def _consistency_path_naive(self, start, offset, limit, bit): :rtype: (list[int], list[int], list[bytes]) """ if offset == limit: - node = self._get_root(start, start + limit) + node = self._get_root(start=start, limit=start + limit) return [bit], [1], [node] if offset == 0 and limit == 1: - node = self._get_leaf(start + offset + 1) + node = self._get_leaf(index=start + offset + 1) return [bit], [0], [node] - k = 1 << log2(limit) + k: int = 1 << log2(n=limit) if k == limit: k >>= 1 mask = 0 if offset < k: - rule, subset, path = self._consistency_path_naive(start, offset, k, 0) - node = self._get_root(start + k, start + limit) + rule, subset, path = self._consistency_path_naive( + start=start, offset=offset, limit=k, bit=0) + node = self._get_root(start=start + k, limit=start + limit) else: - rule, subset, path = self._consistency_path_naive(start + k, offset - k, - limit - k, 1) - node = self._get_root(start, start + k) - mask = int(k == 1 << log2(k)) + rule, subset, path = self._consistency_path_naive(start=start + k, offset=offset - k, + limit=limit - k, bit=1) + node: bytes = self._get_root(start=start, limit=start + k) + mask = int(k == 1 << log2(n=k)) return rule + [bit], subset + [mask], path + [node] diff --git a/pymerkle/hasher.py b/pymerkle/hasher.py index ca0af1fa..38fda359 100644 --- a/pymerkle/hasher.py +++ b/pymerkle/hasher.py @@ -1,4 +1,8 @@ import hashlib +from typing import Callable, Literal + +from typing_extensions import Buffer + from pymerkle import constants @@ -12,11 +16,16 @@ class MerkleHasher: to *True* :type security: bool """ - - def __init__(self, algorithm, security=True, **kw): - normalized = algorithm.lower().replace('-', '_') + algorithm: str + security: bool + prefx00: Literal[b"\x00", b""] + prefx01: Literal[b"\x01", b""] + hashfunc: Callable[[bytes,], 'hashlib._Hash'] + + def __init__(self, algorithm: str, security: bool = True, **kw) -> None: + normalized: str = algorithm.lower().replace('-', '_') if 
normalized not in constants.ALGORITHMS: - msg = f'{algorithm} not supported' + msg: str = f'{algorithm} not supported' if normalized in constants.KECCAK_ALGORITHMS: msg += ': You need to install pysha3' raise ValueError(msg) @@ -32,18 +41,17 @@ def __init__(self, algorithm, security=True, **kw): self.prefx00 = b'\x00' if self.security else b'' self.prefx01 = b'\x01' if self.security else b'' - - def _consume_bytes(self, buff): + def _consume_bytes(self, buff: bytes) -> bytes: """ :param buff: :type buff: bytes :rtype: bytes """ - hasher = self.hashfunc() - update = hasher.update - chunksize = 1024 - offset = 0 - chunk = buff[offset: chunksize] + hasher: hashlib._Hash = self.hashfunc() # type: ignore + update: Callable[[Buffer], None] = hasher.update + chunksize: Literal[1024] = 1024 + offset: int = 0 + chunk: bytes = buff[offset: chunksize] while chunk: update(chunk) offset += chunksize @@ -51,8 +59,7 @@ def _consume_bytes(self, buff): return hasher.digest() - - def hash_empty(self): + def hash_empty(self) -> bytes: """ Computes the hash of the empty data without prepending security prefixes. @@ -61,10 +68,9 @@ def hash_empty(self): :type buff: bytes :rtype: bytes """ - return self._consume_bytes(b'') + return self._consume_bytes(buff=b'') - - def hash_raw(self, buff): + def hash_raw(self, buff: bytes) -> bytes: """ Computes the hash of the provided data without prepending security prefixes. @@ -73,10 +79,9 @@ def hash_raw(self, buff): :type buff: bytes :rtype: bytes """ - return self._consume_bytes(buff) - + return self._consume_bytes(buff=buff) - def hash_buff(self, data): + def hash_buff(self, data: bytes) -> bytes: """ Computes the hash of the provided binary data. 
@@ -85,11 +90,9 @@ def hash_buff(self, data): :type data: bytes :rtype: bytes """ - return self._consume_bytes(self.prefx00 + data) - - + return self._consume_bytes(buff=self.prefx00 + data) - def hash_pair(self, buff1, buff2): + def hash_pair(self, buff1: bytes, buff2: bytes) -> bytes: """ Computes the hash of the concatenation of the provided binary data. diff --git a/pymerkle/proof.py b/pymerkle/proof.py index f46f6f86..80a9eabf 100644 --- a/pymerkle/proof.py +++ b/pymerkle/proof.py @@ -1,6 +1,5 @@ -import os -import json from hmac import compare_digest +from typing import Any from pymerkle.hasher import MerkleHasher @@ -12,7 +11,7 @@ class InvalidProof(Exception): pass -def verify_inclusion(base, root, proof): +def verify_inclusion(base: bytes, root: bytes, proof: 'MerkleProof') -> None: """ Verifies the provided Merkle-proof of inclusion against the provided leaf hash and tree state. @@ -32,7 +31,7 @@ def verify_inclusion(base, root, proof): raise InvalidProof('State does not match') -def verify_consistency(state1, state2, proof): +def verify_consistency(state1: bytes, state2: bytes, proof: 'MerkleProof') -> None: """ Verifies the provided Merkle-proof of consistency against the given states. @@ -71,7 +70,15 @@ class MerkleProof: :type path: list[bytes] """ - def __init__(self, algorithm, security, size, rule, subset, path): + algorithm: str + security: bool + size: int + rule: list[int] + subset: list[int] + path: list[bytes] + hasher: MerkleHasher + + def __init__(self, algorithm: str, security: bool, size: int, rule: list[int], subset: list[int], path: list[bytes]) -> None: self.algorithm = algorithm self.security = security self.size = size @@ -80,8 +87,7 @@ def __init__(self, algorithm, security, size, rule, subset, path): self.path = path self.hasher = MerkleHasher(**self.get_metadata()) - - def get_metadata(self): + def get_metadata(self) -> dict[str, Any]: """ Returns the information needed to configure the hashing machinery. 
@@ -90,7 +96,7 @@ def get_metadata(self): return {'algorithm': self.algorithm, 'security': self.security, 'size': self.size} - def serialize(self): + def serialize(self) -> dict[str, Any]: """ Returns the JSON representation of the verifiable object. @@ -107,9 +113,8 @@ def serialize(self): 'path': [digest.hex() for digest in self.path] } - @classmethod - def deserialize(cls, data): + def deserialize(cls, data: dict) -> 'MerkleProof': """ :param data: :type data: dict @@ -118,12 +123,12 @@ def deserialize(cls, data): metadata = data['metadata'] rule = data['rule'] subset = data['subset'] - path = [bytes.fromhex(checksum) for checksum in data['path']] + path: list[bytes] = [bytes.fromhex(checksum) + for checksum in data['path']] return cls(**metadata, rule=rule, subset=subset, path=path) - - def retrieve_prior_state(self): + def retrieve_prior_state(self) -> bytes: """ Computes the acclaimed prior state as specified by the included path of hashes. @@ -132,35 +137,34 @@ def retrieve_prior_state(self): :rtype: bytes """ - subpath = [digest for (mask, digest) in zip(self.subset, self.path) if - mask] + subpath: list[bytes] = [digest for (mask, digest) in zip(self.subset, self.path) if + mask] if not subpath: return self.hasher.hash_empty() - result = subpath[0] - index = 0 + result: bytes = subpath[0] + index: int = 0 hash_pair = self.hasher.hash_pair while index < len(subpath) - 1: - result = hash_pair(subpath[index + 1], result) + result = hash_pair(buff1=subpath[index + 1], buff2=result) index += 1 return result - - def resolve(self): + def resolve(self) -> bytes: """ Computes the target hash of the included path of hashes. 
:rtype: bytes """ - path = list(zip(self.rule, self.path)) + path: list[tuple[int, bytes]] = list(zip(self.rule, self.path)) if not path: return self.hasher.hash_empty() bit, result = path[0] - index = 0 + index: int = 0 hash_pair = self.hasher.hash_pair while index < len(path) - 1: next_bit, digest = path[index + 1] diff --git a/pymerkle/utils.py b/pymerkle/utils.py index f4c8de96..be1031b7 100644 --- a/pymerkle/utils.py +++ b/pymerkle/utils.py @@ -1,4 +1,4 @@ -def log2(n): +def log2(n: int) -> int: """ Base 2 logarithm @@ -9,7 +9,11 @@ def log2(n): :type n: int :rtype: int """ - k = 0 + + if (n < 0): + raise ArithmeticError('n must be a non-negative integer') + + k: int = 0 while n >> 1: k += 1 n >>= 1 @@ -17,7 +21,7 @@ def log2(n): return k -def decompose(n): +def decompose(n: int) -> list[int]: """ Returns in respective order the exponents corresponding to the binary decomposition of the provided integer. @@ -26,13 +30,16 @@ def decompose(n): :type n: int :rtype: list[int] """ - exponents = [] + if (n < 0): + raise ArithmeticError('n must be a non-negative integer') + + exponents: list[int] = [] - i = 1 + i: int = 1 while i < n + 1: if i & n: - p = -1 - j = i + p: int = -1 + j: int = i while j: j >>= 1 p += 1 diff --git a/requirements-dev.txt b/requirements-dev.txt index 2480d9a1..26cfab14 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,3 +4,8 @@ pytest-benchmark[histogram]>=3.4.1 sphinx==4.4.0 sphinx_rtd_theme>=1.0.0 python-docs-theme +autopep8>=2.0.4 +typing>=3.7.4.3 +typing_extensions>=4.8.0 +isort>=5.12.0 +cachetools>=5.3.1 \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 80308397..9e9a5bd1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,15 @@ import itertools +from typing import Final + import pytest -from pymerkle import constants, InmemoryTree, SqliteTree as _SqliteTree +from pymerkle import InmemoryTree +from pymerkle import SqliteTree as _SqliteTree +from pymerkle import 
constants -DEFAULT_MAXSIZE = 11 -DEFAULT_THRESHOLD = 2 -DEFAULT_CAPACITY = 1024 ** 3 +DEFAULT_MAXSIZE: Final[int] = 11 +DEFAULT_THRESHOLD: Final[int] = 2 +DEFAULT_CAPACITY: Final[int] = 1024 ** 3 class SqliteTree(_SqliteTree): @@ -14,8 +18,8 @@ class SqliteTree(_SqliteTree): used interchangeably """ - def __init__(self, algorithm='sha256', **opts): - super().__init__(':memory:', algorithm, **opts) + def __init__(self, algorithm='sha256', **opts) -> None: + super().__init__(dbfile=':memory:', algorithm=algorithm, **opts) @classmethod def init_from_entries(cls, entries, algorithm='sha256', **opts): @@ -25,32 +29,37 @@ def init_from_entries(cls, entries, algorithm='sha256', **opts): return tree -def pytest_addoption(parser): +def pytest_addoption(parser) -> None: parser.addoption('--algorithm', default='sha256', - choices=constants.ALGORITHMS, - help='Hash algorithm to be used') + choices=constants.ALGORITHMS, + help='Hash algorithm to be used') parser.addoption('--extended', action='store_true', default=False, - help='Test against all supported hash algorothms') + help='Test against all supported hash algorothms') parser.addoption('--backend', choices=['inmemory', 'sqlite'], default='inmemory', - help='Storage backend') + help='Storage backend') parser.addoption('--maxsize', type=int, default=DEFAULT_MAXSIZE, - help='Maximum size of tree fixtures') + help='Maximum size of tree fixtures') parser.addoption('--threshold', type=int, metavar='WIDTH', - default=DEFAULT_THRESHOLD, - help='Subroot cache threshold') + default=DEFAULT_THRESHOLD, + help='Subroot cache threshold') parser.addoption('--capacity', type=int, metavar='BYTES', - default=DEFAULT_CAPACITY, - help='Subroot cache capacity in bytes') + default=DEFAULT_CAPACITY, + help='Subroot cache capacity in bytes') + option = None -def pytest_configure(config): + +def pytest_configure(config) -> None: global option option = config.option + if option is None: + raise Exception('Option cannot be none') def 
all_configs(option): - algorithms = constants.ALGORITHMS if option.extended else [option.algorithm] + algorithms = constants.ALGORITHMS if option.extended else [ + option.algorithm] configs = [] for (disable_security, algorithm) in itertools.product((True, False), algorithms): @@ -63,7 +72,7 @@ def all_configs(option): return configs -def resolve_backend(option): +def resolve_backend(option) -> type[SqliteTree] | type[InmemoryTree]: if option.backend == 'sqlite': return SqliteTree @@ -72,21 +81,21 @@ def resolve_backend(option): def make_trees(default_config=False): configs = all_configs(option) if not default_config else [{'algorithm': - option.algorithm, 'disable_security': False}] + option.algorithm, 'disable_security': False}] # type: ignore MerkleTree = resolve_backend(option) return [MerkleTree.init_from_entries( [f'entry-{i}'.encode() for i in range(size)], **config) - for size in range(0, option.maxsize + 1) - for config in configs] + for size in range(0, option.maxsize + 1) # type: ignore + for config in configs] def tree_and_index(default_config=False): return [(tree, index) for tree in make_trees(default_config) - for index in range(1, tree.get_size() + 1)] + for index in range(1, tree.get_size() + 1)] def tree_and_range(default_config=False): return [(tree, start, limit) for tree in make_trees(default_config) - for start in range(0, tree.get_size()) - for limit in range(start + 1, tree.get_size())] + for start in range(0, tree.get_size()) + for limit in range(start + 1, tree.get_size())]