Skip to content

Commit d42e43e

Browse files
chg: apply ruff checks.
1 parent 83dc7dd commit d42e43e

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

56 files changed

+174
-328
lines changed

docs/conf.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python3
2-
# -*- coding: utf-8 -*-
32
#
43
# inscriptis documentation build configuration file, created by
54
# sphinx-quickstart on Sat Dec 14 06:42:31 2019.
@@ -23,8 +22,7 @@
2322
sys.path.insert(0, os.path.abspath("."))
2423
sys.path.insert(0, os.path.abspath("./../src"))
2524

26-
from inscriptis.metadata import __copyright__, __author__, __version__
27-
25+
from inscriptis.metadata import __author__, __copyright__, __version__
2826

2927
# -- General configuration ------------------------------------------------
3028

examples/custom-html-handling.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,15 @@
1010
"Welcome to <b>Chur</b>" is rendered as "Welcome to **Chur**".
1111
"""
1212

13-
from typing import Dict
13+
from lxml.html import fromstring
1414

1515
from inscriptis import ParserConfig
1616
from inscriptis.html_engine import Inscriptis
1717
from inscriptis.model.html_document_state import HtmlDocumentState
1818
from inscriptis.model.tag import CustomHtmlTagHandlerMapping
19-
from lxml.html import fromstring
2019

2120

22-
def my_handle_start_b(state: HtmlDocumentState, _: Dict) -> None:
21+
def my_handle_start_b(state: HtmlDocumentState, _: dict) -> None:
2322
"""Handle the opening <b> tag."""
2423
state.tags[-1].write("**")
2524

@@ -38,7 +37,5 @@ def my_handle_end_b(state: HtmlDocumentState) -> None:
3837
HTML = "Welcome to <b>Chur</b>"
3938

4039
html_tree = fromstring(HTML)
41-
inscriptis = Inscriptis(
42-
html_tree, ParserConfig(custom_html_tag_handler_mapping=MY_MAPPING)
43-
)
40+
inscriptis = Inscriptis(html_tree, ParserConfig(custom_html_tag_handler_mapping=MY_MAPPING))
4441
print(inscriptis.get_text())

pyproject.toml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,26 @@ sources = ["src"]
6666

6767
# code formatting with ruff
6868
[tool.ruff]
69-
line-length = 88
69+
line-length = 120
7070
target-version = "py311" # adjust as needed
7171
extend-exclude = ["*.html", "*.json", "*.txt", "/a", "/b"]
7272
src = ["src", "tests", "benchmarking", "examples"]
7373

7474
[tool.ruff.format]
7575
quote-style = "double"
7676

77+
[tool.ruff.lint]
78+
select = [
79+
# pycodestyle
80+
"E",
81+
# Pyflakes
82+
"F",
83+
# pyupgrade
84+
"UP",
85+
# flake8-bugbear
86+
"B",
87+
# flake8-simplify
88+
"SIM",
89+
# isort
90+
"I",
91+
]

src/inscriptis/__init__.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,18 @@
6060
"""
6161

6262
import re
63-
from typing import Dict, Optional, Any
64-
from inscriptis.model.config import ParserConfig
63+
from typing import Any
6564

6665
from lxml.etree import ParserError
67-
from lxml.html import fromstring, HtmlElement
66+
from lxml.html import HtmlElement, fromstring
6867

68+
from inscriptis.model.config import ParserConfig
6969
from inscriptis.html_engine import Inscriptis
7070

7171
RE_STRIP_XML_DECLARATION = re.compile(r"^<\?xml [^>]+?\?>")
7272

7373

74-
def _get_html_tree(html_content: str) -> Optional[HtmlElement]:
74+
def _get_html_tree(html_content: str) -> HtmlElement | None:
7575
"""Obtain the HTML parse tree for the given HTML content.
7676
7777
Args:
@@ -108,9 +108,7 @@ def get_text(html_content: str, config: ParserConfig = None) -> str:
108108
return Inscriptis(html_tree, config).get_text() if html_tree is not None else ""
109109

110110

111-
def get_annotated_text(
112-
html_content: str, config: ParserConfig = None
113-
) -> Dict[str, Any]:
111+
def get_annotated_text(html_content: str, config: ParserConfig = None) -> dict[str, Any]:
114112
"""Return a dictionary of the extracted text and annotations.
115113
116114
Notes:

src/inscriptis/annotation/__init__.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""The model used for saving annotations."""
22

3-
from typing import List
43
from typing import NamedTuple
54

65
from inscriptis.html_properties import HorizontalAlignment
@@ -30,12 +29,12 @@ class Annotation(NamedTuple):
3029

3130

3231
def horizontal_shift(
33-
annotations: List[Annotation],
32+
annotations: list[Annotation],
3433
content_width: int,
3534
line_width: int,
3635
align: HorizontalAlignment,
3736
shift: int = 0,
38-
) -> List[Annotation]:
37+
) -> list[Annotation]:
3938
r"""Shift annotations based on the given line's formatting.
4039
4140
Adjusts the start and end indices of annotations based on the line's
@@ -60,6 +59,4 @@ def horizontal_shift(
6059
else:
6160
h_align = shift + (line_width - content_width) // 2
6261

63-
return [
64-
Annotation(a.start + h_align, a.end + h_align, a.metadata) for a in annotations
65-
]
62+
return [Annotation(a.start + h_align, a.end + h_align, a.metadata) for a in annotations]

src/inscriptis/annotation/output/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@
2626
2727
"""
2828

29-
from typing import Dict, Any
29+
from typing import Any
3030

3131

3232
class AnnotationProcessor:
3333
"""An AnnotationProcessor is called for formatting annotations."""
3434

35-
def __call__(self, annotated_text: Dict[str, str]) -> Any:
35+
def __call__(self, annotated_text: dict[str, str]) -> Any:
3636
"""Format the given text and annotations.
3737
3838
Args:

src/inscriptis/annotation/output/html.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from collections import defaultdict
44
from itertools import cycle
5-
from typing import Dict, Any, List
5+
from typing import Any
66

77
from inscriptis.annotation.output import AnnotationProcessor
88

@@ -18,13 +18,11 @@ class HtmlExtractor(AnnotationProcessor):
1818

1919
verbatim = True
2020

21-
def __call__(self, annotated_text: Dict[str, Any]) -> str:
21+
def __call__(self, annotated_text: dict[str, Any]) -> str:
2222
tag_dict = defaultdict(list)
2323

2424
for start, end, label in reversed(annotated_text["label"]):
25-
tag_dict[start].append(
26-
f'<span class="{label}-label">{label}</span><span class="{label}">'
27-
)
25+
tag_dict[start].append(f'<span class="{label}-label">{label}</span><span class="{label}">')
2826
tag_dict[end].insert(0, "</span>")
2927

3028
tagged_content = [
@@ -43,7 +41,7 @@ def __call__(self, annotated_text: Dict[str, Any]) -> str:
4341
return "".join(tagged_content) + "</pre></body></html>"
4442

4543
@staticmethod
46-
def _get_label_colors(labels: List[str]) -> Dict[str, str]:
44+
def _get_label_colors(labels: list[str]) -> dict[str, str]:
4745
"""Compute the mapping between annotation labels and colors.
4846
4947
The used color schema is available in the global variable COLOR_SCHEMA.
@@ -57,7 +55,7 @@ def _get_label_colors(labels: List[str]) -> Dict[str, str]:
5755
"""
5856
return dict(zip({a[2] for a in sorted(labels)}, cycle(COLOR_SCHEMA)))
5957

60-
def _get_css(self, labels: List[str]) -> str:
58+
def _get_css(self, labels: list[str]) -> str:
6159
"""Compute the CSS to be included into the HTML output.
6260
6361
Args:
@@ -71,18 +69,18 @@ def _get_css(self, labels: List[str]) -> str:
7169
css = []
7270
for label, color in sorted(self._get_label_colors(labels).items()):
7371
css.append(
74-
"pre{{"
72+
"pre{"
7573
" position: relative;\n"
76-
"}}\n"
77-
".{label} {{\n"
78-
" background-color: {color};\n"
74+
"}\n"
75+
f".{label} {{\n"
76+
f" background-color: {color};\n"
7977
" border-radius: 0.4em;\n"
80-
"}}\n"
81-
".{label}-label {{\n"
78+
"}\n"
79+
f".{label}-label {{\n"
8280
" top: -1.0em;\n"
83-
' content: "{label}";\n'
81+
f' content: "{label}";\n'
8482
" position: absolute;\n"
85-
" background-color: {color};\n"
86-
" font-size: 75%; }}\n".format(label=label, color=color)
83+
f" background-color: {color};\n"
84+
" font-size: 75%; }\n"
8785
)
8886
return "\n".join(css)

src/inscriptis/annotation/output/surface.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Surface Form Annotation Processor."""
22

3-
from typing import Dict, Any
3+
from typing import Any
44

55
from inscriptis.annotation.output import AnnotationProcessor
66

@@ -10,7 +10,7 @@ class SurfaceExtractor(AnnotationProcessor):
1010

1111
verbatim = False
1212

13-
def __call__(self, annotated_text: Dict[str, Any]) -> Dict[str, Any]:
13+
def __call__(self, annotated_text: dict[str, Any]) -> dict[str, Any]:
1414
"""
1515
Add information on the surface forms to the annotated_text dictionary.
1616
@@ -22,9 +22,6 @@ def __call__(self, annotated_text: Dict[str, Any]) -> Dict[str, Any]:
2222
An extended dictionary which contains the extracted surface-forms
2323
of the annotations under the key 'surface'.
2424
"""
25-
surface_forms = [
26-
(label, annotated_text["text"][start:end])
27-
for start, end, label in annotated_text["label"]
28-
]
25+
surface_forms = [(label, annotated_text["text"][start:end]) for start, end, label in annotated_text["label"]]
2926
annotated_text["surface"] = surface_forms
3027
return annotated_text

src/inscriptis/annotation/output/xml.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""XML Annotation processor."""
22

33
from collections import defaultdict
4-
from typing import Dict, Any
4+
from typing import Any
55

66
from inscriptis.annotation.output import AnnotationProcessor
77

@@ -11,7 +11,7 @@ class XmlExtractor(AnnotationProcessor):
1111

1212
verbatim = True
1313

14-
def __call__(self, annotated_text: Dict[str, Any], root_element="content"):
14+
def __call__(self, annotated_text: dict[str, Any], root_element="content"):
1515
tag_dict = defaultdict(list)
1616
for start, end, tag in reversed(annotated_text["label"]):
1717
tag_dict[start].append(f"<{tag}>")

src/inscriptis/annotation/parser.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,8 @@
1919

2020
from collections import defaultdict
2121
from copy import copy
22-
from typing import Dict, Tuple, List
2322

24-
from inscriptis.model.html_element import HtmlElement, DEFAULT_HTML_ELEMENT
23+
from inscriptis.model.html_element import DEFAULT_HTML_ELEMENT, HtmlElement
2524

2625

2726
class ApplyAnnotation:
@@ -87,7 +86,7 @@ def __init__(self, css_profile, model: dict):
8786
self.css = css_profile
8887

8988
@staticmethod
90-
def _parse(model: dict) -> Tuple[Dict, List]:
89+
def _parse(model: dict) -> tuple[dict, list]:
9190
"""Compute the AnnotationModel from a model dictionary.
9291
9392
Returns:

0 commit comments

Comments
 (0)