Skip to content

Commit f45df87

Browse files
author
Robert Sachunsky
committed
return_boxes_of_images_by_order_of_reading_new: fix no-mother case
- When handling lines without a mother: if the biggest line already accounts for all columns, but some lines are too close to the top and therefore must be removed, avoid invalidating the `biggest` index (which caused an `IndexError`).
- Remove the try-catch (now unnecessary).
- Use array operations instead of list operations.
1 parent b7a3c0e commit f45df87

File tree

1 file changed

+29
-33
lines changed

1 file changed

+29
-33
lines changed

src/eynollah/utils/__init__.py

Lines changed: 29 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1918,54 +1918,50 @@ def return_boxes_of_images_by_order_of_reading_new(
19181918
x_starting_all_between_nm_wc = x_starting[ind_all_lines_between_nm_wc]
19191919
x_ending_all_between_nm_wc = x_ending[ind_all_lines_between_nm_wc]
19201920

1921-
x_diff_all_between_nm_wc = x_ending_all_between_nm_wc - x_starting_all_between_nm_wc
1922-
if len(x_diff_all_between_nm_wc)>0:
1923-
biggest=np.argmax(x_diff_all_between_nm_wc)
1924-
19251921
columns_covered_by_mothers = set()
1926-
for dj in range(len(x_starting_all_between_nm_wc)):
1922+
for dj in range(len(ind_all_lines_between_nm_wc)):
19271923
columns_covered_by_mothers.update(
19281924
range(x_starting_all_between_nm_wc[dj],
19291925
x_ending_all_between_nm_wc[dj]))
19301926
child_columns = set(range(i_s_nc, x_end_biggest_column))
19311927
columns_not_covered = list(child_columns - columns_covered_by_mothers)
19321928

1933-
should_longest_line_be_extended=0
1934-
if (len(x_diff_all_between_nm_wc) > 0 and
1935-
set(list(range(x_starting_all_between_nm_wc[biggest],
1936-
x_ending_all_between_nm_wc[biggest])) +
1937-
list(columns_not_covered)) != child_columns):
1938-
should_longest_line_be_extended=1
1939-
index_lines_so_close_to_top_separator = \
1940-
np.arange(len(y_all_between_nm_wc))[(y_all_between_nm_wc>y_column_nc[i_c]) &
1941-
(y_all_between_nm_wc<=(y_column_nc[i_c]+500))]
1942-
if len(index_lines_so_close_to_top_separator) > 0:
1943-
indexes_remained_after_deleting_closed_lines= \
1944-
np.array(list(set(list(range(len(y_all_between_nm_wc)))) -
1945-
set(list(index_lines_so_close_to_top_separator))))
1946-
if len(indexes_remained_after_deleting_closed_lines) > 0:
1929+
if len(ind_all_lines_between_nm_wc):
1930+
biggest = np.argmax(x_ending_all_between_nm_wc -
1931+
x_starting_all_between_nm_wc)
1932+
if columns_covered_by_mothers == set(
1933+
range(x_starting_all_between_nm_wc[biggest],
1934+
x_ending_all_between_nm_wc[biggest])):
1935+
# biggest accounts for all columns alone,
1936+
# longest line should be extended
1937+
lines_so_close_to_top_separator = \
1938+
((y_all_between_nm_wc > y_column_nc[i_c]) &
1939+
(y_all_between_nm_wc <= y_column_nc[i_c] + 500))
1940+
if (np.count_nonzero(lines_so_close_to_top_separator) and
1941+
np.count_nonzero(lines_so_close_to_top_separator) <
1942+
len(ind_all_lines_between_nm_wc)):
19471943
y_all_between_nm_wc = \
1948-
y_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
1944+
y_all_between_nm_wc[~lines_so_close_to_top_separator]
19491945
x_starting_all_between_nm_wc = \
1950-
x_starting_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
1946+
x_starting_all_between_nm_wc[~lines_so_close_to_top_separator]
19511947
x_ending_all_between_nm_wc = \
1952-
x_ending_all_between_nm_wc[indexes_remained_after_deleting_closed_lines]
1953-
1954-
y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
1955-
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc)
1956-
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_end_biggest_column)
1948+
x_ending_all_between_nm_wc[~lines_so_close_to_top_separator]
19571949

1958-
if len(x_diff_all_between_nm_wc) > 0:
1959-
try:
1950+
y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
1951+
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, i_s_nc)
1952+
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_end_biggest_column)
1953+
else:
19601954
y_all_between_nm_wc = np.append(y_all_between_nm_wc, y_column_nc[i_c])
19611955
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, x_starting_all_between_nm_wc[biggest])
19621956
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, x_ending_all_between_nm_wc[biggest])
1963-
except:
1964-
logger.exception("cannot append")
19651957

1966-
y_all_between_nm_wc = np.append(y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
1967-
x_starting_all_between_nm_wc = np.append(x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
1968-
x_ending_all_between_nm_wc = np.append(x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
1958+
if len(columns_not_covered):
1959+
y_all_between_nm_wc = np.append(
1960+
y_all_between_nm_wc, [y_column_nc[i_c]] * len(columns_not_covered))
1961+
x_starting_all_between_nm_wc = np.append(
1962+
x_starting_all_between_nm_wc, np.array(columns_not_covered, int))
1963+
x_ending_all_between_nm_wc = np.append(
1964+
x_ending_all_between_nm_wc, np.array(columns_not_covered, int) + 1)
19691965

19701966
ind_args_between=np.arange(len(x_ending_all_between_nm_wc))
19711967
for column in range(int(i_s_nc), int(x_end_biggest_column)):

0 commit comments

Comments
 (0)