Skip to content

Commit ab18251

Browse files
committed
updated code, testing things out...
1 parent 94c8df8 commit ab18251

File tree

7 files changed

+476
-229
lines changed

7 files changed

+476
-229
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pillow==9.5.0
88
#git+https://github.com/DIAGNijmegen/pathology-whole-slide-data@main ##inserted this in the shell script
99
jupyter
1010
pandas
11+
scikit-learn
1112
seaborn
1213
shapely==1.8.1 #downgrade to try to fix the error on xml creation (see https://grand-challenge.org/forums/forum/machine-learning-for-optimal-detection-of-inflammatory-cells-in-736/topic/typeerror-__new__-got-an-unexpected-keyword-argument-index-in-point-initialization-2821/)
1314
# development dependencies

source/1_data_prep.ipynb

Lines changed: 99 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
},
6262
{
6363
"cell_type": "code",
64-
"execution_count": null,
64+
"execution_count": 4,
6565
"id": "e47b3da1-7cfb-4b64-a44f-dda0c7924b7f",
6666
"metadata": {},
6767
"outputs": [],
@@ -92,15 +92,21 @@
9292
"\n",
9393
"xml_path = r'./data/monkey-data/annotations/xml/A_P000001.xml'\n",
9494
"output_path = r'./data/monkey-data/annotations_polygon/A_P000001_polygon.xml'\n",
95+
"\n",
96+
"#TODO: check those values because in the eval they changed the numbers!\n",
9597
"lymphocyte_half_box_size = 4.5 # the size of half of the bbox around the lymphocyte dot in um\n",
96-
"monocytes_half_box_size = 11.0 # the size of half of the bbox around the monocytes dot in um\n",
97-
"min_spacing = 0.25\n",
98+
"#NOTE: reduced this to 5.0 to match the eval script (it was 11.0)\n",
99+
"monocytes_half_box_size = 5.0 # the size of half of the bbox around the monocytes dot in um\n",
100+
"min_spacing = 0.24199951445730394 # NOTE: changed this!\n",
101+
"#spacing is the zoom level of the image, in micro-meters per pixel (was rounded to 0.25)\n",
102+
"\n",
103+
"# convert the dot annotations (in mm) to polygon bounding boxes\n",
98104
"dot2polygon(xml_path, lymphocyte_half_box_size, monocytes_half_box_size, min_spacing, output_path)"
99105
]
100106
},
101107
{
102108
"cell_type": "code",
103-
"execution_count": 9,
109+
"execution_count": 7,
104110
"id": "4ab2cd84-a475-4433-94b6-c1d259e1af07",
105111
"metadata": {},
106112
"outputs": [
@@ -286,9 +292,9 @@
286292
" print(xml_path)\n",
287293
" output_path = os.path.join(annotation_polygon_dir,os.path.splitext(os.path.basename(xml_path))[0] + '_polygon' + os.path.splitext(os.path.basename(xml_path))[1])\n",
288294
" print(output_path)\n",
289-
" lymphocyte_half_box_size = 4.5 # the size of half of the bbox around the lymphocyte dot in um\n",
290-
" monocytes_half_box_size = 11.0 # the size of half of the bbox around the monocytes dot in um\n",
291-
" min_spacing = 0.25\n",
295+
" # lymphocyte_half_box_size = 4.5 # the size of half of the bbox around the lymphocyte dot in um\n",
296+
" # monocytes_half_box_size = 11.0 # the size of half of the bbox around the monocytes dot in um\n",
297+
" # min_spacing = 0.25\n",
292298
" dot2polygon(xml_path, lymphocyte_half_box_size, monocytes_half_box_size, min_spacing, output_path)"
293299
]
294300
},
@@ -302,7 +308,7 @@
302308
},
303309
{
304310
"cell_type": "code",
305-
"execution_count": 10,
311+
"execution_count": 8,
306312
"id": "a8fc5aaf-c068-4b8f-bf37-91a609a234d5",
307313
"metadata": {},
308314
"outputs": [],
@@ -347,23 +353,103 @@
347353
},
348354
{
349355
"cell_type": "code",
350-
"execution_count": 12,
356+
"execution_count": 10,
351357
"id": "45e85653-a4b7-4977-a143-b15bad959828",
352358
"metadata": {},
353359
"outputs": [
354360
{
355361
"name": "stdout",
356362
"output_type": "stream",
357363
"text": [
358-
"match found: A_P000001\n"
364+
"match found: B_P000004\n",
365+
"match found: D_P000011\n",
366+
"match found: B_P000005\n",
367+
"match found: A_P000036\n",
368+
"match found: A_P000001\n",
369+
"match found: D_P000004\n",
370+
"match found: A_P000011\n",
371+
"match found: C_P000031\n",
372+
"match found: A_P000029\n",
373+
"match found: D_P000010\n",
374+
"match found: C_P000033\n",
375+
"match found: A_P000035\n",
376+
"match found: D_P000015\n",
377+
"match found: C_P000036\n",
378+
"match found: A_P000006\n",
379+
"match found: A_P000018\n",
380+
"match found: B_P000003\n",
381+
"match found: B_P000016\n",
382+
"match found: A_P000034\n",
383+
"match found: C_P000037\n",
384+
"match found: D_P000002\n",
385+
"match found: B_P000007\n",
386+
"match found: B_P000015\n",
387+
"match found: D_P000001\n",
388+
"match found: D_P000009\n",
389+
"match found: A_P000004\n",
390+
"match found: C_P000023\n",
391+
"match found: A_P000003\n",
392+
"match found: A_P000005\n",
393+
"match found: A_P000014\n",
394+
"match found: C_P000035\n",
395+
"match found: C_P000024\n",
396+
"match found: D_P000016\n",
397+
"match found: C_P000038\n",
398+
"match found: B_P000020\n",
399+
"match found: D_P000007\n",
400+
"match found: C_P000039\n",
401+
"match found: B_P000013\n",
402+
"match found: B_P000014\n",
403+
"match found: A_P000038\n",
404+
"match found: B_P000019\n",
405+
"match found: D_P000005\n",
406+
"match found: C_P000025\n",
407+
"match found: D_P000019\n",
408+
"match found: C_P000027\n",
409+
"match found: C_P000040\n",
410+
"match found: C_P000021\n",
411+
"match found: D_P000012\n",
412+
"match found: C_P000026\n",
413+
"match found: A_P000022\n",
414+
"match found: C_P000028\n",
415+
"match found: D_P000006\n",
416+
"match found: D_P000018\n",
417+
"match found: A_P000033\n",
418+
"match found: A_P000021\n",
419+
"match found: A_P000020\n",
420+
"match found: B_P000012\n",
421+
"match found: B_P000001\n",
422+
"match found: A_P000024\n",
423+
"match found: B_P000002\n",
424+
"match found: A_P000007\n",
425+
"match found: A_P000030\n",
426+
"match found: A_P000032\n",
427+
"match found: B_P000009\n",
428+
"match found: A_P000031\n",
429+
"match found: A_P000017\n",
430+
"match found: A_P000037\n",
431+
"match found: C_P000022\n",
432+
"match found: A_P000002\n",
433+
"match found: D_P000014\n",
434+
"match found: B_P000017\n",
435+
"match found: C_P000029\n",
436+
"match found: D_P000013\n",
437+
"match found: C_P000032\n",
438+
"match found: A_P000016\n",
439+
"match found: B_P000010\n",
440+
"match found: D_P000003\n",
441+
"match found: B_P000011\n",
442+
"match found: D_P000017\n",
443+
"match found: C_P000034\n",
444+
"match found: C_P000030\n"
359445
]
360446
}
361447
],
362448
"source": [
363-
"wsi_dir = r'./data_debug/images/pas-cpg'\n",
364-
"wsa_dir = r'./data_debug/annotations_polygon/*_polygon.xml'\n",
449+
"wsi_dir = r'./data/monkey-data/images/pas-cpg' #r'./data_debug/images/pas-cpg'\n",
450+
"wsa_dir = r'./data/monkey-data/annotations_polygon/*_polygon.xml'\n",
365451
"output_dir = r'./source/configs'\n",
366-
"output_name = 'training_sample.yml'\n",
452+
"output_name = 'training_full.yml'\n",
367453
"\n",
368454
"folders_to_yml(wsi_dir, \n",
369455
" wsa_dir,\n",

source/2_training.ipynb

Lines changed: 233 additions & 184 deletions
Large diffs are not rendered by default.

source/3_inference.ipynb

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@
155155
" mask_path=mask_path,\n",
156156
" patch_configuration=patch_configuration,\n",
157157
" cpus=4,\n",
158-
" backend='asap')"
158+
" backend='openslide') #was backend='asap'"
159159
]
160160
},
161161
{
@@ -223,7 +223,8 @@
223223
" annotations = []\n",
224224
" counter = 0\n",
225225
" \n",
226-
" spacing_min = 0.25\n",
226+
" #NOTE / TODO: I used a different spacing for the image (0.24199951445730394), so we need to be sure how this works...\n",
227+
" spacing_min = 0.25 #was used in the original code to edit the annotations to bounding boxes\n",
227228
" ratio = spacing/spacing_min\n",
228229
" with WholeSlideImage(image_path) as wsi:\n",
229230
" spacing = wsi.get_real_spacing(spacing_min)\n",
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
{
2+
'wholeslidedata': {
3+
'default': {
4+
'yaml_source': "./configs/training_full.yml",
5+
"seed": 42,
6+
"image_backend": "openslide", #was asap
7+
'labels': {
8+
"ROI": 0,
9+
"lymphocytes": 1,
10+
"monocytes": 2
11+
},
12+
13+
'batch_shape': {
14+
'batch_size': 10,
15+
'spacing': 0.5,
16+
'shape': [128, 128, 3],
17+
'y_shape': [1000, 6],
18+
},
19+
20+
"annotation_parser": {
21+
"sample_label_names": ['roi'],
22+
},
23+
24+
'point_sampler_name': "RandomPointSampler",
25+
'point_sampler': {
26+
"buffer": {'spacing': "${batch_shape.spacing}", 'value': -64},
27+
},
28+
29+
'patch_label_sampler_name': 'DetectionPatchLabelSampler',
30+
'patch_label_sampler': {
31+
"max_number_objects": 1000,
32+
"detection_labels": ['lymphocytes','monocytes'],
33+
34+
},
35+
36+
}
37+
}
38+
}
File renamed without changes.

source/utils/dot2polygon.py

Lines changed: 102 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,132 @@
11
import xml.etree.ElementTree as ET
22

3-
43
def dot2polygon(xml_path, lymphocyte_half_box_size, monocytes_half_box_size, min_spacing, output_path):
    '''
    Convert dot annotations (single points) into square polygon annotations
    for lymphocytes and monocytes, and write the result to a new XML file.

    Only ``Annotation`` elements whose ``Type`` is ``"Dot"`` and whose
    ``PartOfGroup`` is ``"lymphocytes"`` or ``"monocytes"`` are converted;
    every other annotation is written out unchanged.

    Parameters
    ----------
    xml_path : str
        Path of the input annotation file, e.g. "root/sub_root/filename.xml".

    lymphocyte_half_box_size : float
        Half the side length of the square drawn around a lymphocyte dot,
        in micrometers (e.g. 4.5).

    monocytes_half_box_size : float
        Half the side length of the square drawn around a monocyte dot,
        in micrometers.

    min_spacing : float
        Micrometers per pixel. Both half-box sizes are divided by this value
        to obtain sizes in pixels, e.g. 4.5 um at 0.5 um/px -> 9 px.

    output_path : str
        Path where the XML with polygon annotations is written.

    Returns
    -------
    None
        The converted annotation tree is written to ``output_path``.
    '''
    tree = ET.parse(xml_path)

    # Half-box sizes converted from micrometers to pixels, keyed by the
    # annotation group they apply to.
    half_box_px = {
        "lymphocytes": lymphocyte_half_box_size / min_spacing,
        "monocytes": monocytes_half_box_size / min_spacing,
    }

    for annotation in tree.getroot().iter('Annotation'):
        if annotation.get('Type') != "Dot":
            continue
        half_box = half_box_px.get(annotation.get('PartOfGroup'))
        if half_box is None:
            # Dot of a group we do not convert: leave it untouched.
            continue

        annotation.set('Type', "Polygon")
        # Each child is a container of coordinate points
        # (presumably a single <Coordinates> element -- confirm against the data).
        for coordinates in annotation:
            _dot_to_square(coordinates, half_box)

    tree.write(output_path)


def _dot_to_square(coordinates, half_box):
    '''Replace each point in ``coordinates`` by a square of half-side ``half_box`` centred on it.'''
    # Iterate over a snapshot: new corner points are appended to the same
    # container below, and they must not be visited (and expanded) again.
    for point in list(coordinates):
        x = float(point.attrib['X'])
        y = float(point.attrib['Y'])

        # The existing point becomes the top-left corner.
        point.set('X', str(x - half_box))
        point.set('Y', str(y - half_box))

        # Remaining corners: bottom-left, bottom-right, top-right.
        corners = (
            (x - half_box, y + half_box),
            (x + half_box, y + half_box),
            (x + half_box, y - half_box),
        )
        for order, (corner_x, corner_y) in enumerate(corners, start=1):
            coordinates.append(ET.Element(
                point.tag,
                Order=str(order),
                X=str(corner_x),
                Y=str(corner_y),
            ))
tree.write(output_path)
60-

0 commit comments

Comments
 (0)