Skip to content

Commit 9381c2d

Browse files
bump, tests adjusted
1 parent 8d47d58 commit 9381c2d

File tree

4 files changed

+102
-44
lines changed

4 files changed

+102
-44
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ In July 2023, we started populating a [Github repository](https://github.com/dan
122122

123123
## Version History
124124

125+
* 2025-06-13 v1.4.0: Changed how percentages for matching are calculated, now using only matchable code vs. all code as baseline. Minor IDA plugin fixes.
125126
* 2025-05-22 v1.3.22: McritCLI now supports ENV variables (`MCRIT_CLI_SERVER` and `MCRIT_CLI_APITOKEN`) and a `.env` file for setting server and apitoken - THX to @r0ny123 for the suggestion!
126127
* 2025-03-11 v1.3.21: McritCLI now supports submissions with a spawned worker (requires --worker flag).
127128
* 2025-02-26 v1.3.20: Fixed a bug where crashing SpawningWorker would not be properly handled - THX to @yankovs!

mcrit/config/McritConfig.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
class McritConfig(object):
1111

1212
# NOTE to self: always change this in setup.py as well!
13-
VERSION = "1.3.22"
13+
VERSION = "1.4.0"
1414
# basic pathing info
1515
CONFIG_FILE_PATH = str(os.path.abspath(__file__))
1616
PROJECT_ROOT = str(os.path.abspath(os.sep.join([CONFIG_FILE_PATH, "..", ".."])))

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
setup(
99
name='mcrit',
10-
version="1.3.22",
10+
version="1.4.0",
1111
description='MCRIT is a framework created for simplified application of the MinHash algorithm to code similarity.',
1212
long_description_content_type="text/markdown",
1313
long_description=README,

tests/testMatcher.py

Lines changed: 99 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -67,50 +67,78 @@ def __init__(self, *args, **kwargs):
6767
(2, 3, 22, 84.375, IS_MINHASH_FLAG),
6868
],
6969
"num_bytes": 354.0,
70+
"num_instructions": 120,
7071
"offset": 0,
7172
},
73+
{
74+
"num_bytes": 35.0,
75+
"num_instructions": 11,
76+
"offset": 2220,
77+
"matches": [],
78+
"fid": 10
79+
},
7280
{
7381
"fid": 11,
7482
"matches": [
7583
(0, 2, 20, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG + IS_LIBRARY_FLAG),
7684
(1, 0, 1, 92.1875, IS_MINHASH_FLAG),
7785
],
7886
"num_bytes": 638.0,
87+
"num_instructions": 207,
7988
"offset": 364,
8089
},
90+
{
91+
"fid": 12,
92+
"matches": [
93+
(0, 2, 21, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG + IS_LIBRARY_FLAG),
94+
(1, 0, 5, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG)
95+
],
96+
"num_bytes": 166.0,
97+
"num_instructions": 64,
98+
"offset": 1004,
99+
},
81100
{
82101
"fid": 13,
83102
"matches": [(1, 0, 3, 67.1875, IS_MINHASH_FLAG)],
84103
"num_bytes": 1047.0,
104+
"num_instructions": 365,
85105
"offset": 1172,
86106
},
107+
{
108+
"num_bytes": 35.0,
109+
"num_instructions": 11,
110+
"offset": 2220,
111+
"matches": [],
112+
"fid": 14
113+
},
114+
{
115+
"num_bytes": 524.0,
116+
"num_instructions": 159,
117+
"offset": 2256,
118+
"matches": [],
119+
"fid": 15
120+
},
87121
{
88122
"fid": 16,
89123
"matches": [(1, 0, 5, 84.375, IS_MINHASH_FLAG)],
90124
"num_bytes": 915.0,
125+
"num_instructions": 287,
91126
"offset": 2780,
92127
},
93128
{
94129
"fid": 17,
95130
"matches": [(1, 0, 6, 98.4375, IS_MINHASH_FLAG)],
96131
"num_bytes": 727.0,
132+
"num_instructions": 226,
97133
"offset": 3696,
98134
},
99135
{
100136
"fid": 18,
101137
"matches": [(1, 0, 7, 67.1875, IS_MINHASH_FLAG)],
102138
"num_bytes": 1850.0,
139+
"num_instructions": 543,
103140
"offset": 4424,
104141
},
105-
{
106-
"fid": 12,
107-
"matches": [
108-
(0, 2, 21, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG + IS_LIBRARY_FLAG),
109-
(1, 0, 5, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG),
110-
],
111-
"num_bytes": 166.0,
112-
"offset": 1004,
113-
},
114142
]
115143

116144
function_matches_expected_vs = [
@@ -120,48 +148,76 @@ def __init__(self, *args, **kwargs):
120148
(1, 0, 0, 84.375, IS_MINHASH_FLAG),
121149
],
122150
"num_bytes": 354.0,
151+
"num_instructions": 120,
123152
"offset": 0,
124153
},
154+
{
155+
"num_bytes": 35.0,
156+
"num_instructions": 11,
157+
"offset": 2220,
158+
"matches": [],
159+
"fid": 10
160+
},
125161
{
126162
"fid": 11,
127163
"matches": [
128164
(1, 0, 1, 92.1875, IS_MINHASH_FLAG),
129165
],
130166
"num_bytes": 638.0,
167+
"num_instructions": 207,
131168
"offset": 364,
132169
},
170+
{
171+
"fid": 12,
172+
"matches": [
173+
(1, 0, 5, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG),
174+
],
175+
"num_bytes": 166.0,
176+
"num_instructions": 64,
177+
"offset": 1004,
178+
},
133179
{
134180
"fid": 13,
135181
"matches": [(1, 0, 3, 67.1875, IS_MINHASH_FLAG)],
136182
"num_bytes": 1047.0,
183+
"num_instructions": 365,
137184
"offset": 1172,
138185
},
186+
{
187+
"num_bytes": 35.0,
188+
"num_instructions": 11,
189+
"offset": 2220,
190+
"matches": [],
191+
"fid": 14
192+
},
193+
{
194+
"num_bytes": 524.0,
195+
"num_instructions": 159,
196+
"offset": 2256,
197+
"matches": [],
198+
"fid": 15
199+
},
139200
{
140201
"fid": 16,
141202
"matches": [(1, 0, 5, 84.375, IS_MINHASH_FLAG)],
142203
"num_bytes": 915.0,
204+
"num_instructions": 287,
143205
"offset": 2780,
144206
},
145207
{
146208
"fid": 17,
147209
"matches": [(1, 0, 6, 98.4375, IS_MINHASH_FLAG)],
148210
"num_bytes": 727.0,
211+
"num_instructions": 226,
149212
"offset": 3696,
150213
},
151214
{
152215
"fid": 18,
153216
"matches": [(1, 0, 7, 67.1875, IS_MINHASH_FLAG)],
154217
"num_bytes": 1850.0,
218+
"num_instructions": 543,
155219
"offset": 4424,
156220
},
157-
{
158-
"fid": 12,
159-
"matches": [
160-
(1, 0, 5, 100.0, IS_MINHASH_FLAG + IS_PICHASH_FLAG),
161-
],
162-
"num_bytes": 166.0,
163-
"offset": 1004,
164-
},
165221
]
166222

167223
minhash_aggregation_expected = {
@@ -229,15 +285,15 @@ def __init__(self, *args, **kwargs):
229285
"frequency_weighted": 4337.59375,
230286
"nonlib_unweighted": 4539.0,
231287
"nonlib_score_weighted": 3434.09375,
232-
"nonlib_frequency_weighted": 3434.09375,
288+
"nonlib_frequency_weighted": 3434.09375
233289
},
234290
"percent": {
235-
"unweighted": 90.977323538805493453,
236-
"score_weighted": 71.653425423187480038,
237-
"frequency_weighted": 69.268504471414883424,
238-
"nonlib_unweighted": 88.930250783699059561,
239-
"nonlib_score_weighted": 67.282401057993730408,
240-
"nonlib_frequency_weighted": 67.282401057993730408,
291+
"unweighted": 90.55793991416309,
292+
"score_weighted": 71.32312033063106,
293+
"frequency_weighted": 68.94919329200445,
294+
"nonlib_unweighted": 88.42781998831093,
295+
"nonlib_score_weighted": 66.90227449834404,
296+
"nonlib_frequency_weighted": 66.90227449834404,
241297
},
242298
},
243299
}
@@ -266,15 +322,15 @@ def __init__(self, *args, **kwargs):
266322
"frequency_weighted": 149.34375,
267323
"nonlib_unweighted": 0,
268324
"nonlib_score_weighted": 0,
269-
"nonlib_frequency_weighted": 0,
325+
"nonlib_frequency_weighted": 0
270326
},
271327
"percent": {
272-
"unweighted": 5.6531459597572660492,
273-
"score_weighted": 4.769841903545193229,
274-
"frequency_weighted": 2.3849209517725966145,
328+
"unweighted": 5.627086313781593,
329+
"score_weighted": 4.747854077253219,
330+
"frequency_weighted": 2.3739270386266096,
275331
"nonlib_unweighted": 0.0,
276332
"nonlib_score_weighted": 0.0,
277-
"nonlib_frequency_weighted": 0.0,
333+
"nonlib_frequency_weighted": 0.0
278334
},
279335
},
280336
}
@@ -303,16 +359,16 @@ def __init__(self, *args, **kwargs):
303359
"frequency_weighted": 4486.9375,
304360
"nonlib_unweighted": 5697.0,
305361
"nonlib_score_weighted": 4486.9375,
306-
"nonlib_frequency_weighted": 4486.9375,
362+
"nonlib_frequency_weighted": 4486.9375
307363
},
308364
"percent": {
309-
"unweighted": 90.977323538805493453,
310-
"score_weighted": 71.653425423187480038,
311-
"frequency_weighted": 71.653425423187480038,
312-
"nonlib_unweighted": 90.977323538805493453,
313-
"nonlib_score_weighted": 71.653425423187480038,
314-
"nonlib_frequency_weighted": 71.653425423187480038,
315-
},
365+
"unweighted": 90.55793991416309,
366+
"score_weighted": 71.32312033063106,
367+
"frequency_weighted": 71.32312033063106,
368+
"nonlib_unweighted": 90.55793991416309,
369+
"nonlib_score_weighted": 71.32312033063106,
370+
"nonlib_frequency_weighted": 71.32312033063106
371+
}
316372
},
317373
}
318374

@@ -343,12 +399,12 @@ def __init__(self, *args, **kwargs):
343399
"nonlib_frequency_weighted": 0,
344400
},
345401
"percent": {
346-
"unweighted": 18.49249441073139572,
347-
"score_weighted": 18.49249441073139572,
348-
"frequency_weighted": 15.665921430852762696,
402+
"unweighted": 18.407248450166904,
403+
"score_weighted": 18.407248450166904,
404+
"frequency_weighted": 15.593705293276109,
349405
"nonlib_unweighted": 0.0,
350406
"nonlib_score_weighted": 0.0,
351-
"nonlib_frequency_weighted": 0.0,
407+
"nonlib_frequency_weighted": 0.0
352408
},
353409
},
354410
}
@@ -490,15 +546,16 @@ def testMatcherQuery(self):
490546

491547
self.assertEqual(result["matches"]["aggregation"]["pichash"], self.pichash_aggregation_expected_query)
492548
self.assertEqual(result["matches"]["aggregation"]["minhash"], self.minhash_aggregation_expected_query)
549+
json.dumps(sorted(result["matches"]["functions"], key=lambda x: x["fid"]), indent=1)
493550
self.assertEqual(
494551
sorted(result["matches"]["functions"], key=lambda x: x["fid"]),
495552
sorted(function_matches_expected, key=lambda x: x["fid"]),
496553
)
497554
self.assertEqual(
498555
result["matches"]["samples"],
499556
[
500-
self.sample_summary_entry_2_expected,
501557
self.sample_summary_lib_entry_expected,
558+
self.sample_summary_entry_2_expected,
502559
self.sample_summary_entry_3_expected,
503560
],
504561
)

0 commit comments

Comments
 (0)