@@ -45,10 +45,15 @@ public class CoNLLUReaderITest {
45
45
{"de" , "allí" , "el" , "rebaja" , "." },
46
46
};
47
47
48
- static final String [][] EXPECTED_CPOS = {
48
// Expected coarse tags, one row per sentence and one entry per token.
// testReadingInCoNLLUFile checks each row's length against the token count
// and each entry against CoreAnnotations.CoarseTagAnnotation on the token.
+ static final String [][] EXPECTED_UPOS = {
49
49
{"CCONJ" , "DET" , "NOUN" , "ADP" , "NUM" , "ADV" , "ADJ" , "ADP" , "DET" , "DET" , "NOUN" , "ADV" , "AUX" , "VERB" , "PRON" , "ADP" , "DET" , "NOUN" , "ADP" , "NOUN" , "PUNCT" },
50
50
{"ADP" , "ADV" , "DET" , "NOUN" , "PUNCT" },
51
51
};
52
// Expected values of token.tag(), one row per sentence and one entry per token.
// testReadingInCoNLLUFile checks each row's length against the token count.
// A null entry (index 14 of the first row) marks a token for which tag() is
// expected to return null, i.e. no XPOS value is expected on that token.
+ static final String [][] EXPECTED_XPOS = {
53
+ {"cc" , "da0fs0" , "ncfs000" , "sps00" , "dn0cp0" , "rg" , "aq0mpp" , "sps00" , "da0fs0" , "di0fs0" , "ncfs000" , "rg" , "vmii3s0" , "vmn0000" , null , "sps00" , "di0ms0" , "ncms000" , "sps00" , "ncfs000" , "fp" },
54
+ {"sps00" , "rg" , "da0fp0" , "ncfp000" , "fp" },
55
+ };
56
+
52
57
53
58
static final String [][] EXPECTED_FEATS = {
54
59
{
@@ -133,15 +138,17 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
133
138
List <CoreLabel > tokens = sentence .get (CoreAnnotations .TokensAnnotation .class );
134
139
assertEquals (EXPECTED_WORD_TEXT [i ].length , tokens .size ());
135
140
assertEquals (EXPECTED_LEMMA_TEXT [i ].length , tokens .size ());
136
- assertEquals (EXPECTED_CPOS [i ].length , tokens .size ());
141
+ assertEquals (EXPECTED_UPOS [i ].length , tokens .size ());
142
+ assertEquals (EXPECTED_XPOS [i ].length , tokens .size ());
137
143
for (int j = 0 ; j < tokens .size (); ++j ) {
138
144
CoreLabel token = tokens .get (j );
139
145
assertEquals (EXPECTED_WORD_TEXT [i ][j ], token .value ());
140
146
assertEquals (EXPECTED_WORD_TEXT [i ][j ], token .word ());
141
147
assertEquals (EXPECTED_WORD_TEXT [i ][j ], token .get (CoreAnnotations .OriginalTextAnnotation .class ));
142
148
143
149
assertEquals (EXPECTED_LEMMA_TEXT [i ][j ], token .lemma ());
144
- assertEquals (EXPECTED_CPOS [i ][j ], token .tag ());
150
+ assertEquals (EXPECTED_UPOS [i ][j ], token .get (CoreAnnotations .CoarseTagAnnotation .class ));
151
+ assertEquals (EXPECTED_XPOS [i ][j ], token .tag ());
145
152
146
153
assertEquals (Integer .valueOf (i ), token .get (CoreAnnotations .SentenceIndexAnnotation .class ));
147
154
assertEquals (Integer .valueOf (j +1 ), token .get (CoreAnnotations .IndexAnnotation .class ));
@@ -239,6 +246,11 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
239
246
assertEquals (expected , feats );
240
247
}
241
248
249
+ // Some of the AnCora tokens don't have XPOS, so only count the POS key when the token actually carries it
250
+ if (token .containsKey (CoreAnnotations .PartOfSpeechAnnotation .class )) {
251
+ expectedKeys += 1 ;
252
+ }
253
+
242
254
// the MWT token specifically gets one more field, the MWT text
243
255
if (i == 0 && (j == 13 || j == 14 )) {
244
256
expectedKeys += 1 ;
@@ -252,6 +264,7 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
252
264
// CoreAnnotations.IsNewlineAnnotation
253
265
// CoreAnnotations.LemmaAnnotation
254
266
// CoreAnnotations.PartOfSpeechAnnotation
267
+ // CoreAnnotations.CoarseTagAnnotation
255
268
// CoreAnnotations.IndexAnnotation
256
269
// CoreAnnotations.AfterAnnotation
257
270
// CoreAnnotations.BeforeAnnotation
0 commit comments