Skip to content

Commit 2ddf9aa

Browse files
committed
Properly read XPOS in the CoNLLUReader
1 parent 3996bac commit 2ddf9aa

File tree

2 files changed

+20
-7
lines changed

2 files changed

+20
-7
lines changed

itest/src/edu/stanford/nlp/pipeline/CoNLLUReaderITest.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,15 @@ public class CoNLLUReaderITest {
4545
{"de", "allí", "el", "rebaja", "."},
4646
};
4747

48-
static final String[][] EXPECTED_CPOS = {
48+
static final String[][] EXPECTED_UPOS = {
4949
{"CCONJ", "DET", "NOUN", "ADP", "NUM", "ADV", "ADJ", "ADP", "DET", "DET", "NOUN", "ADV", "AUX", "VERB", "PRON", "ADP", "DET", "NOUN", "ADP", "NOUN", "PUNCT"},
5050
{"ADP", "ADV", "DET", "NOUN", "PUNCT"},
5151
};
52+
static final String[][] EXPECTED_XPOS = {
53+
{"cc", "da0fs0", "ncfs000", "sps00", "dn0cp0", "rg", "aq0mpp", "sps00", "da0fs0", "di0fs0", "ncfs000", "rg", "vmii3s0", "vmn0000", null, "sps00", "di0ms0", "ncms000", "sps00", "ncfs000", "fp"},
54+
{"sps00", "rg", "da0fp0", "ncfp000", "fp"},
55+
};
56+
5257

5358
static final String[][] EXPECTED_FEATS = {
5459
{
@@ -133,15 +138,17 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
133138
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
134139
assertEquals(EXPECTED_WORD_TEXT[i].length, tokens.size());
135140
assertEquals(EXPECTED_LEMMA_TEXT[i].length, tokens.size());
136-
assertEquals(EXPECTED_CPOS[i].length, tokens.size());
141+
assertEquals(EXPECTED_UPOS[i].length, tokens.size());
142+
assertEquals(EXPECTED_XPOS[i].length, tokens.size());
137143
for (int j = 0; j < tokens.size(); ++j) {
138144
CoreLabel token = tokens.get(j);
139145
assertEquals(EXPECTED_WORD_TEXT[i][j], token.value());
140146
assertEquals(EXPECTED_WORD_TEXT[i][j], token.word());
141147
assertEquals(EXPECTED_WORD_TEXT[i][j], token.get(CoreAnnotations.OriginalTextAnnotation.class));
142148

143149
assertEquals(EXPECTED_LEMMA_TEXT[i][j], token.lemma());
144-
assertEquals(EXPECTED_CPOS[i][j], token.tag());
150+
assertEquals(EXPECTED_UPOS[i][j], token.get(CoreAnnotations.CoarseTagAnnotation.class));
151+
assertEquals(EXPECTED_XPOS[i][j], token.tag());
145152

146153
assertEquals(Integer.valueOf(i), token.get(CoreAnnotations.SentenceIndexAnnotation.class));
147154
assertEquals(Integer.valueOf(j+1), token.get(CoreAnnotations.IndexAnnotation.class));
@@ -239,6 +246,11 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
239246
assertEquals(expected, feats);
240247
}
241248

249+
// Some AnCora tokens have no XPOS (the field is "_"), so the POS tag key is only present on some tokens
250+
if (token.containsKey(CoreAnnotations.PartOfSpeechAnnotation.class)) {
251+
expectedKeys += 1;
252+
}
253+
242254
// the MWT token specifically gets one more field, the MWT text
243255
if (i == 0 && (j == 13 || j == 14)) {
244256
expectedKeys += 1;
@@ -252,6 +264,7 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
252264
// CoreAnnotations.IsNewlineAnnotation
253265
// CoreAnnotations.LemmaAnnotation
254266
// CoreAnnotations.PartOfSpeechAnnotation
267+
// CoreAnnotations.CoarseTagAnnotation
255268
// CoreAnnotations.IndexAnnotation
256269
// CoreAnnotations.AfterAnnotation
257270
// CoreAnnotations.BeforeAnnotation

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -316,11 +316,11 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
316316
cl.setLemma(fields.get(CoNLLU_LemmaField));
317317

318318
if (!fields.get(CoNLLU_UPOSField).equals("_"))
319-
cl.setTag(fields.get(CoNLLU_UPOSField));
319+
cl.set(CoreAnnotations.CoarseTagAnnotation.class, fields.get(CoNLLU_UPOSField));
320320

321-
//final String xpos = fields.get(CoNLLU_XPOSField);
322-
//if (!xpos.equals("_"))
323-
// cl.setTag(xpos);
321+
final String xpos = fields.get(CoNLLU_XPOSField);
322+
if (!xpos.equals("_"))
323+
cl.setTag(xpos);
324324

325325
if (!fields.get(CoNLLU_FeaturesField).equals("_")) {
326326
CoNLLUFeatures features = new CoNLLUFeatures(fields.get(CoNLLU_FeaturesField));

0 commit comments

Comments
 (0)