Skip to content

Commit c905429

Browse files
committed
Process features in the CoNLLUReader. Still to do: process xpos, keep the misc column, and possibly build an extra semantic graph.
The reader now reads in the POS features (the CoNLL-U features column), but the Spanish pipeline doesn't featurize, so this commit updates the test to ignore those features. (It would be better to test the features!)
1 parent 259c2bf commit c905429

File tree

2 files changed

+16
-0
lines changed

2 files changed

+16
-0
lines changed

itest/src/edu/stanford/nlp/pipeline/CoNLLUReaderITest.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ public void testReadingInCoNLLUFile() throws ClassNotFoundException, IOException
4242
for (CoreLabel token : goldDocument.get(CoreAnnotations.TokensAnnotation.class)) {
4343
token.remove(CoreAnnotations.ParentAnnotation.class);
4444
}
45+
for (CoreLabel token : readInDocument.get(CoreAnnotations.TokensAnnotation.class)) {
46+
token.remove(CoreAnnotations.CoNLLUFeats.class);
47+
}
4548
// compare gold vs. read in
4649
// compare document text
4750
assertEquals(goldDocument.get(CoreAnnotations.TextAnnotation.class),

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import edu.stanford.nlp.ling.*;
55
import edu.stanford.nlp.semgraph.*;
66
import edu.stanford.nlp.trees.*;
7+
import edu.stanford.nlp.trees.ud.CoNLLUFeatures;
78
import edu.stanford.nlp.util.*;
89

910
import java.io.*;
@@ -29,6 +30,7 @@ public class CoNLLUReader {
2930
public static final int CoNLLU_LemmaField = 2;
3031
public static final int CoNLLU_UPOSField = 3;
3132
public static final int CoNLLU_XPOSField = 4;
33+
public static final int CoNLLU_FeaturesField = 5;
3234
public static final int CoNLLU_GovField = 6;
3335
public static final int CoNLLU_RelnField = 7;
3436
public static final int CoNLLU_MiscField = 9;
@@ -304,10 +306,21 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
304306
cl.setValue(fields.get(CoNLLU_WordField));
305307
cl.setOriginalText(fields.get(CoNLLU_WordField));
306308
cl.setIsNewline(false);
309+
307310
if (!fields.get(CoNLLU_LemmaField).equals("_"))
308311
cl.setLemma(fields.get(CoNLLU_LemmaField));
312+
309313
if (!fields.get(CoNLLU_UPOSField).equals("_"))
310314
cl.setTag(fields.get(CoNLLU_UPOSField));
315+
316+
//final String xpos = fields.get(CoNLLU_XPOSField);
317+
//if (!xpos.equals("_"))
318+
// cl.setTag(xpos);
319+
320+
if (!fields.get(CoNLLU_FeaturesField).equals("_")) {
321+
CoNLLUFeatures features = new CoNLLUFeatures(fields.get(CoNLLU_FeaturesField));
322+
cl.set(CoreAnnotations.CoNLLUFeats.class, features);
323+
}
311324
for (int extraColumnIdx = 10; extraColumnIdx < columnCount && extraColumnIdx < fields.size();
312325
extraColumnIdx++) {
313326
cl.set(extraColumns.get(extraColumnIdx), fields.get(extraColumnIdx));

0 commit comments

Comments
 (0)