|
| 1 | +package edu.stanford.nlp.semgraph.semgrex.ssurgeon; |
| 2 | + |
| 3 | +import java.util.*; |
| 4 | +import java.io.*; |
| 5 | + |
| 6 | +import edu.stanford.nlp.ling.IndexedWord; |
| 7 | +import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher; |
| 8 | +import edu.stanford.nlp.semgraph.SemanticGraph; |
| 9 | +import edu.stanford.nlp.semgraph.SemanticGraphEdge; |
| 10 | +import edu.stanford.nlp.trees.GrammaticalRelation; |
| 11 | + |
| 12 | +/** |
| 13 | + * Build a new phrase out of the matched words. |
| 14 | + * <br> |
| 15 | + * All of the words must currently be connected to themselves. Eg, there would be one head which points to a different word, and the other words all point to that head. |
| 16 | + * <br> |
| 17 | + * If that condition is matched, then existing internal edges are replaced with edges to the new head, with the given reln <br> |
| 18 | + * If the head is changed, the edge out of the phrase (if it is not root) is changed to come from the new head <br> |
| 19 | + * Edges in to the phrase are also changed to point to the new head. |
| 20 | + * The purpose of that change is so for a noun phrase, for example, modifiers of that noun phrase such as nmod or nmod:desc now modify the new head |
| 21 | + */ |
| 22 | +public class SetPhraseHead extends SsurgeonEdit { |
| 23 | + public static final String LABEL = "setPhraseHead"; |
| 24 | + |
| 25 | + final List<String> phrase; |
| 26 | + final int headIndex; |
| 27 | + final GrammaticalRelation relation; |
| 28 | + final double weight; |
| 29 | + |
| 30 | + public SetPhraseHead(List<String> nodes, Integer headIndex, GrammaticalRelation relation, double weight) { |
| 31 | + if (headIndex == null) { |
| 32 | + throw new SsurgeonParseException("SetPhraseHead expected a -headIndex, 0-indexed for the node to use as the new head"); |
| 33 | + } |
| 34 | + if (headIndex < 0 || headIndex >= nodes.size()) { |
| 35 | + throw new SsurgeonParseException("-headIndex of " + headIndex + " is out of bounds for a phrase with " + nodes.size() + " words"); |
| 36 | + } |
| 37 | + |
| 38 | + if (relation == null) { |
| 39 | + throw new SsurgeonParseException("SetPhraseHead expected a -reln to represent the dependency to use for the new phrase"); |
| 40 | + } |
| 41 | + |
| 42 | + this.phrase = new ArrayList<>(nodes); |
| 43 | + this.headIndex = headIndex; |
| 44 | + this.relation = relation; |
| 45 | + this.weight = weight; |
| 46 | + } |
| 47 | + |
| 48 | + @Override |
| 49 | + public String toEditString() { |
| 50 | + StringWriter buf = new StringWriter(); |
| 51 | + buf.write(LABEL); |
| 52 | + buf.write("\t"); |
| 53 | + for (String node : phrase) { |
| 54 | + buf.write("-node " + node + "\t"); |
| 55 | + } |
| 56 | + buf.write("-headIndex " + headIndex + "\t"); |
| 57 | + buf.write("-reln " + relation.toString()); |
| 58 | + return buf.toString(); |
| 59 | + } |
| 60 | + |
| 61 | + |
| 62 | + @Override |
| 63 | + public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) { |
| 64 | + List<IndexedWord> matchedNodes = new ArrayList<>(); |
| 65 | + IndexedWord newHead = null; |
| 66 | + int idx = 0; |
| 67 | + for (String word : phrase) { |
| 68 | + IndexedWord node = sm.getNode(word); |
| 69 | + if (node == null) { |
| 70 | + return false; |
| 71 | + } |
| 72 | + matchedNodes.add(node); |
| 73 | + |
| 74 | + if (idx == headIndex) { |
| 75 | + newHead = node; |
| 76 | + } |
| 77 | + ++idx; |
| 78 | + } |
| 79 | + |
| 80 | + SemanticGraphEdge edgeOut = null; |
| 81 | + List<SemanticGraphEdge> deleteEdges = new ArrayList<>(); |
| 82 | + List<SemanticGraphEdge> relocateEdges = new ArrayList<>(); |
| 83 | + for (IndexedWord node : matchedNodes) { |
| 84 | + for (SemanticGraphEdge edge : sg.incomingEdgeIterable(node)) { |
| 85 | + if (matchedNodes.contains(edge.getSource())) { |
| 86 | + // TODO: not sure keeping extra edges is correct |
| 87 | + if (edge.getSource() != newHead && !edge.isExtra()) { |
| 88 | + deleteEdges.add(edge); |
| 89 | + } |
| 90 | + } else if (edgeOut == null) { |
| 91 | + edgeOut = edge; |
| 92 | + } else { |
| 93 | + // oops, this wasn't a self-contained phrase. guess we don't try to rearrange it after all |
| 94 | + // TODO: if the heads are the same, we could make it a phrase |
| 95 | + return false; |
| 96 | + } |
| 97 | + } |
| 98 | + for (SemanticGraphEdge edge : sg.outgoingEdgeIterable(node)) { |
| 99 | + // edges which point outside the phrase will be set to have the source be the new head |
| 100 | + if (!matchedNodes.contains(edge.getTarget())) { |
| 101 | + if (edge.getSource() != newHead) { |
| 102 | + relocateEdges.add(edge); |
| 103 | + } |
| 104 | + } |
| 105 | + } |
| 106 | + } |
| 107 | + |
| 108 | + boolean modified = false; |
| 109 | + if (edgeOut == null) { |
| 110 | + // the newHead should be the root now |
| 111 | + Set<IndexedWord> roots = new HashSet<>(sg.getRoots()); |
| 112 | + if (!roots.contains(newHead)) { |
| 113 | + modified = true; |
| 114 | + for (IndexedWord other : matchedNodes) { |
| 115 | + roots.remove(other); |
| 116 | + } |
| 117 | + roots.add(newHead); |
| 118 | + } |
| 119 | + sg.setRoots(roots); |
| 120 | + } else if (edgeOut.getTarget() != newHead) { |
| 121 | + SemanticGraphEdge newEdge = new SemanticGraphEdge(edgeOut.getSource(), |
| 122 | + newHead, |
| 123 | + edgeOut.getRelation(), |
| 124 | + edgeOut.getWeight(), |
| 125 | + edgeOut.isExtra()); |
| 126 | + boolean success = sg.removeEdge(edgeOut); |
| 127 | + if (!success) { |
| 128 | + throw new RuntimeException("Between when the outgoing edge was found and now, the edge was somehow deleted"); |
| 129 | + } |
| 130 | + sg.addEdge(newEdge); |
| 131 | + modified = true; |
| 132 | + } |
| 133 | + |
| 134 | + for (SemanticGraphEdge edge : relocateEdges) { |
| 135 | + SemanticGraphEdge newEdge = new SemanticGraphEdge(newHead, |
| 136 | + edge.getTarget(), |
| 137 | + edge.getRelation(), |
| 138 | + edge.getWeight(), |
| 139 | + edge.isExtra()); |
| 140 | + boolean success = sg.removeEdge(edge); |
| 141 | + if (!success) { |
| 142 | + throw new RuntimeException("Between when the incoming edge was found and now, the edge was somehow deleted"); |
| 143 | + } |
| 144 | + sg.addEdge(newEdge); |
| 145 | + modified = true; |
| 146 | + } |
| 147 | + |
| 148 | + for (SemanticGraphEdge edge : deleteEdges) { |
| 149 | + boolean success = sg.removeEdge(edge); |
| 150 | + if (!success) { |
| 151 | + throw new RuntimeException("Between when the internal phrase edge was found and now, the edge was somehow deleted"); |
| 152 | + } |
| 153 | + modified = true; |
| 154 | + } |
| 155 | + for (IndexedWord other : matchedNodes) { |
| 156 | + if (other == newHead) |
| 157 | + continue; |
| 158 | + |
| 159 | + found: { |
| 160 | + for (SemanticGraphEdge existingEdge : sg.getAllEdges(newHead, other)) { |
| 161 | + if (existingEdge.getRelation().equals(relation)) { |
| 162 | + break found; |
| 163 | + } |
| 164 | + } |
| 165 | + SemanticGraphEdge newEdge = new SemanticGraphEdge(newHead, |
| 166 | + other, |
| 167 | + relation, |
| 168 | + weight, |
| 169 | + false); |
| 170 | + sg.addEdge(newEdge); |
| 171 | + modified = true; |
| 172 | + } |
| 173 | + } |
| 174 | + |
| 175 | + return modified; |
| 176 | + } |
| 177 | + |
| 178 | +} |
0 commit comments