001 package org.maltparser.transform.pseudo; 002 003 import java.util.Vector; 004 005 import org.apache.log4j.Logger; 006 import org.maltparser.core.exception.MaltChainedException; 007 import org.maltparser.core.symbol.SymbolTable; 008 import org.maltparser.core.symbol.SymbolTableHandler; 009 import org.maltparser.core.syntaxgraph.DependencyStructure; 010 import org.maltparser.core.syntaxgraph.node.DependencyNode; 011 012 /** 013 * This class contains methods for projectivizing and deprojectivizing 014 * 015 * @author Jens Nilsson 016 * @since 1.0 017 */ 018 public class PseudoProjectivity { 019 static int id = 0; 020 021 private enum PseudoProjectiveEncoding { 022 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE 023 }; 024 025 private enum CoveredRootAttachment { 026 NONE, LEFT, RIGHT, HEAD 027 }; 028 029 private enum LiftingOrder { 030 SHORTEST, DEEPEST 031 }; 032 033 private PseudoProjectiveEncoding markingStrategy; 034 private CoveredRootAttachment rootAttachment; 035 private LiftingOrder liftingOrder; 036 private Logger configLogger; 037 038 private SymbolTable deprelSymbolTable; 039 private SymbolTable pppathSymbolTable; 040 private SymbolTable ppliftedSymbolTable; 041 private SymbolTable ppcoveredRootSymbolTable; 042 private Vector<Boolean> nodeLifted; 043 private Vector<Vector<DependencyNode>> nodeTrace; 044 private Vector<DependencyNode> headDeprel; 045 private Vector<Boolean> nodePath; 046 private Vector<Boolean> isCoveredRoot; 047 private Vector<Integer> nodeRelationLength; 048 private Vector<String> synacticHeadDeprel; 049 050 051 public PseudoProjectivity() { } 052 053 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger, 054 SymbolTableHandler symbolTables) throws MaltChainedException { 055 nodeLifted = new Vector<Boolean>(); 056 nodeTrace = new Vector<Vector<DependencyNode>>(); 057 headDeprel = new Vector<DependencyNode>(); 058 nodePath = new Vector<Boolean>(); 059 isCoveredRoot = new Vector<Boolean>(); 060 nodeRelationLength = new Vector<Integer>(); 061 synacticHeadDeprel = new Vector<String>(); 062 063 this.configLogger = configLogger; 064 if (markingStrategyString.equalsIgnoreCase("none")) { 065 markingStrategy = PseudoProjectiveEncoding.NONE; 066 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 067 markingStrategy = PseudoProjectiveEncoding.BASELINE; 068 } else if (markingStrategyString.equalsIgnoreCase("head")) { 069 markingStrategy = PseudoProjectiveEncoding.HEAD; 070 } else if (markingStrategyString.equalsIgnoreCase("path")) { 071 markingStrategy = PseudoProjectiveEncoding.PATH; 072 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 073 markingStrategy = PseudoProjectiveEncoding.HEADPATH; 074 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 075 markingStrategy = PseudoProjectiveEncoding.TRACE; 076 } 077 078 this.deprelSymbolTable = symbolTables.getSymbolTable("DEPREL"); 079 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH 080 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 081 this.ppliftedSymbolTable = symbolTables.addSymbolTable("PPLIFTED", deprelSymbolTable); 082 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 083 ppliftedSymbolTable.addSymbol("#true#"); 084 ppliftedSymbolTable.addSymbol("#false#"); 085 } else { 086 ppliftedSymbolTable.addSymbol("#false#"); 087 } 088 } 089 090 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 091 pppathSymbolTable = symbolTables.addSymbolTable("PPPATH", deprelSymbolTable); 092 pppathSymbolTable.addSymbol("#true#"); 093 pppathSymbolTable.addSymbol("#false#"); 094 } 095 096 if (coveredRoot.equalsIgnoreCase("none")) { 097 this.rootAttachment = CoveredRootAttachment.NONE; 098 } else if (coveredRoot.equalsIgnoreCase("left")) { 099 this.rootAttachment = CoveredRootAttachment.LEFT; 100 } else if (coveredRoot.equalsIgnoreCase("right")) { 101 this.rootAttachment = CoveredRootAttachment.RIGHT; 102 } else if (coveredRoot.equalsIgnoreCase("head")) { 103 this.rootAttachment = CoveredRootAttachment.HEAD; 104 } 105 106 if (this.rootAttachment != CoveredRootAttachment.NONE) { 107 this.ppcoveredRootSymbolTable = symbolTables.addSymbolTable("PPCOVERED", deprelSymbolTable); 108 ppcoveredRootSymbolTable.addSymbol("#true#"); 109 ppcoveredRootSymbolTable.addSymbol("#false#"); 110 } 111 if (liftingOrder.equalsIgnoreCase("shortest")) { 112 this.liftingOrder = LiftingOrder.SHORTEST; 113 } else if (liftingOrder.equalsIgnoreCase("deepest")) { 114 this.liftingOrder = LiftingOrder.DEEPEST; 115 } 116 } 117 118 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException { 119 nodeLifted.clear(); 120 nodeTrace.clear(); 121 headDeprel.clear(); 122 nodePath.clear(); 123 isCoveredRoot.clear(); 124 nodeRelationLength.clear(); 125 126 for (int index : pdg.getDependencyIndices()) { 127 nodeLifted.add(false); 128 nodeTrace.add(new Vector<DependencyNode>()); 129 headDeprel.add(null); 130 nodePath.add(false); 131 isCoveredRoot.add(false); 132 if (ppliftedSymbolTable != null && index != 0) { 133 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#")); 134 } 135 if (pppathSymbolTable != null && index != 0) { 136 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#")); 137 } 138 if (ppcoveredRootSymbolTable != null && index != 0) { 139 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#")); 140 } 141 } 142 computeRelationLength(pdg); 143 } 144 145 public void projectivize(DependencyStructure pdg) throws MaltChainedException { 146 id++; 147 if (!pdg.isTree()) { 148 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n"); 149 return; 150 } 151 DependencyNode deepestNonProjectiveNode; 152 initProjectivization(pdg); 153 154 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 155 while (!pdg.isProjective()) { 156 if (liftingOrder == LiftingOrder.DEEPEST) { 157 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 158 } else { 159 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 160 } 161 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) { 162 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 163 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 164 setPath(deepestNonProjectiveNode.getHead()); 165 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 166 } 167 } 168 if (rootAttachment == CoveredRootAttachment.NONE) { 169 deattachCoveredRootsForProjectivization(pdg); 170 } 171 } else if (rootAttachment != CoveredRootAttachment.NONE) { 172 for (int index : pdg.getTokenIndices()) { 173 attachCoveredRoots(pdg, pdg.getTokenNode(index)); 174 } 175 } 176 // collectTraceStatistics(pdg); 177 assignPseudoProjectiveDeprels(pdg); 178 } 179 180 181 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException { 182 assignPseudoProjectiveDeprelsForMerge(pdg); 183 } 184 185 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException { 186 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex; 187 String label; 188 initDeprojeciviztion(pdg); 189 for (int index : pdg.getTokenIndices()) { 190 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 191 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 192 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) { 193 label = label.substring(0, pathLabelIndex); 194 setLabel(pdg.getTokenNode(index), label); 195 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#")); 196 } 197 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) { 198 if (movedLabelIndex + 1 < label.length()) { 199 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1))); 200 } else { 201 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#")); 202 } 203 label = label.substring(0, movedLabelIndex); 204 setLabel(pdg.getTokenNode(index), label); 205 } 206 } 207 } 208 for (int index : pdg.getTokenIndices()) { 209 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 210 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 211 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) { 212 label = label.substring(0, coveredArcLabelIndex); 213 setLabel(pdg.getTokenNode(index), label); 214 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 215 } 216 } 217 } 218 } 219 220 private void setHeadDeprel(DependencyNode node, DependencyNode parent) { 221 if (headDeprel.get(node.getIndex()) == null) { 222 headDeprel.set(node.getIndex(), parent); 223 } 224 nodeTrace.set(node.getIndex(), headDeprel); 225 } 226 227 private void setPath(DependencyNode node) { 228 nodePath.set(node.getIndex(), true); 229 } 230 231 private boolean isCoveredRoot(DependencyNode node) { 232 return isCoveredRoot.get(node.getIndex()); 233 } 234 235 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException { 236 for (int index : pdg.getTokenIndices()) { 237 if (isCoveredRoot(pdg.getTokenNode(index))) { 238 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex()); 239 } 240 } 241 } 242 243 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException { 244 int i; 245 boolean foundCoveredRoot = false; 246 DependencyNode coveredRootHead; 247 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead() 248 .getIndex()); i++) { 249 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex(); 250 if (leftMostIndex == -1) { 251 leftMostIndex = i; 252 } 253 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex(); 254 if (rightMostIndex == -1) { 255 rightMostIndex = i; 256 } 257 // if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 258 // && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < pdg.getDependencyNode(i).getLeftmostDescendantIndex() 259 // && pdg.getDependencyNode(i).getRightmostDescendantIndex() < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 260 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 261 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex 262 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 263 if (rootAttachment == CoveredRootAttachment.LEFT) { 264 if (deepest.getHead().getIndex() < deepest.getIndex()) { 265 coveredRootHead = deepest.getHead(); 266 } else { 267 coveredRootHead = deepest; 268 } 269 } else if (rootAttachment == CoveredRootAttachment.RIGHT) { 270 if (deepest.getIndex() < deepest.getHead().getIndex()) { 271 coveredRootHead = deepest.getHead(); 272 } else { 273 coveredRootHead = deepest; 274 } 275 } else { 276 coveredRootHead = deepest.getHead(); 277 } 278 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex()); 279 setCoveredRoot(pdg.getDependencyNode(i)); 280 foundCoveredRoot = true; 281 } 282 } 283 return foundCoveredRoot; 284 } 285 286 private void setCoveredRoot(DependencyNode node) { 287 isCoveredRoot.set(node.getIndex(), true); 288 } 289 290 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 291 DependencyNode deepestNonProjectiveNode = null; 292 for (int index : pdg.getDependencyIndices()) { 293 if (!pdg.getDependencyNode(index).isProjective() 294 && (deepestNonProjectiveNode == null 295 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) { 296 deepestNonProjectiveNode = pdg.getDependencyNode(index); 297 } 298 } 299 300 return deepestNonProjectiveNode; 301 } 302 303 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 304 DependencyNode shortestNonProjectiveNode = null; 305 for (int index : pdg.getDependencyIndices()) { 306 if (!pdg.getDependencyNode(index).isProjective() 307 && (shortestNonProjectiveNode == null 308 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 309 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) { 310 shortestNonProjectiveNode = pdg.getDependencyNode(index); 311 } 312 } 313 return shortestNonProjectiveNode; 314 } 315 316 317 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException { 318 nodeRelationLength.add(0); 319 for (int index : pdg.getTokenIndices()) { 320 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex())); 321 } 322 } 323 324 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException { 325 int newLabelCode; 326 for (int index : pdg.getTokenIndices()) { 327 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 328 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH 329 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 330 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 331 if (nodeLifted.get(index)) { 332 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#"); 333 } else { 334 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 335 } 336 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 337 } else { 338 if (nodeLifted.get(index)) { 339 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode( 340 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable))); 341 } else { 342 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 343 } 344 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 345 } 346 } 347 348 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 349 if (nodePath.get(index)) { 350 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#"); 351 } else { 352 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#"); 353 } 354 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode); 355 } 356 357 // if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 358 // if (nodeLifted.get(i)) { 359 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getNode(i).getLabelCode(deprelSymbolTable)) + "|"; 360 // addArc(pdg, pdg.getNode(i), pdg.getNode(i).getHead(), newLabel); 361 // } 362 // } 363 } else if (rootAttachment != CoveredRootAttachment.NONE) { 364 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 365 } 366 } 367 } 368 369 private void setLabel(DependencyNode node, String label) throws MaltChainedException { 370 // node.getLabelCode().clear(); 371 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label)); 372 } 373 374 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException { 375 Vector<String> originalDeprel = new Vector<String>(); 376 String newLabel; 377 originalDeprel.add(null); 378 for (int index : pdg.getTokenIndices()) { 379 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable))); 380 } 381 for (int index : pdg.getTokenIndices()) { 382 newLabel = null; 383 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 384 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 385 if (nodeLifted.get(index)) { 386 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 387 + originalDeprel.get(headDeprel.get(index).getIndex()); 388 // } else { 389 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 390 } 391 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 392 if (nodeLifted.get(index) && nodePath.get(index)) { 393 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%"; 394 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 395 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 396 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 397 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%"; 398 } 399 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 400 if (nodeLifted.get(index) && nodePath.get(index)) { 401 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 402 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%"; 403 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 404 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 405 + originalDeprel.get(headDeprel.get(index).getIndex()); 406 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 407 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%"; 408 } 409 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 410 if (nodeLifted.get(index)) { 411 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 412 } 413 } 414 } else if (rootAttachment != CoveredRootAttachment.NONE) { 415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null"; 416 } 417 if (newLabel != null) { 418 setLabel(pdg.getDependencyNode(index), newLabel); 419 } 420 } 421 } 422 423 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException { 424 initDeprojeciviztion(pdg); 425 426 for (int index : pdg.getTokenIndices()) { 427 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 428 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable) 429 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) { 430 setPath(pdg.getDependencyNode(index)); 431 } 432 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable) 433 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 434 nodeLifted.set(index, true); 435 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 436 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge() 437 .getLabelCode(ppliftedSymbolTable))); 438 } 439 } 440 } 441 } 442 deattachCoveredRootsForDeprojectivization(pdg); 443 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) { 444 deprojectivizeWithHead(pdg, pdg.getDependencyRoot()); 445 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 446 deprojectivizeWithPath(pdg, pdg.getDependencyRoot()); 447 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 448 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot()); 449 // } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 450 // deprojectivizeWithTrace(pdg, pdg.getRoot()); 451 } 452 } 453 454 private void initDeprojeciviztion(DependencyStructure pdg) { 455 nodeLifted.clear(); 456 nodePath.clear(); 457 synacticHeadDeprel.clear(); 458 for (int index : pdg.getDependencyIndices()) { 459 nodeLifted.add(false); 460 nodePath.add(false); 461 synacticHeadDeprel.add(null); 462 } 463 } 464 465 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException { 466 for (int index : pdg.getTokenIndices()) { 467 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 468 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable) 469 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals( 470 "#true#")) { 471 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 472 } 473 } 474 } 475 } 476 477 // Check whether there is at least one node in the specified dependency structure that can be lifted. 478 // If this is not the case, there is no need to call deprojectivizeWithHead. 479 480 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException { 481 for (int index : pdg.getDependencyIndices()) { 482 if (nodeLifted.get(index)) { 483 DependencyNode node = pdg.getDependencyNode(index); 484 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) { 485 return true; 486 } 487 } 488 } 489 return false; 490 } 491 492 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 493 boolean success = true, childSuccess = false; 494 int i, childAttempts = 2; 495 DependencyNode child, possibleSyntacticHead; 496 String syntacticHeadDeprel; 497 if (nodeLifted.get(node.getIndex())) { 498 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex()); 499 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel); 500 if (possibleSyntacticHead != null) { 501 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 502 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 503 nodeLifted.set(node.getIndex(), false); 504 } else { 505 success = false; 506 } 507 } 508 while (!childSuccess && childAttempts > 0) { 509 childSuccess = true; 510 Vector<DependencyNode> children = new Vector<DependencyNode>(); 511 i = 0; 512 while ((child = node.getLeftDependent(i)) != null) { 513 children.add(child); 514 i++; 515 } 516 i = 0; 517 while ((child = node.getRightDependent(i)) != null) { 518 children.add(child); 519 i++; 520 } 521 for (i = 0; i < children.size(); i++) { 522 child = children.get(i); 523 if (!deprojectivizeWithHead(pdg, child)) { 524 childSuccess = false; 525 } 526 } 527 childAttempts--; 528 } 529 return childSuccess && success; 530 } 531 532 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel) 533 throws MaltChainedException { 534 DependencyNode dependent; 535 String dependentDeprel; 536 Vector<DependencyNode> nodes = new Vector<DependencyNode>(); 537 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false)); 538 while (nodes.size() > 0) { 539 dependent = nodes.remove(0); 540 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 541 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 542 if (dependentDeprel.equals(syntacticHeadDeprel)) { 543 return dependent; 544 } 545 } 546 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false)); 547 } 548 return null; 549 } 550 551 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 552 boolean percentOnly) { 553 int i, j; 554 Vector<DependencyNode> dependents = new Vector<DependencyNode>(); 555 DependencyNode leftChild, rightChild; 556 557 i = governor.getLeftDependentCount() - 1; 558 j = 0; 559 leftChild = governor.getLeftDependent(i--); 560 rightChild = governor.getRightDependent(j++); 561 562 while (leftChild != null && rightChild != null) { 563 if (leftChild == avoid) { 564 leftChild = governor.getLeftDependent(i--); 565 } else if (rightChild == avoid) { 566 rightChild = governor.getRightDependent(j++); 567 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) { 568 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) { 569 dependents.add(leftChild); 570 } 571 leftChild = governor.getLeftDependent(i--); 572 } else { 573 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) { 574 dependents.add(rightChild); 575 } 576 rightChild = governor.getRightDependent(j++); 577 } 578 } 579 while (leftChild != null) { 580 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) { 581 dependents.add(leftChild); 582 } 583 leftChild = governor.getLeftDependent(i--); 584 } 585 while (rightChild != null) { 586 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) { 587 dependents.add(rightChild); 588 } 589 rightChild = governor.getRightDependent(j++); 590 } 591 return dependents; 592 } 593 594 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 595 boolean success = true, childSuccess = false; 596 int i, childAttempts = 2; 597 DependencyNode child, possibleSyntacticHead; 598 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 599 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 600 if (possibleSyntacticHead != null) { 601 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 602 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 603 nodeLifted.set(node.getIndex(), false); 604 } else { 605 success = false; 606 } 607 } 608 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 609 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 610 if (possibleSyntacticHead != null) { 611 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 612 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 613 nodeLifted.set(node.getIndex(), false); 614 } else { 615 success = false; 616 } 617 } 618 while (!childSuccess && childAttempts > 0) { 619 childSuccess = true; 620 Vector<DependencyNode> children = new Vector<DependencyNode>(); 621 i = 0; 622 while ((child = node.getLeftDependent(i)) != null) { 623 children.add(child); 624 i++; 625 } 626 i = 0; 627 while ((child = node.getRightDependent(i)) != null) { 628 children.add(child); 629 i++; 630 } 631 for (i = 0; i < children.size(); i++) { 632 child = children.get(i); 633 if (!deprojectivizeWithPath(pdg, child)) { 634 childSuccess = false; 635 } 636 } 637 childAttempts--; 638 } 639 return childSuccess && success; 640 } 641 642 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) { 643 DependencyNode dependent; 644 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes; 645 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 646 while (nodes.size() > 0) { 647 dependent = nodes.remove(0); 648 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) { 649 return dependent; 650 } 651 nodes.addAll(newNodes); 652 } 653 return null; 654 } 655 656 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 657 boolean success = true, childSuccess = false; 658 int i, childAttempts = 2; 659 DependencyNode child, possibleSyntacticHead; 660 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 661 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 662 .getIndex())); 663 if (possibleSyntacticHead != null) { 664 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 665 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 666 nodeLifted.set(node.getIndex(), false); 667 } else { 668 success = false; 669 } 670 } 671 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 672 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 673 .getIndex())); 674 if (possibleSyntacticHead != null) { 675 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 676 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 677 nodeLifted.set(node.getIndex(), false); 678 } else { 679 success = false; 680 } 681 } 682 while (!childSuccess && childAttempts > 0) { 683 childSuccess = true; 684 Vector<DependencyNode> children = new Vector<DependencyNode>(); 685 i = 0; 686 while ((child = node.getLeftDependent(i)) != null) { 687 children.add(child); 688 i++; 689 } 690 i = 0; 691 while ((child = node.getRightDependent(i)) != null) { 692 children.add(child); 693 i++; 694 } 695 for (i = 0; i < children.size(); i++) { 696 child = children.get(i); 697 if (!deprojectivizeWithHeadAndPath(pdg, child)) { 698 childSuccess = false; 699 } 700 } 701 childAttempts--; 702 } 703 return childSuccess && success; 704 } 705 706 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode) 707 throws MaltChainedException { 708 DependencyNode dependent; 709 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>(); 710 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 711 while (nodes.size() > 0) { 712 dependent = nodes.remove(0); 713 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0 714 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 715 return dependent; 716 } 717 nodes.addAll(newNodes); 718 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 719 && newNodes.size() != 0) { 720 secondChance.add(dependent); 721 } 722 } 723 if (secondChance.size() > 0) { 724 return secondChance.firstElement(); 725 } 726 return null; 727 } 728 }