package org.maltparser.core.syntaxgraph.writer;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatException;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.TokenStructure;
import org.maltparser.core.syntaxgraph.node.TokenNode;

/**
 * Writes token structures in a tab-separated column format: one line per
 * token with the columns of the configured {@link DataFormatInstance}
 * separated by tab characters, and one empty line after every sentence.
 *
 * <p>A writer must be opened (see the {@code open} methods or
 * {@link #setWriter(BufferedWriter)}) and a data format instance must be set
 * before sentences are written.
 *
 * @author Johan Hall
 */
public class TabWriter implements SyntaxGraphWriter {
    /** Name of the column that holds the token index. */
    private static final String ID_COLUMN = "ID";
    /** Written in place of a missing or empty input-column value. */
    private static final char IGNORE_COLUMN_SIGN = '_';
    private static final char TAB = '\t';
    private static final char NEWLINE = '\n';

    private BufferedWriter writer;
    private DataFormatInstance dataFormatInstance;
    /** Scratch buffer for a single column value; cleared before every column. */
    private final StringBuilder output;
    /** Whether {@link #close()} closes the current writer or merely flushes it. */
    private boolean closeStream = true;

    public TabWriter() {
        output = new StringBuilder();
    }

    /**
     * Opens the named file for writing and makes it the current output.
     * The file is closed by {@link #close()}.
     *
     * @param fileName    path of the output file
     * @param charsetName name of the character encoding to use
     * @throws MaltChainedException if the file cannot be opened, the encoding
     *         is not supported, or the previously opened writer cannot be closed
     */
    public void open(String fileName, String charsetName) throws MaltChainedException {
        final FileOutputStream fileStream;
        try {
            fileStream = new FileOutputStream(fileName);
        } catch (FileNotFoundException e) {
            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
        }

        final OutputStreamWriter encoder;
        try {
            encoder = new OutputStreamWriter(fileStream, charsetName);
        } catch (UnsupportedEncodingException e) {
            // The file is already open at this point; release it before reporting.
            closeQuietly(fileStream);
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        }
        open(encoder, true);
    }

    /**
     * Makes the given stream the current output. {@code System.out} and
     * {@code System.err} are only flushed, never closed, by {@link #close()};
     * any other stream is closed.
     *
     * @param os          the stream to write to
     * @param charsetName name of the character encoding to use
     * @throws MaltChainedException if the encoding is not supported or the
     *         previously opened writer cannot be closed
     */
    public void open(OutputStream os, String charsetName) throws MaltChainedException {
        final boolean ownsStream = os != System.out && os != System.err;
        try {
            open(new OutputStreamWriter(os, charsetName), ownsStream);
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        }
    }

    /**
     * Releases the previous writer under its own ownership setting, then
     * records the ownership of the new stream and installs it.
     */
    private void open(OutputStreamWriter osw, boolean ownsStream) throws MaltChainedException {
        close();
        closeStream = ownsStream;
        setWriter(new BufferedWriter(osw));
    }

    /** Does nothing; this format has no prolog. */
    public void writeProlog() throws MaltChainedException {

    }

    /**
     * Writes every token of the sentence on its own line, followed by an
     * empty line, and flushes the writer. Nothing is written when the graph
     * is {@code null}, has no tokens, or no data format instance is set.
     *
     * @param syntaxGraph the sentence to write
     * @throws MaltChainedException if writing fails; the writer is closed
     *         before the exception is thrown
     */
    public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
        if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
            return;
        }
        try {
            for (int tokenIndex : syntaxGraph.getTokenIndices()) {
                writeToken(syntaxGraph, tokenIndex);
            }
            // Sentences are separated by an empty line.
            writer.write(NEWLINE);
            writer.flush();
        } catch (IOException e) {
            close();
            throw new DataFormatException("Could not write to the output file. ", e);
        }
    }

    /** Writes all columns of one token, tab-separated, followed by a newline. */
    private void writeToken(TokenStructure syntaxGraph, int tokenIndex) throws IOException, MaltChainedException {
        final Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
        while (columns.hasNext()) {
            writeColumn(syntaxGraph, tokenIndex, columns.next());
            if (columns.hasNext()) {
                writer.write(TAB);
            }
        }
        writer.write(NEWLINE);
    }

    /** Writes the value of a single column for the token with the given index. */
    private void writeColumn(TokenStructure syntaxGraph, int tokenIndex, ColumnDescription column)
            throws IOException, MaltChainedException {
        // Clear up front so that a failure in an earlier column cannot leak into this one.
        output.setLength(0);

        if (column.getCategory() == ColumnDescription.INPUT
                && column.getType() != ColumnDescription.IGNORE) {
            writeInputColumn(syntaxGraph, tokenIndex, column);
        } else if (column.getCategory() == ColumnDescription.HEAD
                && column.getType() != ColumnDescription.IGNORE
                && syntaxGraph instanceof DependencyStructure) {
            writeHeadColumn((DependencyStructure) syntaxGraph, tokenIndex);
        } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL
                && column.getType() != ColumnDescription.IGNORE
                && syntaxGraph instanceof DependencyStructure) {
            writeEdgeLabelColumn((DependencyStructure) syntaxGraph, tokenIndex, column);
        } else {
            writer.write(column.getDefaultOutput());
        }
    }

    /**
     * Writes an input column: the token index for the ID column, otherwise
     * the token's label for the column, or the ignore sign when the label is
     * missing or empty.
     */
    private void writeInputColumn(TokenStructure syntaxGraph, int tokenIndex, ColumnDescription column)
            throws IOException, MaltChainedException {
        if (ID_COLUMN.equals(column.getName())) {
            writer.write(Integer.toString(tokenIndex));
            return;
        }
        final TokenNode node = syntaxGraph.getTokenNode(tokenIndex);
        if (node.hasLabel(column.getSymbolTable())) {
            output.append(node.getLabelSymbol(column.getSymbolTable()));
        }
        if (output.length() != 0) {
            writer.write(output.toString());
        } else {
            writer.write(IGNORE_COLUMN_SIGN);
        }
    }

    /** Writes the index of the token's head, or 0 when the token has no head. */
    private void writeHeadColumn(DependencyStructure dependencyGraph, int tokenIndex)
            throws IOException, MaltChainedException {
        if (dependencyGraph.getDependencyNode(tokenIndex).hasHead()) {
            writer.write(Integer.toString(dependencyGraph.getDependencyNode(tokenIndex).getHead().getIndex()));
        } else {
            writer.write(Integer.toString(0));
        }
    }

    /**
     * Writes the label of the edge to the token's head, falling back to the
     * graph's default root edge label when the token has no head or no label
     * for this column. An empty label results in an empty column.
     */
    private void writeEdgeLabelColumn(DependencyStructure dependencyGraph, int tokenIndex, ColumnDescription column)
            throws IOException, MaltChainedException {
        if (dependencyGraph.getDependencyNode(tokenIndex).hasHead()
                && dependencyGraph.getDependencyNode(tokenIndex).hasHeadEdgeLabel(column.getSymbolTable())) {
            output.append(dependencyGraph.getDependencyNode(tokenIndex).getHeadEdgeLabelSymbol(column.getSymbolTable()));
        } else {
            output.append(dependencyGraph.getDefaultRootEdgeLabelSymbol(column.getSymbolTable()));
        }
        if (output.length() != 0) {
            writer.write(output.toString());
        }
    }

    /** Does nothing; this format has no epilog. */
    public void writeEpilog() throws MaltChainedException {

    }

    /**
     * @return the current writer, or {@code null} when none is open
     */
    public BufferedWriter getWriter() {
        return writer;
    }

    /**
     * Replaces the current writer, releasing the previous one first via
     * {@link #close()}. The setting that decides whether {@link #close()}
     * closes or only flushes the writer is left unchanged.
     *
     * @param writer the writer to use from now on
     * @throws MaltChainedException if the previous writer cannot be closed
     */
    public void setWriter(BufferedWriter writer) throws MaltChainedException {
        close();
        this.writer = writer;
    }

    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
        this.dataFormatInstance = dataFormatInstance;
    }

    /**
     * @return always {@code null}; this writer has no options
     */
    public String getOptions() {
        return null;
    }

    /** Does nothing; the option string is ignored. */
    public void setOptions(String optionString) throws MaltChainedException {

    }

    /**
     * Releases the current writer, if any. A writer on {@code System.out} or
     * {@code System.err} is only flushed; any other writer is closed. The
     * writer reference is dropped even when flushing or closing fails, so a
     * failed stream is not retried or leaked.
     *
     * @throws MaltChainedException if flushing or closing the writer fails
     */
    public void close() throws MaltChainedException {
        final BufferedWriter current = writer;
        if (current == null) {
            return;
        }
        writer = null;
        try {
            if (closeStream) {
                // close() flushes buffered output before releasing the stream.
                current.close();
            } else {
                current.flush();
            }
        } catch (IOException e) {
            throw new DataFormatException("Could not close the output file. ", e);
        }
    }

    /** Best-effort close used on error paths, where the original failure is what gets reported. */
    private static void closeQuietly(OutputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done; the caller is already reporting an error.
        }
    }
}