1 /**
2 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3 */
4 package net.sourceforge.pmd.cpd;
5
6 import org.apache.tools.ant.BuildException;
7 import org.apache.tools.ant.DirectoryScanner;
8 import org.apache.tools.ant.Project;
9 import org.apache.tools.ant.Task;
10 import org.apache.tools.ant.types.EnumeratedAttribute;
11 import org.apache.tools.ant.types.FileSet;
12
13 import java.io.File;
14 import java.io.IOException;
15 import java.util.ArrayList;
16 import java.util.List;
17 import java.util.Properties;
18
19 /**
20 * CPDTask
21 * <p/>
22 * Runs the CPD utility via ant. The ant task looks like this:
23 * <p/>
24 * <project name="CPDProj" default="main" basedir=".">
25 * <taskdef name="cpd" classname="net.sourceforge.pmd.cpd.CPDTask" />
26 * <target name="main">
27 * <cpd encoding="UTF-16LE" language="java" ignoreIdentifiers="true" ignoreLiterals="true" minimumTokenCount="100" outputFile="c:\cpdrun.txt">
28 * <fileset dir="/path/to/my/src">
29 * <include name="*.java"/>
30 * </fileset>
31 * </cpd>
32 * </target>
33 * </project>
34 * <p/>
35 * Required: minimumTokenCount (greater than zero) and at least one fileset; outputFile is optional
36 */
37 public class CPDTask extends Task {
38
39 private static final String TEXT_FORMAT = "text";
40 private static final String XML_FORMAT = "xml";
41 private static final String CSV_FORMAT = "csv";
42
43 private String format = TEXT_FORMAT;
44 private String language = "java";
45 private int minimumTokenCount;
46 private boolean ignoreLiterals;
47 private boolean ignoreIdentifiers;
48 private File outputFile;
49 private String encoding = System.getProperty("file.encoding");
50 private List<FileSet> filesets = new ArrayList<FileSet>();
51
52 public void execute() throws BuildException {
53 try {
54 validateFields();
55
56 log("Starting run, minimumTokenCount is " + minimumTokenCount, Project.MSG_INFO);
57
58 log("Tokenizing files", Project.MSG_INFO);
59 CPD cpd = new CPD(minimumTokenCount, createLanguage());
60 cpd.setEncoding(encoding);
61 tokenizeFiles(cpd);
62
63 log("Starting to analyze code", Project.MSG_INFO);
64 long timeTaken = analyzeCode(cpd);
65 log("Done analyzing code; that took " + timeTaken + " milliseconds");
66
67 log("Generating report", Project.MSG_INFO);
68 report(cpd);
69 } catch (IOException ioe) {
70 log(ioe.toString(), Project.MSG_ERR);
71 throw new BuildException("IOException during task execution", ioe);
72 } catch (ReportException re) {
73 re.printStackTrace();
74 log(re.toString(), Project.MSG_ERR);
75 throw new BuildException("ReportException during task execution", re);
76 }
77 }
78
79 private Language createLanguage() {
80 Properties p = new Properties();
81 if (ignoreLiterals) {
82 p.setProperty(JavaTokenizer.IGNORE_LITERALS, "true");
83 }
84 if (ignoreIdentifiers) {
85 p.setProperty(JavaTokenizer.IGNORE_IDENTIFIERS, "true");
86 }
87 return new LanguageFactory().createLanguage(language, p);
88 }
89
90 private void report(CPD cpd) throws ReportException {
91 if (!cpd.getMatches().hasNext()) {
92 log("No duplicates over " + minimumTokenCount + " tokens found", Project.MSG_INFO);
93 }
94 Renderer renderer = createRenderer();
95 FileReporter reporter;
96 if (outputFile == null) {
97 reporter = new FileReporter(encoding);
98 } else if (outputFile.isAbsolute()) {
99 reporter = new FileReporter(outputFile, encoding);
100 } else {
101 reporter = new FileReporter(new File(getProject().getBaseDir(), outputFile.toString()), encoding);
102 }
103 reporter.report(renderer.render(cpd.getMatches()));
104 }
105
106 private void tokenizeFiles(CPD cpd) throws IOException {
107 for (FileSet fileSet: filesets) {
108 DirectoryScanner directoryScanner = fileSet.getDirectoryScanner(getProject());
109 String[] includedFiles = directoryScanner.getIncludedFiles();
110 for (int i = 0; i < includedFiles.length; i++) {
111 File file = new File(directoryScanner.getBasedir() + System.getProperty("file.separator") + includedFiles[i]);
112 log("Tokenizing " + file.getAbsolutePath(), Project.MSG_VERBOSE);
113 cpd.add(file);
114 }
115 }
116 }
117
118 private long analyzeCode(CPD cpd) {
119 long start = System.currentTimeMillis();
120 cpd.go();
121 long stop = System.currentTimeMillis();
122 return stop - start;
123 }
124
125 private Renderer createRenderer() {
126 if (format.equals(TEXT_FORMAT)) {
127 return new SimpleRenderer();
128 } else if (format.equals(CSV_FORMAT)) {
129 return new CSVRenderer();
130 }
131 return new XMLRenderer(encoding);
132 }
133
134 private void validateFields() throws BuildException {
135 if (minimumTokenCount == 0) {
136 throw new BuildException("minimumTokenCount is required and must be greater than zero");
137 } else if (filesets.isEmpty()) {
138 throw new BuildException("Must include at least one FileSet");
139 }
140 }
141
142 public void addFileset(FileSet set) {
143 filesets.add(set);
144 }
145
146 public void setMinimumTokenCount(int minimumTokenCount) {
147 this.minimumTokenCount = minimumTokenCount;
148 }
149
150 public void setIgnoreLiterals(boolean value) {
151 this.ignoreLiterals = value;
152 }
153
154 public void setIgnoreIdentifiers(boolean value) {
155 this.ignoreIdentifiers = value;
156 }
157
158 public void setOutputFile(File outputFile) {
159 this.outputFile = outputFile;
160 }
161
162 public void setFormat(FormatAttribute formatAttribute) {
163 format = formatAttribute.getValue();
164 }
165
166 public void setLanguage(LanguageAttribute languageAttribute) {
167 language = languageAttribute.getValue();
168 }
169
170 public void setEncoding(String encodingValue) {
171 encoding = encodingValue;
172 }
173
174 public static class FormatAttribute extends EnumeratedAttribute {
175 private static final String[] FORMATS = new String[]{XML_FORMAT, TEXT_FORMAT, CSV_FORMAT};
176 public String[] getValues() {
177 return FORMATS;
178 }
179 }
180
181
182
183
184
185
186
187 public static class LanguageAttribute extends EnumeratedAttribute {
188 private static final String[] LANGUAGES = new String[]{"java","jsp","cpp", "c","php", "ruby", "fortran"};
189 public String[] getValues() {
190 return LANGUAGES;
191 }
192 }
193 }