Skip to content

Commit fae0c90

Browse files
committed
v1.4: Add Filename comparator with FuzzyWuzzy search library
1 parent cb7e563 commit fae0c90

File tree

14 files changed

+1523
-863
lines changed

14 files changed

+1523
-863
lines changed

.classpath

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<classpath>
3+
<classpathentry kind="src" path="src"/>
4+
<classpathentry kind="src" path="resources"/>
5+
<classpathentry kind="src" path="config"/>
6+
<classpathentry kind="src" path="tests"/>
7+
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6"/>
8+
<classpathentry kind="lib" path="lib/jackson-core-asl-1.9.12.jar"/>
9+
<classpathentry kind="lib" path="lib/jackson-mapper-asl-1.9.12.jar"/>
10+
<classpathentry kind="lib" path="lib/log4j-1.2.17.jar"/>
11+
<classpathentry kind="lib" path="lib/cqengine-1.0.3-all.jar"/>
12+
<classpathentry kind="lib" path="lib/commons-configuration-1.10.jar" sourcepath="/mnt/data01/workfolder/Java/libs/commons-configuration-1.10/commons-configuration-1.10-sources.jar"/>
13+
<classpathentry kind="lib" path="lib/commons-lang-2.6.jar"/>
14+
<classpathentry kind="lib" path="lib/commons-logging-1.2.jar"/>
15+
<classpathentry kind="lib" path="lib/commons-collections-3.2.1.jar"/>
16+
<classpathentry kind="lib" path="lib/commons-text-1.1.jar"/>
17+
<classpathentry kind="lib" path="lib/junit-4.12.jar"/>
18+
<classpathentry kind="lib" path="lib/fuzzywuzzy-1.1.7.jar"/>
19+
<classpathentry kind="output" path="bin"/>
20+
</classpath>

buildjar.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
<zipfileset excludes="META-INF/*.SF" src="lib/commons-lang-2.6.jar"/>
1515
<zipfileset excludes="META-INF/*.SF" src="lib/commons-logging-1.2.jar"/>
1616
<zipfileset excludes="META-INF/*.SF" src="lib/cqengine-1.0.3-all.jar"/>
17+
<zipfileset excludes="META-INF/*.SF" src="lib/fuzzywuzzy-1.1.7.jar"/>
1718
<zipfileset excludes="META-INF/*.SF" src="lib/jackson-core-asl-1.9.12.jar"/>
1819
<zipfileset excludes="META-INF/*.SF" src="lib/jackson-mapper-asl-1.9.12.jar"/>
1920
<zipfileset excludes="META-INF/*.SF" src="lib/log4j-1.2.17.jar"/>

config/log4j.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
log4j.rootLogger=DEBUG, A1
33
log4j.logger.nnwl.jduplicatefinder.engine.comparators.DateTime=INFO
44
log4j.logger.nnwl.jduplicatefinder.engine.comparators.Digest=DEBUG
5+
log4j.logger.nnwl.jduplicatefinder.engine.comparators.Filename=INFO
56
log4j.logger.nnwl.jduplicatefinder.engine.comparators.Filesize=INFO
67

78
# A1 is set to be a ConsoleAppender.

lib/commons-text-1.1.jar

126 KB
Binary file not shown.

lib/fuzzywuzzy-1.1.7.jar

32 KB
Binary file not shown.
Lines changed: 115 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1,98 +1,115 @@
1-
package nnwl.jduplicatefinder.engine;
2-
3-
import nnwl.jduplicatefinder.engine.comparators.AbstractDuplicateComparator;
4-
import org.apache.log4j.Logger;
5-
6-
import java.nio.file.Path;
7-
import java.util.ArrayList;
8-
import java.util.List;
9-
import java.util.Map;
10-
import java.util.TreeMap;
11-
12-
/**
13-
* JDuplicateFinder
14-
*
15-
* @author Anael Ollier <nanawel NOSPAM [at] gmail [dot] com>
16-
* @license GPLv3 - See LICENSE
17-
*/
18-
public class FileResult {
19-
20-
private static final Logger logger = Logger.getLogger(FileResult.class);
21-
22-
public static final int UNIQUE = 0;
23-
public static final int NOT_UNIQUE = 1;
24-
25-
protected Path referenceFile;
26-
27-
protected List<SimilarityResult> similarityResults = new ArrayList<SimilarityResult>();
28-
29-
protected int status;
30-
31-
protected List<AbstractDuplicateComparator> currentRunComparators;
32-
33-
protected Map<String, ? extends SimilarityResult> combinedSimilarityResults;
34-
35-
public FileResult(Path file) {
36-
this.referenceFile = file;
37-
}
38-
39-
public FileResult(Path file, List<SimilarityResult> similarityResults) {
40-
this(file);
41-
this.appendSimilarityResults(similarityResults);
42-
}
43-
44-
public Path getReferenceFile() {
45-
return referenceFile;
46-
}
47-
48-
public List<SimilarityResult> getSimilarityResults() {
49-
return similarityResults;
50-
}
51-
52-
public int getStatus() {
53-
return status;
54-
}
55-
56-
public void appendSimilarityResult(SimilarityResult similarResult) {
57-
this.similarityResults.add(similarResult);
58-
this.updateStatus();
59-
this.combinedSimilarityResults = null;
60-
}
61-
62-
public void appendSimilarityResults(List<SimilarityResult> similarityResults) {
63-
this.similarityResults.addAll(similarityResults);
64-
this.updateStatus();
65-
this.combinedSimilarityResults = null;
66-
}
67-
68-
protected void updateStatus() {
69-
if (this.similarityResults.isEmpty()) {
70-
this.status = UNIQUE;
71-
} else {
72-
this.status = NOT_UNIQUE;
73-
}
74-
}
75-
76-
public void setCurrentRunComparators(List<AbstractDuplicateComparator> comparators) {
77-
this.currentRunComparators = comparators;
78-
}
79-
80-
public Map<String, ? extends SimilarityResult> getCombinedSimilarityResults() {
81-
if (this.combinedSimilarityResults == null) {
82-
Map<String, CombinedSimilarityResult> combinedSimilarityResults = new TreeMap<>();
83-
for (SimilarityResult sr : this.similarityResults) {
84-
CombinedSimilarityResult csr;
85-
if (combinedSimilarityResults.containsKey(sr.getSimilarFile().toString())) {
86-
csr = combinedSimilarityResults.get(sr.getSimilarFile().toString());
87-
csr.addSimilarityResult(sr);
88-
} else {
89-
csr = new CombinedSimilarityResult(sr);
90-
csr.setComparators(this.currentRunComparators);
91-
combinedSimilarityResults.put(csr.getSimilarFile().toString(), csr);
92-
}
93-
}
94-
this.combinedSimilarityResults = combinedSimilarityResults;
95-
}
96-
return this.combinedSimilarityResults;
97-
}
98-
}
1+
package nnwl.jduplicatefinder.engine;
2+
3+
import nnwl.jduplicatefinder.engine.comparators.AbstractDuplicateComparator;
4+
import org.apache.log4j.Logger;
5+
6+
import java.nio.file.Path;
7+
import java.util.ArrayList;
8+
import java.util.List;
9+
import java.util.Map;
10+
import java.util.TreeMap;
11+
12+
/**
13+
* JDuplicateFinder
14+
*
15+
* @author Anael Ollier <nanawel NOSPAM [at] gmail [dot] com>
16+
* @license GPLv3 - See LICENSE
17+
*/
18+
public class FileResult {
19+
20+
private static final Logger logger = Logger.getLogger(FileResult.class);
21+
22+
public static final int UNIQUE = 0;
23+
public static final int NOT_UNIQUE = 1;
24+
25+
protected Path referenceFile;
26+
27+
protected List<SimilarityResult> similarityResults = new ArrayList<SimilarityResult>();
28+
29+
protected int status;
30+
31+
protected List<AbstractDuplicateComparator> currentRunComparators;
32+
33+
protected Map<String, ? extends SimilarityResult> combinedSimilarityResults;
34+
35+
public FileResult(Path file) {
36+
this.referenceFile = file;
37+
}
38+
39+
public FileResult(Path file, List<SimilarityResult> similarityResults) {
40+
this(file);
41+
this.appendSimilarityResults(similarityResults);
42+
}
43+
44+
public Path getReferenceFile() {
45+
return referenceFile;
46+
}
47+
48+
public List<SimilarityResult> getSimilarityResults() {
49+
return similarityResults;
50+
}
51+
52+
public int getStatus() {
53+
return status;
54+
}
55+
56+
public void appendSimilarityResult(SimilarityResult similarResult) {
57+
this.similarityResults.add(similarResult);
58+
this.updateStatus();
59+
this.combinedSimilarityResults = null;
60+
}
61+
62+
public void appendSimilarityResults(List<SimilarityResult> similarityResults) {
63+
this.similarityResults.addAll(similarityResults);
64+
this.updateStatus();
65+
this.combinedSimilarityResults = null;
66+
}
67+
68+
protected void updateStatus() {
69+
if (this.similarityResults.isEmpty()) {
70+
this.status = UNIQUE;
71+
} else {
72+
this.status = NOT_UNIQUE;
73+
}
74+
}
75+
76+
public void setCurrentRunComparators(List<AbstractDuplicateComparator> comparators) {
77+
this.currentRunComparators = comparators;
78+
}
79+
80+
public Map<String, ? extends SimilarityResult> getCombinedSimilarityResults() {
81+
if (this.combinedSimilarityResults == null) {
82+
Map<String, CombinedSimilarityResult> combinedSimilarityResults = new TreeMap<>();
83+
for (SimilarityResult sr : this.similarityResults) {
84+
CombinedSimilarityResult csr;
85+
if (combinedSimilarityResults.containsKey(sr.getSimilarFile().toString())) {
86+
csr = combinedSimilarityResults.get(sr.getSimilarFile().toString());
87+
csr.addSimilarityResult(sr);
88+
} else {
89+
csr = new CombinedSimilarityResult(sr);
90+
csr.setComparators(this.currentRunComparators);
91+
combinedSimilarityResults.put(csr.getSimilarFile().toString(), csr);
92+
}
93+
}
94+
this.combinedSimilarityResults = combinedSimilarityResults;
95+
}
96+
return this.combinedSimilarityResults;
97+
}
98+
99+
public String toString() {
100+
StringBuilder sb = new StringBuilder();
101+
102+
sb.append(this.getClass())
103+
.append(": ")
104+
.append(this.referenceFile.toString())
105+
.append(" {\n");
106+
for (Map.Entry<String, ? extends SimilarityResult> csr : this.getCombinedSimilarityResults().entrySet()) {
107+
sb.append(String.format(" [%3d] ", csr.getValue().getSimilarity()))
108+
.append(csr.getValue().getSimilarFile().toString())
109+
.append("\n");
110+
}
111+
sb.append("}");
112+
113+
return sb.toString();
114+
}
115+
}

0 commit comments

Comments
 (0)