001// License: GPL. See LICENSE file for details.
002package org.openstreetmap.josm.data.validation.tests;
003
004import static org.openstreetmap.josm.tools.I18n.tr;
005
006import java.awt.geom.Point2D;
007import java.util.ArrayList;
008import java.util.HashMap;
009import java.util.List;
010import java.util.Map;
011
012import org.openstreetmap.josm.data.osm.OsmPrimitive;
013import org.openstreetmap.josm.data.osm.Way;
014import org.openstreetmap.josm.data.validation.Severity;
015import org.openstreetmap.josm.data.validation.Test;
016import org.openstreetmap.josm.data.validation.TestError;
017import org.openstreetmap.josm.data.validation.util.ValUtil;
018import org.openstreetmap.josm.gui.progress.ProgressMonitor;
019import org.openstreetmap.josm.tools.MultiMap;
020import org.openstreetmap.josm.tools.Utils;
021
022/**
023 * Checks for similar named ways, symptom of a possible typo. It uses the
024 * Levenshtein distance to check for similarity
025 *
026 * @author frsantos
027 */
028public class SimilarNamedWays extends Test {
029
030    protected static final int SIMILAR_NAMED = 701;
031
032    /** All ways, grouped by cells */
033    private Map<Point2D,List<Way>> cellWays;
034    /** The already detected errors */
035    private MultiMap<Way, Way> errorWays;
036
037    /**
038     * Constructor
039     */
040    public SimilarNamedWays() {
041        super(tr("Similarly named ways"),
042                tr("This test checks for ways with similar names that may have been misspelled."));
043    }
044
045    @Override
046    public void startTest(ProgressMonitor monitor) {
047        super.startTest(monitor);
048        cellWays = new HashMap<Point2D,List<Way>>(1000);
049        errorWays = new MultiMap<Way, Way>();
050    }
051
052    @Override
053    public void endTest() {
054        cellWays = null;
055        errorWays = null;
056        super.endTest();
057    }
058
059    @Override
060    public void visit(Way w) {
061        if (!w.isUsable())
062            return;
063
064        String name = w.get("name");
065        if (name == null || name.length() < 6)
066            return;
067
068        List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
069        for (List<Way> ways : theCellWays) {
070            for (Way w2 : ways) {
071                if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) {
072                    continue;
073                }
074
075                String name2 = w2.get("name");
076                if (name2 == null || name2.length() < 6) {
077                    continue;
078                }
079
080                int levenshteinDistance = getLevenshteinDistance(name, name2);
081                if (0 < levenshteinDistance && levenshteinDistance <= 2) {
082                    List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>(2);
083                    primitives.add(w);
084                    primitives.add(w2);
085                    errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives));
086                    errorWays.put(w, w2);
087                }
088            }
089            ways.add(w);
090        }
091    }
092
093    /**
094     * Compute Levenshtein distance
095     *
096     * @param s First word
097     * @param t Second word
098     * @return The distance between words
099     */
100    public int getLevenshteinDistance(String s, String t) {
101        int[][] d; // matrix
102        int n; // length of s
103        int m; // length of t
104        int i; // iterates through s
105        int j; // iterates through t
106        char s_i; // ith character of s
107        char t_j; // jth character of t
108        int cost; // cost
109
110        // Step 1
111        n = s.length();
112        m = t.length();
113        if (n == 0)
114            return m;
115        if (m == 0)
116            return n;
117        d = new int[n + 1][m + 1];
118
119        // Step 2
120        for (i = 0; i <= n; i++) {
121            d[i][0] = i;
122        }
123        for (j = 0; j <= m; j++) {
124            d[0][j] = j;
125        }
126
127        // Step 3
128        for (i = 1; i <= n; i++) {
129
130            s_i = s.charAt(i - 1);
131
132            // Step 4
133            for (j = 1; j <= m; j++) {
134
135                t_j = t.charAt(j - 1);
136
137                // Step 5
138                if (s_i == t_j) {
139                    cost = 0;
140                } else {
141                    cost = 1;
142                }
143
144                // Step 6
145                d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
146            }
147        }
148
149        // Step 7
150        return d[n][m];
151    }
152}