Skip to content

Commit 9fb099e

Browse files
committed
tag new version
1 parent acf6623 commit 9fb099e

File tree

4 files changed: 16 additions & 20 deletions

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "StringDistances"
22
uuid = "88034a9c-02f8-509d-84a9-84ec65e18404"
3-
version = "0.11.0"
3+
version = "0.11.1"
44

55
[deps]
66
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"

src/StringDistances.jl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ module StringDistances
22

33
using Distances
44
import StatsAPI: pairwise, pairwise!
5+
# Distances API
56
abstract type StringSemiMetric <: SemiMetric end
67
abstract type StringMetric <: Metric end
7-
(dist::Union{StringSemiMetric, StringMetric})(s1, s2; max_dist = nothing) = dist(s1, s2)
8-
8+
const StringDistance = Union{StringSemiMetric, StringMetric}
99
function Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1::Type, s2::Type)
1010
T = typeof(dist("", ""))
1111
if (Missing <: s1) | (Missing <: s2)
@@ -15,6 +15,9 @@ function Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1::
1515
end
1616
Distances.result_type(dist::Union{StringSemiMetric, StringMetric}, s1, s2) = result_type(dist, typeof(s1), typeof(s2))
1717

18+
19+
20+
(dist::Union{StringSemiMetric, StringMetric})(s1, s2; max_dist = nothing) = dist(s1, s2)
1821
include("utils.jl")
1922
include("distances/edit.jl")
2023
include("distances/qgram.jl")
@@ -24,7 +27,6 @@ include("find.jl")
2427
include("fuzzywuzzy.jl")
2528

2629

27-
const StringDistance = Union{StringSemiMetric, StringMetric}
2830
##############################################################################
2931
##
3032
## Export

src/fuzzywuzzy.jl

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,15 @@ function (dist::Partial)(s1, s2; max_dist = nothing)
3434
return out
3535
end
3636

37+
# specialized (faster) version for RatcliffObershelp
3738
function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp}}})(s1, s2; max_dist = nothing)
3839
(s1 === missing) | (s2 === missing) && return missing
3940
s1, s2 = reorder(s1, s2)
4041
len1, len2 = length(s1), length(s2)
4142
len1 == len2 && return dist.dist(s1, s2)
4243
out = 1.0
43-
for r in matching_blocks(s1, s2, 1, 1, len1, len2)
44+
for s2_start in matching_blocks(s1, s2, 1, 1, len1, len2)
4445
# Make sure the substring of s2 has length len1
45-
s2_start = r[2] - r[1] + 1
4646
if s2_start < 1
4747
s2_start = 1
4848
elseif s2_start + len1 - 1 > len2
@@ -56,20 +56,16 @@ function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp
5656
end
5757

5858
function matching_blocks(s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
59-
x = Set{Tuple{Int, Int, Int}}()
59+
x = Set{Int}()
6060
p = zeros(Int, max(end1 - start1, end2 - start2) + 1)
6161
matching_blocks!(x, p, s1, s2, start1, start2, end1, end2)
6262
end
6363

64-
function matching_blocks!(x::Set{Tuple{Int, Int, Int}}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
64+
function matching_blocks!(x::Set{Int}, p::Vector{Int}, s1, s2, start1::Integer, start2::Integer, end1::Integer, end2::Integer)
6565
j1, j2, len = longest_common_pattern!(p, s1, s2, start1, start2, end1, end2)
66-
# exit if there is no common substring
6766
len == 0 && return x
68-
# add the info of the common to the existing set
69-
push!(x, (j1, j2, len))
70-
# add the longest common substring that happens before
67+
push!(x, j2 - j1 + 1)
7168
matching_blocks!(x, p, s1, s2, start1, start2, j1 - 1, j2 - 1)
72-
# add the longest common substring that happens after
7369
matching_blocks!(x, p, s1, s2, j1 + len, j2 + len, end1, end2)
7470
return x
7571
end
@@ -137,10 +133,9 @@ function (dist::TokenSet)(s1::Union{AbstractString, Missing}, s2::Union{Abstract
137133
s1 = join(v1, " ")
138134
s2 = join(v2, " ")
139135
isempty(s0) && return dist.dist(s1, s2; max_dist = max_dist)
140-
out_01 = dist.dist(s0, s1; max_dist = max_dist)
141-
out_02 = dist.dist(s0, s2; max_dist = max_dist)
142-
out_12 = dist.dist(s1, s2; max_dist = max_dist)
143-
min(out_01, out_02, out_12)
136+
min(dist.dist(s0, s1; max_dist = max_dist),
137+
dist.dist(s0, s2; max_dist = max_dist),
138+
dist.dist(s1, s2; max_dist = max_dist))
144139
end
145140

146141
Normalized(dist::TokenSet) = Normalized{typeof(TokenSet(Normalized(dist.dist)))}(TokenSet(Normalized(dist.dist)))

src/utils.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,11 @@ string_with_length(s::AbstractString) = StringWithLength(s, length(s))
2626
# Not really needed but avoid multi-encapsulation
2727
string_with_length(s::StringWithLength) = s
2828
Base.length(s::StringWithLength) = s.l
29-
Base.iterate(s::StringWithLength, i::Integer = firstindex(s.s)) = iterate(s.s, i)
29+
Base.iterate(s::StringWithLength) = iterate(s.s)
30+
Base.iterate(s::StringWithLength, i::Integer) = iterate(s.s, i)
3031
Base.nextind(s::StringWithLength, i::Int, n::Int = 1) = nextind(s.s, i, n)
3132
Base.ncodeunits(s::StringWithLength) = ncodeunits(s.s)
3233
Base.isvalid(s::StringWithLength, i::Int) = isvalid(s.s, i)
33-
34-
3534
function reorder(s1::AbstractString, s2::AbstractString)
3635
s1 = string_with_length(s1)
3736
s2 = string_with_length(s2)

0 commit comments

Comments
 (0)