@@ -34,15 +34,15 @@ function (dist::Partial)(s1, s2; max_dist = nothing)
3434 return out
3535end
3636
37+ # specialized (faster) version for RatcliffObershelp
3738function (dist:: Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp}}} )(s1, s2; max_dist = nothing )
3839 (s1 === missing ) | (s2 === missing ) && return missing
3940 s1, s2 = reorder (s1, s2)
4041 len1, len2 = length (s1), length (s2)
4142 len1 == len2 && return dist. dist (s1, s2)
4243 out = 1.0
43- for r in matching_blocks (s1, s2, 1 , 1 , len1, len2)
44+ for s2_start in matching_blocks (s1, s2, 1 , 1 , len1, len2)
4445 # Make sure the substring of s2 has length len1
45- s2_start = r[2 ] - r[1 ] + 1
4646 if s2_start < 1
4747 s2_start = 1
4848 elseif s2_start + len1 - 1 > len2
@@ -56,20 +56,16 @@ function (dist::Partial{<: Union{RatcliffObershelp, Normalized{RatcliffObershelp
5656end
5757
5858function matching_blocks (s1, s2, start1:: Integer , start2:: Integer , end1:: Integer , end2:: Integer )
59- x = Set {Tuple{ Int, Int, Int} } ()
59+ x = Set {Int} ()
6060 p = zeros (Int, max (end1 - start1, end2 - start2) + 1 )
6161 matching_blocks! (x, p, s1, s2, start1, start2, end1, end2)
6262end
6363
64- function matching_blocks! (x:: Set{Tuple{ Int, Int, Int} } , p:: Vector{Int} , s1, s2, start1:: Integer , start2:: Integer , end1:: Integer , end2:: Integer )
64+ function matching_blocks! (x:: Set{Int} , p:: Vector{Int} , s1, s2, start1:: Integer , start2:: Integer , end1:: Integer , end2:: Integer )
6565 j1, j2, len = longest_common_pattern! (p, s1, s2, start1, start2, end1, end2)
66- # exit if there is no common substring
6766 len == 0 && return x
68- # add the info of the common to the existing set
69- push! (x, (j1, j2, len))
70- # add the longest common substring that happens before
67+ push! (x, j2 - j1 + 1 )
7168 matching_blocks! (x, p, s1, s2, start1, start2, j1 - 1 , j2 - 1 )
72- # add the longest common substring that happens after
7369 matching_blocks! (x, p, s1, s2, j1 + len, j2 + len, end1, end2)
7470 return x
7571end
@@ -137,10 +133,9 @@ function (dist::TokenSet)(s1::Union{AbstractString, Missing}, s2::Union{Abstract
137133 s1 = join (v1, " " )
138134 s2 = join (v2, " " )
139135 isempty (s0) && return dist. dist (s1, s2; max_dist = max_dist)
140- out_01 = dist. dist (s0, s1; max_dist = max_dist)
141- out_02 = dist. dist (s0, s2; max_dist = max_dist)
142- out_12 = dist. dist (s1, s2; max_dist = max_dist)
143- min (out_01, out_02, out_12)
136+ min (dist. dist (s0, s1; max_dist = max_dist),
137+ dist. dist (s0, s2; max_dist = max_dist),
138+ dist. dist (s1, s2; max_dist = max_dist))
144139end
145140
146141Normalized (dist:: TokenSet ) = Normalized {typeof(TokenSet(Normalized(dist.dist)))} (TokenSet (Normalized (dist. dist)))
0 commit comments