Wednesday, August 26, 2015

VBA Pattern Matching with the Levenshtein Algorithm

Public Sub match()
    'MsgBox ("asdfs")
    For i = 3 To 200
    
        If (Cells(i, 2).Value <> "") Then
        
            If (Trim(UCase(Cells(i, 2).Value)) = Trim(UCase(Cells(i, 3).Value))) _
            And (Trim(UCase(Cells(i, 4).Value)) = Trim(UCase(Cells(i, 5).Value))) _
            And (Trim(UCase(Cells(i, 6).Value)) = Trim(UCase(Cells(i, 7).Value))) _
            And (Trim(UCase(Cells(i, 8).Value)) = Trim(UCase(Cells(i, 9).Value))) _
            Then
                Cells(i, 13).Value = "Y"
            Else
                Cells(i, 13).Value = "N"
            End If
            Cells(i, 14).Value = Levenshtein(Cells(i, 2).Value, Cells(i, 3).Value)
            Cells(i, 15).Value = Levenshtein(Cells(i, 4).Value, Cells(i, 5).Value)
            Cells(i, 16).Value = Levenshtein(Cells(i, 6).Value, Cells(i, 7).Value)
            Cells(i, 17).Value = Levenshtein(Cells(i, 8).Value, Cells(i, 9).Value)
            Cells(i, 18).Value = Cells(i, 14).Value + Cells(i, 15).Value + Cells(i, 16).Value + Cells(i, 17).Value
        Else
            Cells(i, 13).Value = ""
            Cells(i, 14).Value = ""
            Cells(i, 15).Value = ""
            Cells(i, 16).Value = ""
            Cells(i, 17).Value = ""
            Cells(i, 18).Value = ""
        End If
        
        ' Write sql
        ' Good ones
         If ((Cells(i, 13).Value = "Y") Or (Cells(i, 18).Value < 5)) And (Cells(i, 2).Value <> "") Then
        ' Likely good
        ' If (Cells(i, 18).Value > 4) (Cells(i, 18).Value < 10) Then
            writeSQL (i)
        ' the names are an exact match but there is not city/state/zip information
        ' this is the case with a lot of the SMO_2015_Voting_Member_List ones
        ElseIf (UCase(Trim(Cells(i, 2).Value)) = UCase(Trim(Cells(i, 3).Value))) _
            And UCase(Trim((Cells(i, 4).Value)) = UCase(Trim(Cells(i, 5).Value))) _
            And UCase(Trim((Cells(i, 6).Value)) = "NULL") _
            And UCase(Trim((Cells(i, 8).Value)) = "NULL") _
            And UCase(Trim((Cells(i, 10).Value)) = "NULL") Then
            writeSQLNULL (i)
        Else
            Cells(i, 20).Value = ""
            Cells(i, 21).Value = ""
        End If
        
    Next i
    
End Sub


Function Levenshtein(ByVal string1 As String, ByVal string2 As String) As Long
    'https://en.wikipedia.org/wiki/Levenshtein_distance
    'http://stackoverflow.com/questions/4243036/levenshtein-distance-in-excel

    string1 = UCase(Trim(string1))
    string2 = UCase(Trim(string2))

    Dim i As Long, j As Long
    Dim string1_length As Long
    Dim string2_length As Long
    Dim distance() As Long
    
    string1_length = Len(string1)
    string2_length = Len(string2)
    ReDim distance(string1_length, string2_length)
    
    For i = 0 To string1_length
        distance(i, 0) = i
    Next
    
    For j = 0 To string2_length
        distance(0, j) = j
    Next
    
    For i = 1 To string1_length
        For j = 1 To string2_length
            If Asc(Mid$(string1, i, 1)) = Asc(Mid$(string2, j, 1)) Then
                distance(i, j) = distance(i - 1, j - 1)
            Else
                distance(i, j) = Application.WorksheetFunction.Min _
                (distance(i - 1, j) + 1, _
                 distance(i, j - 1) + 1, _
                 distance(i - 1, j - 1) + 1)
            End If
        Next
    Next
    
    Levenshtein = distance(string1_length, string2_length)
    
End Function

Function writeSQL(ByVal i As Integer)

    Dim customer As String, firstName As String, lastName As String, _
    city As String, state As String, zip As String, _
    _id As String, sqlSelect As String, sqlUpdate As String
        
    customer = RTrim(Cells(i, 1).Value)
    firstName = RTrim(Cells(i, 3).Value)
    lastName = RTrim(Cells(i, 5).Value)
    city = RTrim(Cells(i, 6).Value)
    state = RTrim(Cells(i, 8).Value)
    zip = RTrim(Cells(i, 10).Value)
    _id = RTrim(Right("0000000000000" & Cells(i, 12).Value, 12))
    
    sqlSelect = "-- SELECT * FROM ca WHERE ca.firstName = '" + _
    firstName + "' AND ca.lastName = '" + lastName + "' AND city = '" + city + _
    "' AND [state] = '" + state + "' AND zip = '" + zip + "'"

    sqlUpdate = "' -- UPDATE ca SET ca.Member = 40, ca. = '" + _id + _
    "' FROM ca WHERE ca.firstName = '" + firstName + "' AND " + _
    "ca.lastName = '" + lastName + "' AND city = '" + city + "' AND [state] = '" + state + _
    "' AND zip = '" + zip + "' AND customer = '" + customer + "'"

    If (firstName = "firstName") Then
        Cells(i, 20).Value = ""
        Cells(i, 21).Value = ""
        Rows(i).Interior.Color = 45535
    Else
        'Cells(i, 20).Value = sqlSelect
        Cells(i, 20).Value = ""
        Cells(i, 21).Value = sqlUpdate
        Rows(i).Interior.Color = 65535
    End If
    
End Function

Function writeSQLNULL(ByVal i As Integer)

    Dim customer As String, firstName As String, lastName As String, _
    city As String, state As String, zip As String, _
    ship_master_customer_id As String, sqlSelect As String, sqlUpdate As String
        
    customer = Cells(i, 1).Value
    firstName = Cells(i, 3).Value
    lastName = Cells(i, 5).Value
    city = Cells(i, 6).Value
    state = Cells(i, 8).Value
    zip = Cells(i, 10).Value
    _id = Right("0000000000000" & Cells(i, 12).Value, 12)
    
    sqlSelect = "-- SELECT * FROM  ca WHERE ca.firstName = '" + _
    firstName + "' AND ca.lastName = '" + lastName + "' AND city = '" + city + _
    "' AND [state] = '" + state + "' AND zip = '" + zip + "'"

    sqlUpdate = "' -- UPDATE ca SET ca.Member = 40, ca.= '" + _id + _
    "' FROM ca ca WHERE ca.firstName = '" + firstName + "' AND " + _
    "ca.lastName = '" + lastName + "' AND city IS NULL AND [state] IS NULL AND zip IS NULL AND customer = '" + customer + "'"

    If (firstName = "firstName") Then
        Cells(i, 20).Value = ""
        Cells(i, 21).Value = ""
        Rows(i).Interior.Color = 45535
    Else
        'Cells(i, 20).Value = sqlSelect
        Cells(i, 20).Value = ""
        Cells(i, 21).Value = sqlUpdate
        Rows(i).Interior.Color = 65535
    End If
    
End Function



No comments: