Results 1 to 5 of 5
  1. #1
    Join Date
    May 2005
    Posts
    1,191

    Unanswered: Keyword List from Memo Field in Table

    I figured this may be useful to someone else out there. I'm sure someone else has run into this before, and probably figured out a better way to do it, but I couldn't find anything online, so here's my quick and dirty approach:

    Basically what I'm after is how do I pick out just keywords from a large set of data without having to manually read through every field (imagine a user-generated description memo field or something, and many, many rows of them). I came up with the following that generates a table that can then be read either by a listbox, report, or what-have-you.

    DISCLAIMER: I'm sure this is not the most efficient way it can be done, what with the lookups and SQL inserts and updates, so I would welcome any helpful suggestions for how to speed up the process. Also, there may not be adequate error handling for your particular application; I leave that up to you should you choose to use this code. Use at your own discretion, it's your responsibility not mine

    The whole thing was too long to fit in one post, so see follow-up posts for all the instructions and code.
    Me.Geek = True

  2. #2
    Join Date
    May 2005
    Posts
    1,191
    The code to generate the table:
    Code:
    Public Function GenerateKeywordTable(ByVal strDescr As String, _
                                            Optional strTableName As String, _
                                            Optional intMinWordLength As Integer, _
                                            Optional intMinOccurences As Integer) As Boolean
        
        'generates a temp table with all keywords and the number of times they appear in a passed string
        
        On Error GoTo ErrorHandler
        
        GenerateKeywordTable = False
        
        'instantiate
        Dim i As Long, _
            iCnt As Long
        Dim strSQL As String, _
            strWord As String
        Dim varWords As Variant
        Dim rsStopWords As dao.Recordset
        
        'error handling
        If Not Len(strDescr) > 0 Then GoTo ExitProc
        
        'defaults
        If Not Len(strTableName) > 0 Then strTableName = "tblTempKeywords"
        If Not intMinWordLength > 0 Then intMinWordLength = 3
        If Not intMinOccurences > 0 Then intMinOccurences = 2
        
        'sanitize string
        SysCmd acSysCmdSetStatus, "Sanatizing Data String..."
        strDescr = UCase(strDescr)
        AlphaNumericString strDescr
        strDescr = Replace(strDescr, chr(10), "")
        strDescr = Replace(strDescr, chr(13), "")
        While InStr(strDescr, "  ") > 1
            strDescr = Replace(strDescr, "  ", " ")
        Wend
        
        'analyze string
        varWords = Split(strDescr)
        
        'clear temp table if exists
        SysCmd acSysCmdSetStatus, "Clearing Word Table..."
        If ObjectExists(acTable, strTableName) Then DoCmd.DeleteObject acTable, strTableName
        
        'build temp table
        SysCmd acSysCmdInitMeter, "Building Word Table...", UBound(varWords)
        strSQL = ""
        'make table
        strWord = varWords(0)
        iCnt = CountOccurrences(strDescr, strWord)
        strSQL = "SELECT " & _
                        "('" & strWord & "') AS [fldWord], " & _
                        "(" & iCnt & ") AS [fldCount] " & _
                        vbNewLine & _
                    "INTO [" & strTableName & "];"
        CurrentDb.Execute strSQL
        'cycle through words array and add to table
        For i = 1 To UBound(varWords)
            'update status
            SysCmd acSysCmdUpdateMeter, i
            'make sure word is longer than minimum
            strWord = varWords(i)
            If Len(strWord) >= intMinWordLength Then
                iCnt = Nz(ELookup("fldCount", strTableName, "(fldWord) = '" & strWord & "'"), 0)
                'create if unique
                If iCnt = 0 Then
                    iCnt = 1
                    strSQL = "INSERT INTO [" & strTableName & "] " & _
                                "(fldWord, fldCount) " & _
                                "VALUES " & _
                                "('" & strWord & "', " & iCnt & ");"
                    CurrentDb.Execute strSQL
                'add if duplicate
                ElseIf iCnt > 0 Then
                    iCnt = iCnt + 1
                    strSQL = "UPDATE [" & strTableName & "] " & _
                                "SET [" & strTableName & "].fldCount = " & iCnt & " " & _
                                "WHERE [" & strTableName & "].fldWord = '" & strWord & "';"
                    CurrentDb.Execute strSQL
                End If
            End If
        Next i
        
        'clear stop words
        SysCmd acSysCmdSetStatus, "Clearing Stop Words..."
        strSQL = "SELECT tblStopWords.[fldStopWord] " & _
                    "FROM tblStopWords " & _
                    "ORDER BY tblStopWords.[fldStopWord];"
        Set rsStopWords = CurrentDb.OpenRecordset(strSQL, dbOpenSnapshot)
        If rsStopWords.RecordCount > 0 Then rsStopWords.MoveLast
        SysCmd acSysCmdInitMeter, "Clearing Stop Words...", rsStopWords.RecordCount
        If Not rsStopWords.BOF Then rsStopWords.MoveFirst
        i = 0
        Do While Not rsStopWords.EOF
            'update status
            SysCmd acSysCmdUpdateMeter, i
            
            'remove table entry
            'rsStopWords.Fields(0)
            strSQL = "DELETE [" & strTableName & "].* " & _
                        "FROM [" & strTableName & "] " & _
                        "WHERE [" & strTableName & "].fldWord = '" & rsStopWords.Fields(0) & "';"
            CurrentDb.Execute strSQL
            
            'increment
            rsStopWords.MoveNext
            i = i + 1
        Loop
        rsStopWords.Close
        Set rsStopWords = Nothing
        
        'clear too few count words
        SysCmd acSysCmdSetStatus, "Clearing small words..."
        strSQL = "DELETE [" & strTableName & "].* " & _
                    "FROM [" & strTableName & "] " & _
                    "WHERE NOT Len([" & strTableName & "].fldWord) > 0;"
        CurrentDb.Execute strSQL
        strSQL = "DELETE [" & strTableName & "].* " & _
                    "FROM [" & strTableName & "] " & _
                    "WHERE [" & strTableName & "].fldCount < " & intMinOccurences & ";"
        CurrentDb.Execute strSQL
        
        'clear out
        GenerateKeywordTable = True
    
    ExitProc:
        SysCmd acSysCmdSetStatus, " "
        Exit Function
        
    ErrorHandler:
        On Error Resume Next
        GenerateKeywordTable = False
        GoTo ExitProc
        
    End Function
    
    
    Public Function AlphaNumericString(ByRef strInput As String) As Boolean
        
        'strips all but alpha-numeric, space, underscore and cr-lf characters from a string
        'pass string by reference
        
        On Error GoTo ErrorHandler
        
        AlphaNumericString = False
        
        Dim i As Integer
        
        For i = 0 To 255
            Select Case i
                Case 10, 13, 32, 48 To 57, 65 To 90, 95, 97 To 122
                    'leave alone
                Case Else
                    strInput = Replace(strInput, chr(i), "")
            End Select
        Next i
        
        AlphaNumericString = True
        
    ExitProc:
        Exit Function
        
    ErrorHandler:
        On Error Resume Next
        AlphaNumericString = False
        GoTo ExitProc
        
    End Function
    
    
    
    
    Public Function ELookup(Expr As String, _
                            Domain As String, _
                            Optional Criteria As Variant, _
                            Optional OrderClause As Variant, _
                            Optional db As dao.Database, _
                            Optional strcSep As String = ",") As Variant
        
        On Error GoTo ErrorHandler
        
        ' http://allenbrowne.com/ser-42.html
        
        'Purpose:   Faster and more flexible replacement for DLookup()
        'Arguments: Same as DLookup, with additional Order By option.
        'Return:    Value of the Expr if found, else Null.
        '           Delimited list for multi-value field.
        'Author:    Allen Browne. allen@allenbrowne.com
        'Updated:   December 2006, to handle multi-value fields (Access 2007.)
        'Examples:
        '           1. To find the last value, include DESC in the OrderClause, e.g.:
        '               ELookup("[Surname] & [FirstName]", "tblClient", , "ClientID DESC")
        '           2. To find the lowest non-null value of a field, use the Criteria, e.g.:
        '               ELookup("ClientID", "tblClient", "Surname Is Not Null" , "Surname")
        'Note:      Requires a reference to the DAO library.
        
        Dim rs As dao.Recordset         'To retrieve the value to find.
        Dim rsMVF As dao.Recordset      'Child recordset to use for multi-value fields.
        Dim varResult As Variant        'Return value for function.
        Dim strSQL As String            'SQL statement.
        Dim strOut As String            'Output string to build up (multi-value field.)
        Dim lngLen As Long              'Length of string.
        
        'Initialize to null.
        varResult = Null
        
        'Build the SQL string.
        strSQL = "SELECT TOP 1 " & Expr & " FROM " & Domain
        If Not IsMissing(Criteria) Then
            strSQL = strSQL & " WHERE " & Criteria
        End If
        If Not IsMissing(OrderClause) Then
            strSQL = strSQL & " ORDER BY " & OrderClause
        End If
        strSQL = strSQL & ";"
        
        'Lookup the value.
        If (db Is Nothing) Then Set db = DBEngine(0)(0)
        Set rs = db.OpenRecordset(strSQL, dbOpenForwardOnly)
        If rs.RecordCount > 0 Then
            'Will be an object if multi-value field.
            If VarType(rs(0)) = vbObject Then
                Set rsMVF = rs(0).Value
                Do While Not rsMVF.EOF
                    If rs(0).Type = 101 Then        'dbAttachment
                        strOut = strOut & rsMVF!FileName & strcSep
                    Else
                        strOut = strOut & rsMVF![Value].Value & strcSep
                    End If
                    rsMVF.MoveNext
                Loop
                'Remove trailing separator.
                lngLen = Len(strOut) - Len(strcSep)
                If lngLen > 0& Then
                    varResult = Left(strOut, lngLen)
                End If
                Set rsMVF = Nothing
            Else
                'Not a multi-value field: just return the value.
                varResult = rs(0)
            End If
        End If
        rs.Close
        
    ExitProc:
        ELookup = varResult
        Set rs = Nothing
        Set db = Nothing
        Exit Function
        
    ErrorHandler:
        On Error Resume Next
        varResult = Null
        Resume ExitProc
        
    End Function
    Me.Geek = True

  3. #3
    Join Date
    May 2005
    Posts
    1,191
    Create a Stop Words table named 'tblStopWords' with a single string field called 'fldStopWord' and insert the following (modify at your discretion):
    Code:
    a
    about
    above
    across
    after
    afterwards
    again
    against
    all
    almost
    alone
    along
    already
    also
    although
    always
    am
    among
    amongst
    amoungst
    amount
    an
    and
    another
    any
    anyhow
    anyone
    anything
    anyway
    anywhere
    are
    around
    as
    at
    back
    be
    became
    because
    become
    becomes
    becoming
    been
    before
    beforehand
    behind
    being
    below
    beside
    besides
    between
    beyond
    both
    but
    by
    call
    can
    cannot
    cant
    co
    con
    could
    couldnt
    cry
    de
    describe
    detail
    do
    done
    down
    due
    during
    each
    eg
    eight
    either
    eleven
    else
    elsewhere
    enough
    etc
    even
    ever
    every
    everyone
    everything
    everywhere
    except
    few
    fifteen
    fify
    find
    fire
    first
    five
    for
    former
    formerly
    forty
    found
    four
    from
    further
    get
    give
    go
    had
    has
    hasnt
    have
    he
    hence
    her
    here
    hereafter
    hereby
    herein
    hereupon
    hers
    herself
    him
    himself
    his
    how
    however
    hundred
    i
    ie
    if
    in
    inc
    indeed
    interest
    into
    is
    it
    its
    itself
    keep
    last
    latter
    least
    less
    ltd
    made
    many
    may
    me
    meanwhile
    might
    mill
    mine
    more
    moreover
    most
    mostly
    move
    much
    must
    my
    myself
    name
    namely
    neither
    never
    nevertheless
    next
    nine
    no
    nobody
    none
    noone
    nor
    nothing
    now
    nowhere
    of
    off
    often
    on
    once
    one
    only
    onto
    or
    other
    others
    otherwise
    our
    ours
    ourselves
    out
    over
    own
    part
    per
    perhaps
    please
    put
    rather
    re
    same
    see
    seem
    seemed
    seeming
    seems
    serious
    several
    she
    should
    show
    side
    since
    sincere
    six
    sixty
    so
    some
    somehow
    someone
    something
    sometime
    sometimes
    somewhere
    still
    such
    system
    take
    ten
    than
    that
    the
    their
    them
    themselves
    then
    thence
    there
    thereafter
    thereby
    therefore
    therein
    thereupon
    these
    they
    third
    this
    those
    though
    three
    through
    throughout
    thru
    thus
    to
    together
    too
    top
    toward
    towards
    twelve
    twenty
    two
    un
    under
    until
    up
    upon
    us
    very
    via
    was
    we
    well
    were
    what
    whatever
    when
    whence
    whenever
    where
    whereafter
    whereas
    whereby
    wherein
    whereupon
    wherever
    whether
    which
    while
    whither
    who
    whoever
    whole
    whom
    whose
    why
    will
    with
    within
    without
    would
    yet
    you
    your
    yours
    yourself
    yourselve
    Last edited by nckdryr; 02-14-12 at 12:32. Reason: Added hyperlink to Wiki article on Stop Words
    Me.Geek = True

  4. #4
    Join Date
    May 2005
    Posts
    1,191
    I call it like this (note, change the sql as needed for your application):
    Code:
    Private Sub cmdResultsWordList_Click()
        
        On Error GoTo ErrorHandler
        
        DoCmd.Hourglass True
        
        'instantiate
        Dim i As Long, _
            iWordMinLength As Integer
        Dim strDescr As String, _
            strFileText As String, _
            strSQL As String, _
            strCriteria As String, _
            strTableName As String
        Dim rs As dao.Recordset
        
        'initiate
        strDescr = ""
        strFileText = ""
        strSQL = ""
        strTableName = "tblTempKeywords"
        iWordMinLength = 3
        
        'get data
        SysCmd acSysCmdSetStatus, "Preparing Data..."
        'ADJUST THE FOLLOWING SQL AS NEEDED!!!!!
        strCriteria  = ""
        strSQL = strSQL & "SELECT [tblName].[fldName] " & vbNewLine
        strSQL = strSQL & "FROM [tblName] " & vbNewLine
        strSQL = strSQL & "WHERE " & strCriteria & ";"
        Set rs = CurrentDb.OpenRecordset(strSQL, dbOpenSnapshot)
        If rs.RecordCount > 0 Then rs.MoveLast
        
        'build description string
        SysCmd acSysCmdInitMeter, "Concatenating Data String...", rs.RecordCount
        i = 0
        If Not rs.BOF Then rs.MoveFirst
        Do While Not rs.EOF
            'update status
            SysCmd acSysCmdUpdateMeter, i
            
            'build string
            strDescr = strDescr & Nz(rs.Fields(0), "") & vbNewLine
            
            'increment
            rs.MoveNext
            i = i + 1
        Loop
        rs.Close
        Set rs = Nothing
        
        'build table
        GenerateKeywordTable strDescr, strTableName, iWordMinLength
        
        'do whatever you want to with the table here
        
    ExitSub:
        SysCmd acSysCmdSetStatus, " "
        DoCmd.SetWarnings True
        DoCmd.Echo True
        DoCmd.Hourglass False
        Exit Sub
        
    ErrorHandler:
        On Error Resume Next
        GoTo ExitSub
        
    End Sub
    Me.Geek = True

  5. #5
    Join Date
    May 2005
    Posts
    1,191
    One more bit of code I forgot in the second post:
    Code:
    Public Function ObjectExists(intObjectType As AcObjectType, _
                                    strObjectName As String, _
                                    Optional ByVal db As dao.Database) As Boolean
        ' Pass the Object type: Table, Query, Form, Report, Macro, or Module
        ' Pass the Object Name
        
        On Error GoTo ErrorHandler
        ObjectExists = False
        
        Dim tbl As TableDef
        Dim qry As QueryDef
        Dim i As Integer
        
        'assign default db if null
        If (db Is Nothing) Then Set db = CurrentDb
        
        Select Case intObjectType
            Case acTable
                For Each tbl In db.TableDefs
                    If tbl.Name = strObjectName Then
                        ObjectExists = True
                        Exit For
                    End If
                Next tbl
            Case acQuery
                For Each qry In db.QueryDefs
                    If qry.Name = strObjectName Then
                        ObjectExists = True
                        Exit For
                    End If
                Next qry
            Case acForm, acReport, acModule
                For i = 0 To db.Containers(intObjectType & "s").Documents.Count - 1
                    If db.Containers(intObjectType & "s").Documents(i).Name = strObjectName Then
                        ObjectExists = True
                        Exit For
                    End If
                Next i
            Case acMacro
                For i = 0 To db.Containers("Scripts").Documents.Count - 1
                    If db.Containers("Scripts").Documents(i).Name = strObjectName Then
                        ObjectExists = True
                        Exit For
                    End If
                Next i
            Case Else
                MsgBox WrapText("Invalid Object Type passed, must be Table, " & _
                                "Query, Form, Report, Macro, or Module."), _
                        vbCritical
        End Select
        
    ExitProc:
        Exit Function
        
    ErrorHandler:
        ObjectExists = False
        GoTo ExitProc
        
    End Function
    
    
    
    Public Function WrapText(strOriginalText As String, Optional intMaxLineLength As Integer) As String
        
        ' This function wraps a text string so that a
        ' specified amount of characters are on each line,
        ' taking as many lines as necessary.1
        
        ' intMaxLineLength needs to be a minimum of 2 or
        ' the function will force it's own default value
        
        On Error GoTo ErrorHandler
        
        Dim i As Integer
        If Nz(intMaxLineLength, 0) < 2 Then intMaxLineLength = 64 'default value
        WrapText = ""
        While Len(strOriginalText) > intMaxLineLength
            'Find last space in first intMax characters
            For i = intMaxLineLength To 1 Step -1
                If Nz(Mid(strOriginalText, i, 1), "") = " " Then Exit For
            Next i
            'split strings
            If i = 0 Then 'no space within limits
                WrapText = WrapText & Trim(Left(strOriginalText, intMaxLineLength)) & vbNewLine
                strOriginalText = Trim(Right(strOriginalText, Len(strOriginalText) - intMaxLineLength))
            Else
                WrapText = WrapText & Trim(Left(strOriginalText, i)) & vbNewLine
                strOriginalText = Trim(Right(strOriginalText, Len(strOriginalText) - i))
            End If
        Wend
        WrapText = WrapText & strOriginalText
        
        Exit Function
        
    ErrorHandler:
            WrapText = "ERROR"
            Exit Function
        
    End Function
    Me.Geek = True

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •