If this is your first visit, be sure to check out the FAQ by clicking the link above. You may have to register before you can post: click the register link above to proceed. To start viewing messages, select the forum that you want to visit from the selection below.

 
Go Back  dBforums > PC based Database Applications > Microsoft Access > Keyword List from Memo Field in Table

Reply
 
LinkBack Thread Tools Search this Thread Display Modes
  #1 (permalink)  
Old 02-07-12, 20:40
nckdryr nckdryr is offline
Computer Monkey
 
Join Date: May 2005
Posts: 1,191
Keyword List from Memo Field in Table

I figured this may be useful to someone else out there. I'm sure someone else has run into this before, and probably figured out a better way to do it, but I couldn't find anything online, so here's my quick and dirty approach:

Basically what I'm after is how do I pick out just keywords from a large set of data without having to manually read through every field (imagine a user-generated description memo field or something, and many, many rows of them). I came up with the following that generates a table that can then be read either by a listbox, report, or what-have-you.

DISCLAIMER: I'm sure this is not the most efficient way it can be done, what with the lookups and SQL inserts and updates, so I would welcome any helpful suggestions for how to speed up the process. Also, there may not be adequate error handling for your particular application; I leave that up to you should you choose to use this code. Use at your own discretion, it's your responsibility not mine

The whole thing was too long to fit in one post, so see follow-up posts for all the instructions and code.
__________________
Me.Geek = True
Reply With Quote
  #2 (permalink)  
Old 02-07-12, 20:40
nckdryr nckdryr is offline
Computer Monkey
 
Join Date: May 2005
Posts: 1,191
The code to generate the table:
Code:
Public Function GenerateKeywordTable(ByVal strDescr As String, _
                                        Optional strTableName As String, _
                                        Optional intMinWordLength As Integer, _
                                        Optional intMinOccurences As Integer) As Boolean
    
    'generates a temp table with all keywords and the number of times they appear in a passed string
    
    On Error GoTo ErrorHandler
    
    GenerateKeywordTable = False
    
    'instantiate
    Dim i As Long, _
        iCnt As Long
    Dim strSQL As String, _
        strWord As String
    Dim varWords As Variant
    Dim rsStopWords As dao.Recordset
    
    'error handling
    If Not Len(strDescr) > 0 Then GoTo ExitProc
    
    'defaults
    If Not Len(strTableName) > 0 Then strTableName = "tblTempKeywords"
    If Not intMinWordLength > 0 Then intMinWordLength = 3
    If Not intMinOccurences > 0 Then intMinOccurences = 2
    
    'sanitize string
    SysCmd acSysCmdSetStatus, "Sanatizing Data String..."
    strDescr = UCase(strDescr)
    AlphaNumericString strDescr
    strDescr = Replace(strDescr, chr(10), "")
    strDescr = Replace(strDescr, chr(13), "")
    While InStr(strDescr, "  ") > 1
        strDescr = Replace(strDescr, "  ", " ")
    Wend
    
    'analyze string
    varWords = Split(strDescr)
    
    'clear temp table if exists
    SysCmd acSysCmdSetStatus, "Clearing Word Table..."
    If ObjectExists(acTable, strTableName) Then DoCmd.DeleteObject acTable, strTableName
    
    'build temp table
    SysCmd acSysCmdInitMeter, "Building Word Table...", UBound(varWords)
    strSQL = ""
    'make table
    strWord = varWords(0)
    iCnt = CountOccurrences(strDescr, strWord)
    strSQL = "SELECT " & _
                    "('" & strWord & "') AS [fldWord], " & _
                    "(" & iCnt & ") AS [fldCount] " & _
                    vbNewLine & _
                "INTO [" & strTableName & "];"
    CurrentDb.Execute strSQL
    'cycle through words array and add to table
    For i = 1 To UBound(varWords)
        'update status
        SysCmd acSysCmdUpdateMeter, i
        'make sure word is longer than minimum
        strWord = varWords(i)
        If Len(strWord) >= intMinWordLength Then
            iCnt = Nz(ELookup("fldCount", strTableName, "(fldWord) = '" & strWord & "'"), 0)
            'create if unique
            If iCnt = 0 Then
                iCnt = 1
                strSQL = "INSERT INTO [" & strTableName & "] " & _
                            "(fldWord, fldCount) " & _
                            "VALUES " & _
                            "('" & strWord & "', " & iCnt & ");"
                CurrentDb.Execute strSQL
            'add if duplicate
            ElseIf iCnt > 0 Then
                iCnt = iCnt + 1
                strSQL = "UPDATE [" & strTableName & "] " & _
                            "SET [" & strTableName & "].fldCount = " & iCnt & " " & _
                            "WHERE [" & strTableName & "].fldWord = '" & strWord & "';"
                CurrentDb.Execute strSQL
            End If
        End If
    Next i
    
    'clear stop words
    SysCmd acSysCmdSetStatus, "Clearing Stop Words..."
    strSQL = "SELECT tblStopWords.[fldStopWord] " & _
                "FROM tblStopWords " & _
                "ORDER BY tblStopWords.[fldStopWord];"
    Set rsStopWords = CurrentDb.OpenRecordset(strSQL, dbOpenSnapshot)
    If rsStopWords.RecordCount > 0 Then rsStopWords.MoveLast
    SysCmd acSysCmdInitMeter, "Clearing Stop Words...", rsStopWords.RecordCount
    If Not rsStopWords.BOF Then rsStopWords.MoveFirst
    i = 0
    Do While Not rsStopWords.EOF
        'update status
        SysCmd acSysCmdUpdateMeter, i
        
        'remove table entry
        'rsStopWords.Fields(0)
        strSQL = "DELETE [" & strTableName & "].* " & _
                    "FROM [" & strTableName & "] " & _
                    "WHERE [" & strTableName & "].fldWord = '" & rsStopWords.Fields(0) & "';"
        CurrentDb.Execute strSQL
        
        'increment
        rsStopWords.MoveNext
        i = i + 1
    Loop
    rsStopWords.Close
    Set rsStopWords = Nothing
    
    'clear too few count words
    SysCmd acSysCmdSetStatus, "Clearing small words..."
    strSQL = "DELETE [" & strTableName & "].* " & _
                "FROM [" & strTableName & "] " & _
                "WHERE NOT Len([" & strTableName & "].fldWord) > 0;"
    CurrentDb.Execute strSQL
    strSQL = "DELETE [" & strTableName & "].* " & _
                "FROM [" & strTableName & "] " & _
                "WHERE [" & strTableName & "].fldCount < " & intMinOccurences & ";"
    CurrentDb.Execute strSQL
    
    'clear out
    GenerateKeywordTable = True

ExitProc:
    SysCmd acSysCmdSetStatus, " "
    Exit Function
    
ErrorHandler:
    On Error Resume Next
    GenerateKeywordTable = False
    GoTo ExitProc
    
End Function


Public Function AlphaNumericString(ByRef strInput As String) As Boolean
    
    'strips all but alpha-numeric, space, underscore and cr-lf characters from a string
    'pass string by reference
    
    On Error GoTo ErrorHandler
    
    AlphaNumericString = False
    
    Dim i As Integer
    
    For i = 0 To 255
        Select Case i
            Case 10, 13, 32, 48 To 57, 65 To 90, 95, 97 To 122
                'leave alone
            Case Else
                strInput = Replace(strInput, chr(i), "")
        End Select
    Next i
    
    AlphaNumericString = True
    
ExitProc:
    Exit Function
    
ErrorHandler:
    On Error Resume Next
    AlphaNumericString = False
    GoTo ExitProc
    
End Function




Public Function ELookup(Expr As String, _
                        Domain As String, _
                        Optional Criteria As Variant, _
                        Optional OrderClause As Variant, _
                        Optional db As dao.Database, _
                        Optional strcSep As String = ",") As Variant
    
    On Error GoTo ErrorHandler
    
    ' http://allenbrowne.com/ser-42.html
    
    'Purpose:   Faster and more flexible replacement for DLookup()
    'Arguments: Same as DLookup, with additional Order By option.
    'Return:    Value of the Expr if found, else Null.
    '           Delimited list for multi-value field.
    'Author:    Allen Browne. allen@allenbrowne.com
    'Updated:   December 2006, to handle multi-value fields (Access 2007.)
    'Examples:
    '           1. To find the last value, include DESC in the OrderClause, e.g.:
    '               ELookup("[Surname] & [FirstName]", "tblClient", , "ClientID DESC")
    '           2. To find the lowest non-null value of a field, use the Criteria, e.g.:
    '               ELookup("ClientID", "tblClient", "Surname Is Not Null" , "Surname")
    'Note:      Requires a reference to the DAO library.
    
    Dim rs As dao.Recordset         'To retrieve the value to find.
    Dim rsMVF As dao.Recordset      'Child recordset to use for multi-value fields.
    Dim varResult As Variant        'Return value for function.
    Dim strSQL As String            'SQL statement.
    Dim strOut As String            'Output string to build up (multi-value field.)
    Dim lngLen As Long              'Length of string.
    
    'Initialize to null.
    varResult = Null
    
    'Build the SQL string.
    strSQL = "SELECT TOP 1 " & Expr & " FROM " & Domain
    If Not IsMissing(Criteria) Then
        strSQL = strSQL & " WHERE " & Criteria
    End If
    If Not IsMissing(OrderClause) Then
        strSQL = strSQL & " ORDER BY " & OrderClause
    End If
    strSQL = strSQL & ";"
    
    'Lookup the value.
    If (db Is Nothing) Then Set db = DBEngine(0)(0)
    Set rs = db.OpenRecordset(strSQL, dbOpenForwardOnly)
    If rs.RecordCount > 0 Then
        'Will be an object if multi-value field.
        If VarType(rs(0)) = vbObject Then
            Set rsMVF = rs(0).Value
            Do While Not rsMVF.EOF
                If rs(0).Type = 101 Then        'dbAttachment
                    strOut = strOut & rsMVF!FileName & strcSep
                Else
                    strOut = strOut & rsMVF![Value].Value & strcSep
                End If
                rsMVF.MoveNext
            Loop
            'Remove trailing separator.
            lngLen = Len(strOut) - Len(strcSep)
            If lngLen > 0& Then
                varResult = Left(strOut, lngLen)
            End If
            Set rsMVF = Nothing
        Else
            'Not a multi-value field: just return the value.
            varResult = rs(0)
        End If
    End If
    rs.Close
    
ExitProc:
    ELookup = varResult
    Set rs = Nothing
    Set db = Nothing
    Exit Function
    
ErrorHandler:
    On Error Resume Next
    varResult = Null
    Resume ExitProc
    
End Function
__________________
Me.Geek = True
Reply With Quote
  #3 (permalink)  
Old 02-07-12, 20:41
nckdryr nckdryr is offline
Computer Monkey
 
Join Date: May 2005
Posts: 1,191
Create a Stop Words table named 'tblStopWords' with a single string field called 'fldStopWord' and insert the following (modify at your discretion):
Code:
a
about
above
across
after
afterwards
again
against
all
almost
alone
along
already
also
although
always
am
among
amongst
amoungst
amount
an
and
another
any
anyhow
anyone
anything
anyway
anywhere
are
around
as
at
back
be
became
because
become
becomes
becoming
been
before
beforehand
behind
being
below
beside
besides
between
beyond
both
but
by
call
can
cannot
cant
co
con
could
couldnt
cry
de
describe
detail
do
done
down
due
during
each
eg
eight
either
eleven
else
elsewhere
enough
etc
even
ever
every
everyone
everything
everywhere
except
few
fifteen
fify
find
fire
first
five
for
former
formerly
forty
found
four
from
further
get
give
go
had
has
hasnt
have
he
hence
her
here
hereafter
hereby
herein
hereupon
hers
herself
him
himself
his
how
however
hundred
i
ie
if
in
inc
indeed
interest
into
is
it
its
itself
keep
last
latter
least
less
ltd
made
many
may
me
meanwhile
might
mill
mine
more
moreover
most
mostly
move
much
must
my
myself
name
namely
neither
never
nevertheless
next
nine
no
nobody
none
noone
nor
nothing
now
nowhere
of
off
often
on
once
one
only
onto
or
other
others
otherwise
our
ours
ourselves
out
over
own
part
per
perhaps
please
put
rather
re
same
see
seem
seemed
seeming
seems
serious
several
she
should
show
side
since
sincere
six
sixty
so
some
somehow
someone
something
sometime
sometimes
somewhere
still
such
system
take
ten
than
that
the
their
them
themselves
then
thence
there
thereafter
thereby
therefore
therein
thereupon
these
they
third
this
those
though
three
through
throughout
thru
thus
to
together
too
top
toward
towards
twelve
twenty
two
un
under
until
up
upon
us
very
via
was
we
well
were
what
whatever
when
whence
whenever
where
whereafter
whereas
whereby
wherein
whereupon
wherever
whether
which
while
whither
who
whoever
whole
whom
whose
why
will
with
within
without
would
yet
you
your
yours
yourself
yourselve
__________________
Me.Geek = True

Last edited by nckdryr; 02-14-12 at 11:32. Reason: Added hyperlink to Wiki article on Stop Words
Reply With Quote
  #4 (permalink)  
Old 02-07-12, 20:42
nckdryr nckdryr is offline
Computer Monkey
 
Join Date: May 2005
Posts: 1,191
I call it like this (note, change the sql as needed for your application):
Code:
Private Sub cmdResultsWordList_Click()
    
    On Error GoTo ErrorHandler
    
    DoCmd.Hourglass True
    
    'instantiate
    Dim i As Long, _
        iWordMinLength As Integer
    Dim strDescr As String, _
        strFileText As String, _
        strSQL As String, _
        strCriteria As String, _
        strTableName As String
    Dim rs As dao.Recordset
    
    'initiate
    strDescr = ""
    strFileText = ""
    strSQL = ""
    strTableName = "tblTempKeywords"
    iWordMinLength = 3
    
    'get data
    SysCmd acSysCmdSetStatus, "Preparing Data..."
    'ADJUST THE FOLLOWING SQL AS NEEDED!!!!!
    strCriteria  = ""
    strSQL = strSQL & "SELECT [tblName].[fldName] " & vbNewLine
    strSQL = strSQL & "FROM [tblName] " & vbNewLine
    strSQL = strSQL & "WHERE " & strCriteria & ";"
    Set rs = CurrentDb.OpenRecordset(strSQL, dbOpenSnapshot)
    If rs.RecordCount > 0 Then rs.MoveLast
    
    'build description string
    SysCmd acSysCmdInitMeter, "Concatenating Data String...", rs.RecordCount
    i = 0
    If Not rs.BOF Then rs.MoveFirst
    Do While Not rs.EOF
        'update status
        SysCmd acSysCmdUpdateMeter, i
        
        'build string
        strDescr = strDescr & Nz(rs.Fields(0), "") & vbNewLine
        
        'increment
        rs.MoveNext
        i = i + 1
    Loop
    rs.Close
    Set rs = Nothing
    
    'build table
    GenerateKeywordTable strDescr, strTableName, iWordMinLength
    
    'do whatever you want to with the table here
    
ExitSub:
    SysCmd acSysCmdSetStatus, " "
    DoCmd.SetWarnings True
    DoCmd.Echo True
    DoCmd.Hourglass False
    Exit Sub
    
ErrorHandler:
    On Error Resume Next
    GoTo ExitSub
    
End Sub
__________________
Me.Geek = True
Reply With Quote
  #5 (permalink)  
Old 02-08-12, 13:51
nckdryr nckdryr is offline
Computer Monkey
 
Join Date: May 2005
Posts: 1,191
One more bit of code I forgot in the second post:
Code:
Public Function ObjectExists(intObjectType As AcObjectType, _
                                strObjectName As String, _
                                Optional ByVal db As dao.Database) As Boolean
    ' Pass the Object type: Table, Query, Form, Report, Macro, or Module
    ' Pass the Object Name
    
    On Error GoTo ErrorHandler
    ObjectExists = False
    
    Dim tbl As TableDef
    Dim qry As QueryDef
    Dim i As Integer
    
    'assign default db if null
    If (db Is Nothing) Then Set db = CurrentDb
    
    Select Case intObjectType
        Case acTable
            For Each tbl In db.TableDefs
                If tbl.Name = strObjectName Then
                    ObjectExists = True
                    Exit For
                End If
            Next tbl
        Case acQuery
            For Each qry In db.QueryDefs
                If qry.Name = strObjectName Then
                    ObjectExists = True
                    Exit For
                End If
            Next qry
        Case acForm, acReport, acModule
            For i = 0 To db.Containers(intObjectType & "s").Documents.Count - 1
                If db.Containers(intObjectType & "s").Documents(i).Name = strObjectName Then
                    ObjectExists = True
                    Exit For
                End If
            Next i
        Case acMacro
            For i = 0 To db.Containers("Scripts").Documents.Count - 1
                If db.Containers("Scripts").Documents(i).Name = strObjectName Then
                    ObjectExists = True
                    Exit For
                End If
            Next i
        Case Else
            MsgBox WrapText("Invalid Object Type passed, must be Table, " & _
                            "Query, Form, Report, Macro, or Module."), _
                    vbCritical
    End Select
    
ExitProc:
    Exit Function
    
ErrorHandler:
    ObjectExists = False
    GoTo ExitProc
    
End Function



Public Function WrapText(strOriginalText As String, Optional intMaxLineLength As Integer) As String
    
    ' This function wraps a text string so that a
    ' specified amount of characters are on each line,
    ' taking as many lines as necessary.1
    
    ' intMaxLineLength needs to be a minimum of 2 or
    ' the function will force it's own default value
    
    On Error GoTo ErrorHandler
    
    Dim i As Integer
    If Nz(intMaxLineLength, 0) < 2 Then intMaxLineLength = 64 'default value
    WrapText = ""
    While Len(strOriginalText) > intMaxLineLength
        'Find last space in first intMax characters
        For i = intMaxLineLength To 1 Step -1
            If Nz(Mid(strOriginalText, i, 1), "") = " " Then Exit For
        Next i
        'split strings
        If i = 0 Then 'no space within limits
            WrapText = WrapText & Trim(Left(strOriginalText, intMaxLineLength)) & vbNewLine
            strOriginalText = Trim(Right(strOriginalText, Len(strOriginalText) - intMaxLineLength))
        Else
            WrapText = WrapText & Trim(Left(strOriginalText, i)) & vbNewLine
            strOriginalText = Trim(Right(strOriginalText, Len(strOriginalText) - i))
        End If
    Wend
    WrapText = WrapText & strOriginalText
    
    Exit Function
    
ErrorHandler:
        WrapText = "ERROR"
        Exit Function
    
End Function
__________________
Me.Geek = True
Reply With Quote
Reply

Thread Tools Search this Thread
Search this Thread:

Advanced Search
Display Modes

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is Off
HTML code is Off
Trackbacks are On
Pingbacks are On
Refbacks are On