Results 1 to 5 of 5
  1. #1
    Join Date
    Feb 2004
    Location
    One Flump in One Place
    Posts
    14,912

    Unanswered: Parse words from string matching pattern

    Hi

    I'm trying to produce the below result from the below data. Return one row for every word that matches the pattern specified. I'm 99% sure this is a recursive CTE job (at least, if I want it efficient) but I am not too sure how to code it.....
    Any help greatly appreciated!
    Code:
    USE test    
    GO 
    
    DECLARE    @t TABLE
        (
            the_text        VARCHAR(MAX)    COLLATE    Latin1_general_BIN
        )
    
    INSERT @t
    SELECT    'This and That'
    UNION ALL
    SELECT    'this and that'
    UNION ALL
    SELECT    'This And That'
    UNION ALL
    SELECT    'THIS AND THAT'
    
    --So far - about to stick into CTE:
    DECLARE      @pattern            AS VARCHAR(100)
            
    SELECT      @pattern = '%[^A-Za-z0-9"''][A-Z][a-z]%'
    
    SELECT      the_text
            , parsed_text    = SUBSTRING(the_text, PATINDEX(@pattern, the_text) + 1, COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, PATINDEX(@pattern, the_text) + 1, LEN(the_text))), 0), LEN(the_text)) - 1)
    FROM    @t
    WHERE    the_text LIKE @pattern
    
    /*
    RESULT:
        the_text            parsed_text
        ------------------------------------
        This and That        That
        This And That        And
    
    --DESIRED RESULT:
        the_text            parsed_text
        ------------------------------------
        This and That        That
        This And That        And
        This And That        That
    */
    Testimonial:
    pootle flump
    ur codings are working excelent.

  2. #2
    Join Date
    Feb 2004
    Location
    One Flump in One Place
    Posts
    14,912
    I've got the query without a CTE - but scale well it will not.

    Code:
    USE test    
    GO 
    
    DECLARE    @t TABLE
        (
            the_text        VARCHAR(MAX)    COLLATE    Latin1_general_BIN
        )
    
    INSERT @t
    SELECT    'This and That'
    UNION ALL
    SELECT    'this and that'
    UNION ALL
    SELECT    'This And That'
    UNION ALL
    SELECT    'THIS AND THAT'
    
    --So far - about to stick into CTE:
    DECLARE      @pattern            AS VARCHAR(100)
            
    SELECT      @pattern = '%[^A-Za-z0-9"''][A-Z][a-z]%'
    
    SELECT      DISTINCT 
              the_text
            , parsed_text    = SUBSTRING(the_text, PATINDEX(@pattern, SUBSTRING(the_text, number, LEN(the_text))) + number, COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, PATINDEX(@pattern, SUBSTRING(the_text, number, LEN(the_text))) + number, LEN(the_text))), 0), LEN(the_text)) - 1)
    FROM    @t AS t
    INNER JOIN
            dbo.numbers
    ON    numbers.number BETWEEN PATINDEX(@pattern, the_text) AND LEN(the_text)
    WHERE    SUBSTRING(the_text, PATINDEX(@pattern, SUBSTRING(the_text, number, LEN(the_text))) + number - 1, LEN(the_text)) LIKE @pattern
    
    /*
    RESULT:
        the_text            parsed_text
        ------------------------------------
        This And That        And
        This And That        That
        This and That        That
    */
    Testimonial:
    pootle flump
    ur codings are working excelent.

  3. #3
    Join Date
    Feb 2004
    Location
    One Flump in One Place
    Posts
    14,912
    Well as ever your help has been almost impossible to guage

    Code:
    ;WITH parser AS 
        (
            SELECT      the_text
                    , first_word_index
                    , first_word            = SUBSTRING(the_text, first_word_index, first_word_end_index)
                    , is_anchor = 1 
                    , first_word_end_index
            FROM    --Derived data
                    (
                        SELECT    the_text
                                , first_word_index
                                , first_word_end_index        = COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, first_word_index + 1, max_len)), 0), max_len)
                        FROM    --Derived data
                                (
                                    SELECT      the_text
                                            , first_word_index        = PATINDEX(@pattern, the_text) + 1
                                            , max_len                = LEN(the_text)
                                    FROM    @t
                                    WHERE    the_text LIKE @pattern
                                ) AS anchor
                    ) AS anchor
            UNION ALL
            SELECT      t.the_text
                    , first_word_index        = parser.first_word_index + PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index, LEN(t.the_text)))
                    , first_word            = SUBSTRING(t.the_text, parser.first_word_index + PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index, LEN(t.the_text))), LEN(t.the_text))
                    , is_anchor                = 0 
                    , first_word_end_index    = NULL
            FROM    @t    AS t
            INNER JOIN 
                    parser
            ON    parser.the_text                = t.the_text
            WHERE    PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index + 1, LEN(t.the_text))) > 0
        )
    SELECT      the_text
            , first_word
    FROM    parser
    Testimonial:
    pootle flump
    ur codings are working excelent.

  4. #4
    Join Date
    Feb 2004
    Location
    One Flump in One Place
    Posts
    14,912
    Well, no one spotted the deliberate mistake!
    Code:
    ......
    UNION ALL
    SELECT    'One More Final Test'
    --Query doesn't handle text with 3+ eligible words
    Code:
    ;WITH parser AS 
        (
            SELECT      the_text
                    , first_word_index
                    , first_word            = SUBSTRING(the_text, first_word_index, first_word_end_index)
                    , first_word_end_index
                    , max_len
            FROM    --Derived data
                    (
                        SELECT    the_text
                                , first_word_index
                                , first_word_end_index        = COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, first_word_index + 1, max_len)), 0), max_len)
                                , max_len
                        FROM    --Derived data
                                (
                                    SELECT      the_text
                                            , first_word_index        = PATINDEX(@pattern, the_text) + 1
                                            , max_len                = LEN(the_text)
                                    FROM    @t
                                    WHERE    the_text LIKE @pattern
                                ) AS anchor
                    ) AS anchor
            UNION ALL
    
            SELECT      the_text
                    , first_word_index
                    , first_word            = SUBSTRING(the_text, first_word_index, first_word_end_index)
                    , first_word_end_index
                    , max_len
            FROM    --Derived data
                    (
                        SELECT      the_text
                                , first_word_index
                                , first_word_end_index        = COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, first_word_index + 1, max_len)), 0), max_len)
                                , max_len
                        FROM    --Derived_data
                                (
                                        SELECT      t.the_text
                                                , first_word_index        = parser.first_word_index + PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index, max_len))
                                                , max_len
                                        FROM    @t    AS t
                                        INNER JOIN 
                                                parser
                                        ON    parser.the_text                = t.the_text
                                        WHERE    PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index + 1, LEN(t.the_text))) > 0
                                ) AS recursor
                    ) AS recursor
        )
    SELECT      the_text
            , first_word
    FROM    parser
    ORDER BY the_text
    P.S. - CONNECT FOUR! Yay for Poots!!!11!
    Testimonial:
    pootle flump
    ur codings are working excelent.

  5. #5
    Join Date
    Jun 2003
    Location
    Ohio
    Posts
    12,592
    Provided Answers: 1
    I think this is the code that you want:
    Code:
    ;WITH parser AS 
        (
            SELECT      the_text
                    , first_word_index
                    , first_word            = SUBSTRING(the_text, first_word_index, first_word_end_index)
                    , first_word_end_index
                    , max_len
            FROM    --Derived data
                    (
                        SELECT    the_text
                                , first_word_index
                                , first_word_end_index        = COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, first_word_index + 1, max_len)), 0), max_len)
                                , max_len
                        FROM    --Derived data
                                (
                                    SELECT      the_text
                                            , first_word_index        = PATINDEX(@pattern, the_text) + 1
                                            , max_len                = LEN(the_text)
                                    FROM    @t
                                    WHERE    the_text LIKE @pattern
                                ) AS anchor
                    ) AS anchor
            UNION ALL
    
            SELECT      the_text
                    , first_word_index
                    , first_word            = SUBSTRING(the_text, first_word_index, first_word_end_index)
                    , first_word_end_index
                    , max_len
            FROM    --Derived data
                    (
                        SELECT      the_text
                                , first_word_index
                                , first_word_end_index        = COALESCE(NULLIF(PATINDEX('%[^a-zA-Z0-9]%', SUBSTRING(the_text, first_word_index + 1, max_len)), 0), max_len)
                                , max_len
                        FROM    --Derived_data
                                (
                                        SELECT      t.the_text
                                                , first_word_index        = parser.first_word_index + PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index, max_len))
                                                , max_len
                                        FROM    @t    AS t
                                        INNER JOIN 
                                                parser
                                        ON    parser.the_text                = t.the_text
                                        WHERE    PATINDEX(@pattern, SUBSTRING(t.the_text, parser.first_word_index + 1, LEN(t.the_text))) > 0
                                ) AS recursor
                    ) AS recursor
        )
    SELECT      the_text
            , first_word
    FROM    parser
    ORDER BY the_text
    Oops. Too late, I guess. Well, at least I tried...
    If it's not practically useful, then it's practically useless.

    blindman
    www.chess.com: "sqlblindman"
    www.LobsterShot.blogspot.com

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •