x002548's Blog

Not Just a Number - Brett Kaiser
posts - 89, comments - 586, trackbacks - 39

Wednesday, December 20, 2006

Deconstruct Text, word by word, into a single column

Why, I still didn't get an answer from the poster in this post
But with Rudy laying the ground work I came up with the following.  
Why anyone would need to do thids, I have no idea.  I added a twist to this version 
where it counts the occurances.  The end goal here though was to find the text
row with the most occurances of any 1 word, and order the rows in that order.
Why?  I have no idea.
  

 

create table somewords
( id integer not null primary key identity
, blah text not null
);
insert into somewords (blah)
values ('a word that appears maximum number of times in a column')
insert into somewords (blah)
values ('Is it possible to get words from text columns in a sql server database')
insert into somewords (blah)
values ('This could solve my problem if reffered column contain only single word')
insert into somewords (blah)
values ('that''s going to require that you split out every word in the column individually')
insert into somewords (blah)
values ('the query will definitely not be easy to write')
insert into somewords (blah)
values ('Please read the sticky at the top of the board')
insert into somewords (blah)
values ('The physical order of data in a database has no meaning')
GO

CREATE TABLE UniqueWords (
   Word varchar(256)
 , WordId int IDENTITY(1,1)
 , WordCount int DEFAULT(1)
 , Add_Dt datetime DEFAULT (GetDate()))
 GO

CREATE UNIQUE INDEX UnqueWords_PK ON UniqueWords(Word)
GO

CREATE PROC isp_INS_UNIQUE_WORDS
AS
BEGIN
 SET NOCOUNT ON
 DECLARE @Words INT, @Pos INT, @x Int, @str varchar(256)
    , @word varchar(256), @start int, @end int, @exitstart int
 SELECT @Words = 0, @Pos = 1, @x = -1, @Word = '', @start = 1

 DECLARE myCursor CURSOR FOR SELECT Blah FROM SomeWords
 OPEN myCursor
 FETCH NEXT FROM myCursor INTO @str

 WHILE @@FETCH_STATUS = 0
   BEGIN
  WHILE (@x <> 0)
   BEGIN
    SET @x     = CHARINDEX(' ', @str, @Pos)
    IF @x <> 0
      BEGIN
     SET @end   = @x - @start
     SET @word  = SUBSTRING(@str,@start,@end)
     IF NOT EXISTS (SELECT * FROM UniqueWords WHERE Word = @Word)
      INSERT INTO UniqueWords(Word) SELECT @word
       ELSE
      UPDATE UniqueWords SET WordCount = WordCount + 1 WHERE Word = @Word
     -- SELECT @Word, @@ROWCOUNT,@@ERROR
     -- SELECT @x, @Word, @start, @end, @str
     SET @exitstart = @start + @end + 1
     SET @Pos   = @x + 1
     SET @start = @x + 1
     SET @Words = @Words + 1
      END
    IF @x = 0
      BEGIN
     SET @word  = SUBSTRING(@str,@exitstart,LEN(@str)-@exitstart+1)
     IF NOT EXISTS (SELECT * FROM UniqueWords WHERE Word = @Word)
      INSERT INTO UniqueWords(Word) SELECT @word
       ELSE
      UPDATE UniqueWords SET WordCount = WordCount + 1 WHERE Word = @Word
     -- SELECT @Word, @@ROWCOUNT,@@ERROR
     -- SELECT @x, @Word, @exitstart, LEN(@str)-@exitstart, @str
      END
   END
  FETCH NEXT FROM myCursor INTO @str
  SELECT @Words = 0, @Pos = 1, @x = -1, @Word = '', @start = 1
   END 

   CLOSE myCursor
   DEALLOCATE myCursor
   SET NOCOUNT OFF
 RETURN @Words
END
GO

EXEC isp_INS_UNIQUE_WORDS
GO

SELECT * FROM UniqueWords ORDER BY Word
GO

DROP PROC isp_INS_UNIQUE_WORDS
DROP TABLE UniqueWords, somewords
GO

posted @ Wednesday, December 20, 2006 3:27 PM | Feedback (2) | Filed Under [ SQL Gimmicks ]

Powered by:
Powered By Subtext Powered By ASP.NET