create table somewords
( id integer not null primary key identity
, blah text not null
);
insert into somewords (blah)
values ('a word that appears maximum number of times in a column')
insert into somewords (blah)
values ('Is it possible to get words from text columns in a sql server database')
insert into somewords (blah)
values ('This could solve my problem if reffered column contain only single word')
insert into somewords (blah)
values ('that''s going to require that you split out every word in the column individually')
insert into somewords (blah)
values ('the query will definitely not be easy to write')
insert into somewords (blah)
values ('Please read the sticky at the top of the board')
insert into somewords (blah)
values ('The physical order of data in a database has no meaning')
GO
CREATE TABLE UniqueWords (
Word varchar(256)
, WordId int IDENTITY(1,1)
, WordCount int DEFAULT(1)
, Add_Dt datetime DEFAULT (GetDate()))
GO
CREATE UNIQUE INDEX UnqueWords_PK ON UniqueWords(Word)
GO
CREATE PROC isp_INS_UNIQUE_WORDS
AS
BEGIN
SET NOCOUNT ON
DECLARE @Words INT, @Pos INT, @x Int, @str varchar(256)
, @word varchar(256), @start int, @end int, @exitstart int
SELECT @Words = 0, @Pos = 1, @x = -1, @Word = '', @start = 1
DECLARE myCursor CURSOR FOR SELECT Blah FROM SomeWords
OPEN myCursor
FETCH NEXT FROM myCursor INTO @str
WHILE @@FETCH_STATUS = 0
BEGIN
WHILE (@x <> 0)
BEGIN
SET @x = CHARINDEX(' ', @str, @Pos)
IF @x <> 0
BEGIN
SET @end = @x - @start
SET @word = SUBSTRING(@str,@start,@end)
IF NOT EXISTS (SELECT * FROM UniqueWords WHERE Word = @Word)
INSERT INTO UniqueWords(Word) SELECT @word
ELSE
UPDATE UniqueWords SET WordCount = WordCount + 1 WHERE Word = @Word
-- SELECT @Word, @@ROWCOUNT,@@ERROR
-- SELECT @x, @Word, @start, @end, @str
SET @exitstart = @start + @end + 1
SET @Pos = @x + 1
SET @start = @x + 1
SET @Words = @Words + 1
END
IF @x = 0
BEGIN
SET @word = SUBSTRING(@str,@exitstart,LEN(@str)-@exitstart+1)
IF NOT EXISTS (SELECT * FROM UniqueWords WHERE Word = @Word)
INSERT INTO UniqueWords(Word) SELECT @word
ELSE
UPDATE UniqueWords SET WordCount = WordCount + 1 WHERE Word = @Word
-- SELECT @Word, @@ROWCOUNT,@@ERROR
-- SELECT @x, @Word, @exitstart, LEN(@str)-@exitstart, @str
END
END
FETCH NEXT FROM myCursor INTO @str
SELECT @Words = 0, @Pos = 1, @x = -1, @Word = '', @start = 1
END
CLOSE myCursor
DEALLOCATE myCursor
SET NOCOUNT OFF
RETURN @Words
END
GO
EXEC isp_INS_UNIQUE_WORDS
GO
SELECT * FROM UniqueWords ORDER BY Word
GO
DROP PROC isp_INS_UNIQUE_WORDS
DROP TABLE UniqueWords, somewords
GO