Peter Larsson Blog

Patron Saint of Lost Yaks

Bayesian Estimate and Root Mean Square

Sometimes you face the situation where you have some items ranked, and someone always tries to bump the ranking by voting a "perfect 100" to a particular item.

And when you rank the items with average function or root mean square, that single "perfect 100" vote still bumps the item at top of ranking.

Well, have you looked a Bayesian Estimate?

DECLARE       @Sample TABLE
       (
              userID INT,
              vote INT
       )
INSERT @Sample
SELECT 3, 40 UNION ALL
SELECT 3, 60 UNION ALL
SELECT 0, 100 UNION ALL
SELECT 1, 100 UNION ALL
SELECT 1, 100 UNION ALL
SELECT 1, 90 UNION ALL
SELECT 1, 100 UNION ALL
SELECT 1, 90 UNION ALL
SELECT 1, 100 UNION ALL
SELECT 1, 90 UNION ALL
SELECT 2, 60 UNION ALL
SELECT 2, 70 UNION ALL
SELECT 2, 100 UNION ALL
SELECT 2, 90 UNION ALL
SELECT 2, 60 UNION ALL
SELECT 2, 20
 
DECLARE       @minVotes INT
 
SET    @minVotes = 30
 
-- Average
SELECT        userID,
              AVG(1.0E * vote) AS Average
FROM          @Sample
GROUP BY      userID
--HAVING             COUNT(*) >= @minVotes
ORDER BY      userID
 
-- Root Mean Square
SELECT        userID,
              SQRT(SUM(Items * thePow) / SUM(Items)) AS [Root Mean Square]
FROM          (
                     SELECT        userID,
                                  POWER(vote, 2.0E) AS thePow,
                                  1.0E * COUNT(*) AS Items
                     FROM          @Sample
                     GROUP BY      userID,
                                  vote
              ) AS d
GROUP BY      userID
--HAVING             SUM(Items) >= @minVotes
ORDER BY      userID
 
-- Bayesian estimate
SELECT        s.userID,
              1.0E * COUNT(*) /(1.0E * COUNT(*) + @minVotes) * AVG(s.vote) +(@minVotes /(1.0E * COUNT(*) + @minVotes)) * MIN(c.c) AS [Bayesian estimate]
FROM          @Sample AS s
CROSS JOIN    (
                     SELECT AVG(1.0E * vote) AS c
                     FROM   @Sample
              ) AS c
GROUP BY      s.userID
--HAVING             COUNT(*) >= @minVotes
ORDER BY      s.userID