Linear and piecewise linear regression with LazyPPL

Extensions and imports for this Literate Haskell file
module RegressionDemo where
import LazyPPL
import Distr
import Data.Colour
import Data.Colour.Names

import Graphics.Matplotlib hiding (density)

Regression is about finding fitting a function to some data. Bayesian regression is about finding a posterior distribution on functions, given the data.

We start with a random linear function:

linear :: Prob (Double -> Double)
linear =
  do
    a <- normal 0 3
    b <- normal 0 3
    let f = \x -> a * x + b
    return f
Note that this returns a random function. Here are 1000 draws from the distribution.
Graphing code
plotLinearPrior =
  do
    fs' <- mh 1 (sample linear) 
    let fs = map fst $ take 1000 $ fs'
    plotFuns "images/regression-linear-prior.svg" [] fs

Here is a sample dataset that we will find a function for:
dataset :: [(Double, Double)]
dataset = [(0,0.6), (1, 0.7), (2,1.2), (3,3.2), (4,6.8), (5, 8.2), (6,8.4)]
Graphing code
plotDataset =
  do
    plotFuns "images/regression-dataset.svg" dataset []

Our regression here is noisy: the function has not generated this data set, because the points are not colinear.

Our generic regression function takes a random function “prior”, and some input/output observations “dataset”, which are assumed to be noisy according to “sigma”, returns a conditioned random linear function (unnormalized).
regress :: Double -> Prob (a -> Double) -> [(a, Double)] -> Meas (a -> Double)
regress sigma prior dataset =
  do
    f <- sample prior
    forM_ dataset (\(x, y) -> score $ normalPdf (f x) sigma y)
    return f
Now we can run Bayesian linear regression as follows:
plotLinReg =
  do fs' <- mh 0.5 (regress 0.1 linear dataset)
     let fs = map fst $ take 1000 $ every 10 $ drop 100 fs'
     plotFuns "images/regression-linear-reg.svg" dataset fs

Graphing routines
-- Plot the points drawn from weighted samples
-- epsilon: smallest y axis difference to worry about
-- delta: smallest x axis difference to worry about
interestingPoints :: (Double -> Double) -> Double -> Double -> Double -> Double -> [Double] -> [Double]
interestingPoints f lower upper epsilon delta acc =
  if abs(upper - lower) < delta then acc
  else
    let mid = (upper - lower) / 2 + lower in
    if abs((f(upper) - f(lower)) / 2 + f(lower) - f(mid)) < epsilon
    then acc
    else interestingPoints f lower mid epsilon delta (mid : (interestingPoints f mid upper epsilon delta acc))

sampleFun f =
--  [ (x, f x) | x <- [(-0.25),(-0.25+0.1)..6.2]]
  let xs = ((-0.25) : (interestingPoints f (-0.25) 6.2 0.3 0.001 [6.2])) in
  map (\x -> (x,f x)) xs


plotFuns :: String -> [(Double,Double)] -> [Double -> Double] -> IO ()
plotFuns filename dataset funs = 
    do  putStrLn $ "Plotting " ++ filename ++ "..."
        file filename $ foldl (\a f -> let xfs = sampleFun f in a % plot (map fst xfs) (map snd xfs) @@ [o1 "go-", o2 "linewidth" (0.5 :: Double), o2 "alpha" (0.01 :: Double), o2 "ms" (0 :: Int)]) (scatter (map fst dataset) (map snd dataset) @@ [o2 "c" "black"] % xlim (0 :: Int) (6 :: Int) % ylim (-2 :: Int) (10 :: Int)) funs
        putStrLn "Done."
        return ()


main :: IO ()
main = do {plotLinearPrior ; plotDataset ; plotLinReg}


Generated by Pandoc from Literate Haskell. Full source on .