Blog move

18 08 2009

My new blog can be found at http://blog.poucet.org





Flattening Data.Map

18 04 2009

While at the haskell hackathon, I decided to work on adaptive containers, which is an initiative that had been started by Don Stewart, to get Data.Map and Data.IntMap more memory compact. Together with Nicolas Pouillard(ertai) we tackled this project. He worked on IntMap while I worked on Map. You can find the code at: http://patch-tag.com/repo/adaptive-containers.

The main motivation for these adaptive containers it that it is impossible to flatten (or unpack) small data-types directly into the memory used by the data-constructor, so you end up with extra pointers for each value inside a container (specifically 2 pointers, 1 from the container to the data-type, and then one from the data-type to the unboxed one).

Update: A video was posted where Don Stewart presents the basic concepts and where I briefly present the associative adaptive containers mentioned in this article.

It is very easy to create flattened maps now, all you have to do is instantiate the following typeclass:

class AdaptMap k a where
  data Map k a

  tip :: Map k a
  unsafeBin :: Size -> k -> a -> Map k a -> Map k a -> Map k a
  mapMap :: c -> (Size -> k -> a -> Map k a -> Map k a -> c) -> Map k a -> c
  mapMap2 :: (AdaptMap k b)
          => c
          -> (Size -> k -> a -> Map k a -> Map k a -> c) -- Right is Tip
          -> (Size -> k -> b -> Map k b -> Map k b -> c) -- Left is Tip
          -> (Size -> k -> a -> Map k a -> Map k a ->
              Size -> k -> b -> Map k b -> Map k b -> c)
          -> Map k a
          -> Map k b
          -> c
  mapMap2 t b1 b2 c t1 t2 = mapMap tip1 bin1 t1
    where tip1                = mapMap t b2 t2
          bin1 s1 k1 i1 l1 r1 = mapMap tip2 bin2 t2
            where tip2                = b1 s1 k1 i1 l1 r1
                  bin2 s2 k2 i2 l2 r2 = c s1 k1 i1 l1 r1 s2 k2 i2 l2 r2

Notice that the case for two tree deconstruction is completely defined based on single tree deconstruction, so you only need to worry about tip, unsafeBin and mapMap.  Also note that this representation assumes you will have an implementation that is similar to the original one.

An example would be:

instance AdaptMap Int Int where
  data Map Int Int  = TipIntInt
                    | BinIntInt
                      {-# UNPACK #-} !Size
                      {-# UNPACK #-} !Int
                      {-# UNPACK #-} !Int
                      !(Map Int Int)
                      !(Map Int Int)

  tip                              = TipIntInt
  unsafeBin                        = BinIntInt
  mapMap t b TipIntInt             = t
  mapMap t b (BinIntInt s k i l r) = b s k i l r

After we got the basic code working, I thought it would be interesting to flatten nodes inside of Data.Adaptive.Map further, so I created a different instance for Int32 (to get as similar as possible to the original keys and values).  The definition is much hairier:

instance AdaptMap Int32 Int32 where
  data Map Int32 Int32  = TipInt32Int32
                        | BinInt32Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          !(Map Int32 Int32)
                          !(Map Int32 Int32)
                        | BinTipTipInt32Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                        | BinBinTipInt32Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          !(Map Int32 Int32)
                          !(Map Int32 Int32)
                        | BinTipBinInt32Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          !(Map Int32 Int32)
                          !(Map Int32 Int32)
                        | BinBinBinInt32Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          !(Map Int32 Int32)
                          !(Map Int32 Int32)
                          {-# UNPACK #-} !Size
                          {-# UNPACK #-} !Int32
                          {-# UNPACK #-} !Int32
                          !(Map Int32 Int32)
                          !(Map Int32 Int32)

  tip                              = TipInt32Int32
  unsafeBin s k v TipInt32Int32 TipInt32Int32 = BinTipTipInt32Int32 s k v
  unsafeBin s k v (BinInt32Int32 s1 k1 v1 l1 r1) TipInt32Int32
    = BinBinTipInt32Int32 s k v s1 k1 v1 l1 r1
  unsafeBin s k v TipInt32Int32 (BinInt32Int32 s2 k2 v2 l2 r2)
    = BinTipBinInt32Int32 s k v s2 k2 v2 l2 r2
  unsafeBin s k v (BinInt32Int32 s1 k1 v1 l1 r1) (BinInt32Int32 s2 k2 v2 l2 r2)
    = BinBinBinInt32Int32 s k v s1 k1 v1 l1 r1 s2 k2 v2 l2 r2
  unsafeBin s k v l r
    = BinInt32Int32 s k v l r

  mapMap t b TipInt32Int32                              = t
  mapMap t b (BinInt32Int32 s k v l r)                  = b s k v l r
  mapMap t b (BinTipTipInt32Int32 s k v)                =
    b s k v TipInt32Int32 TipInt32Int32
  mapMap t b (BinBinTipInt32Int32 s k v s1 k1 v1 l1 r1) =
    b s k v (BinInt32Int32 s1 k1 v1 l1 r1) TipInt32Int32
  mapMap t b (BinTipBinInt32Int32 s k v s2 k2 v2 l2 r2) =
    b s k v TipInt32Int32 (BinInt32Int32 s2 k2 v2 l2 r2)
  mapMap t b (BinBinBinInt32Int32 s k v s1 k1 v1 l1 r1 s2 k2 v2 l2 r2) =
    b s k v (BinInt32Int32 s1 k1 v1 l1 r1) (BinInt32Int32 s2 k2 v2 l2 r2)

The reason for this is that I’ve read in the past that flattening 3 tree nodes into one can give you very good cache locality.  While you are recreating nodes on deconstruction, these will be cache local anyways, so it should not be that expensive to access the data in these.

To see this in action (apologies about the long data-constructors), the following image is a representation of fromList $ zip [1..100] [1..100].  While the image is not fully clear, it is clear that the number of nodes used to represent this is much smaller.

100-nodes

In contrast, the simpler Int version of the same data-structure has many more nodes:

100-nodes2





Bootstrapping cabal

12 03 2009

Often I have to install cabal onto a machine where I just have GHC 6.8.2 running. I’ve found the following little script quite practical to install the basics.  Once you have that installed, it is very easy to install other haskell packages with cabal.

#!/bin/bash
#
# Copyright 2009 Christophe Poucet. All Rights Reserved.
# Author: <christophe (dot) poucet (at) gmail (dot) com> (Christophe Poucet)
# License: BSD3

webroot=http://hackage.haskell.org/packages/archive

install_package() {
  local name=$1
  local version=$2

  wget $webroot/$name/$version/$name-$version.tar.gz
  tar xvzf $name-$version.tar.gz
  cd $name-$version
  runghc Setup configure
  runghc Setup build
  sudo runghc Setup install
  cd ..
  rm -rf $name-$version
  rm -f $name-$version.tar.gz
}

install_package mtl 1.1.0.2
install_package parsec 3.0.0
install_package network 2.2.0.1
install_package HTTP 4000.0.4
install_package zlib 0.5.0.0
install_package Cabal 1.6.0.2
install_package cabal-install 0.6.2




Staying with the times… (?)

27 02 2009

I’ve decided to try out twittering.  I know that I’m seriously lagging on the hype-wave, but I think it might be a nice way to share links that I find interesting.  I have delicious too, but it is not as conversational.  I’ve also had a friendfeed for a while, so I’ve linked all my services together there:

http://friendfeed.com/poucet

http://twitter.com/poucet





Imported from Blogger

26 02 2009

I have just imported all my old blogs from http://notvincenz.blogspot.com, randomly sampling one entry, I noticed that some of the blogs need to be reeditted a bit.  If you happen to notice any broken blogs, please ping me so I can fix it.

Thanks!





Fibonacci in the Type System

26 02 2009

I thought I would share an old piece of code that I’ve had lying around for a while.  Basically, it calculates Fibonacci numbers in the type system:

{-# LANGUAGE EmptyDataDecls, MultiParamTypeClasses,
    FunctionalDependencies, FlexibleInstances, UndecidableInstances #-}
module Fibonacci where

data Nat
data S a = S a
class Numeral a where
value :: a -> Integer

prev :: S a -> a
prev = undefined

instance Numeral Nat where
value _ = 0

instance (Numeral a) => Numeral (S a) where
value x = 1 + (value . prev $ x)

class Add a b c | a b -> c where
add :: a -> b -> c

instance Add Nat b b where
add = undefined

instance Add a b c => Add (S a) b (S c) where
add = undefined

class Fib a b | a -> b where
fib :: a -> b

instance Fib Nat Nat where
fib = undefined

instance Fib (S Nat) (S Nat) where
fib = undefined

instance (Fib (S a) b, Fib a c, Add b c d) => Fib (S (S a)) d where
fib x = undefined

main = print . value . fib . S . S . S . S . S
                           . S . S . S . S $ (undefined :: Nat)




Setting up a mini-network

26 02 2009

It has been a while since I last blogged, mostly due to the fact that I’ve been busy settling into my new place, travelling to IKEA, breaking IKEA mobility vans, buying furniture, etc.  However, I’ve started on a new little project, and I thought that I’d document as I go along.

The project I’m working on is a little home network.  You could say that a home network is rather easy, and it is, but I’ve decided to make my life more complicated.  I want to learn more about routers, DNS, firewalls, traffic-shaping, etc, so I thought that this was the perfect project for that.  Additionally, I might soon get another computer with some colleagues at a colo, so that will give me enough reliability (along with my linode box) to set up a set of nameservers for my own domain.

Currently I would like to:

Let’s start by defining the hardware I have:

  • Two ALIX engines, one with a 1GB CF (compact flash) card and one with a 4GB CF card.  I’m expecting another 4GB CF card to come soon, to play around.  These both have 3 ethernet ports, which gives you a lot of possibilities to play with.  One also has a miniPCI WLAN card.  I bought antennas for it too, sadly the connectors I got to attach the two is wrong, so I’m waiting for new connectors.
  • One cablemodem, with my current contract I should be able to get 4 ips from it.
  • One Netgear GS608, 8-Port Gigabit Switch.  I was going to get a 100 Megabit switch, but sadly they were out, so I decided to upgrade.
  • One Netgear ProSafe GS105 Gigabit Switch.
  • Several ethernet cables, it would be a rather pointless exercise without it, wouldn’t it? :)
  • A Kingston 19-in-1 Card Reader.  I saw it in the store, it was rather cheap, and I figured I might be able to use this to set up the CF cards.  Though I’m currently leaning towards PXE boot.
  • Finally, I still have an old linksys WRT54G wireless router.
  • Additionally, I have a macbook pro which I’m using to write this blog, as well as Google random information, and a Dell Latitude D610 which I will use both as primary installation device, as well as laptop to connect to the serial port of the Alixen.

Now, originally, we had the WRT54G wireless router in our office, on the first floor. Sadly, due to the incredible swiss building style, we’re not able to get WIFI in the living room, which is exactly 1 floor up.  We’re investigating whether we can use the ISDN-plugs that seem to be available in each room to route ethernet upstairs.  If that’s possible, then the links router will go upstairs for WIFI in the living room, and I’ll use the ALIX engine with the WLAN card to supply our downstairs with WIFI.  Let’s hope it’ll work, otherwise we might have to get another WIFI bridge, which wouldn’t be very neat.

Currently, the plan is as follows:

  • Use the 5 port switch to get the most out of my public IP space.  I’ll probably attach my PS3 directly there, so I do not have to set up natting etc for online gaming.  Additionally, if I can route through the ISDN cables, I’ll use this to send a public IP straight to the linksys WRT54G WLAN router in the living room.
  • Use one of the Alix boxen as router, firewall and WLAN device.  For now the other ALIX will be for experimentation, though I’m tempted to set up VRRP.  The primary alix box will serve as router between the public IP space and the private IP space.  Since it has three ports, I might even go for a DMZ.
  • Use the 8 port switch as switch in the private IP space.

I’ve been contemplating quite a bit which OS to install on the Alix boxen. I’ve always worked with Ubuntu (well not quite always, way way long ago I had Mandrake, and prior to that Slackware). There are mini-distros available, but given the amount of space I have on the CFs and since I want this to be a learning exercise, I’ve decided to stay away from them.  This leaves me with the following choices:

  • Ubuntu.  I’m the most familiar with it, and generally it tends to be rather painless. That being said, it usually installs with a X11 system, no matter which version you choose, and
  • FreeBSD or OpenBSD.  This are usually ideal for this, or so one should believe.  However, I have little experience with them.  It might make a good learning exercise, bu on the other hand, I’m expecting to get a colo’d box soon with some colleagues that will run Debian.  So it is best to capitalize on one system.
  • Install debian on the main Alix box.  I’ve been contemplating a lot about this.  On my dell, I use ubuntu, and I quite like it.  However, one of the downsides of ubuntu is the frequency with which I get patches for it.  That is perfect for a device that one uses actively.  However, for a server/router box, this is not ideal, especially if you have CF.  Stability is key.  I’ve also considered FreeBSD or OpenBSD, just to learn more about them, but since the colo-box I will share with my colleagues will most likely be debian, it makes sense to use debian, that way I can reuse the configs, scripts etc.

For the PXE install, I followed the advice on this page.  Sadly, after a lot of struggling, I would get the device to start dhcp, it would actually start copying files, but it would not boot.  I then moved to trying to install plain old debian, however this failed as well, it would boot properly, but then it would simply hang.  Perhaps it is the serial settings of minicom that were screwing up.

Therefore, I’ve moved on to try Voyage Linux, which seems to be a slimmed down Debian specifically suited for this purpose, but not limited to being just the slimmed down version.  Installing it was a breeze, I just followed the instructions given in the README, and everything worked perfectly.

The first things I did, was install vim and screen.  I also set up proper .screenrc and .vimrc.

Looking further into how things are mounted, I noticed the following:”

  • /var/lock and /var/run are mounted purely in memory with tmpfs.  This is done by the /etc/init.d/mountkernfs.sh script
  • /var/log and /var/tmp are mounted as aufs, which is a union file system that will write to memory but also show what’s on disk.  They are setup by the /etc/init.d/voyage-sync script. Additionally, this script also rsyncs files back to disk when shutting down.

This shows two different ways of not using the CF when not required, one which is purely transient and one which is non-transient but saves on CF writes.  Currently, I think the following setups might be good:

  • Use the overlay file system, aufs, for .ssh.  Do not sync automatically on this, but instead selectively do so.
  • Use tmpfs for /var/lib/apt.  I do not need my CF to be spammed with a bunch of stuff just because I want to rarely install a package.

This article has remained as draft for far too long, so I will publish it and create a second article to continue.





A day in the life of an SRE

25 02 2009

Yesterday was a crappy day. I was oncall and for those that might not have heard, here is the link http://googleblog.blogspot.com/2009/02/current-gmail-outage.html.  For those that did not know, I work as an SRE (Site Reliability Engineer) for GMail at Google.

On a better note, I have recently installed xmonad, and I’m really happy with it as it has increased my productivity.  Currently, I’m still using a rather standard configuration, except for some custom key-bindings.  I will probably post when I get a more customized configuration.





A SIGINT signal handler for lua

1 01 2009

It has been a while since I last blogged, mostly due to the fact that I’ve been busy settling into my new place, travelling to IKEA, breaking IKEA mobility vans, buying furniture, etc.  I’ve started a little project on home networking, and I have a blog post in progress documenting the details.  But I thought I’d blog about something else in the meantime.

I’m currently on vacation in Barbados for Christmas and New Year, visiting my parents.  It would have been a great vacation, had it not been for the several mishaps.  The first day I stepped on a sea urchin, which led to me having several bouts of fevers over the next 2 weeks.  Additionally, I nearly landed on some corral cliffs after losing having the leash of my bodyboard snap and finding myself fighting pointlessly against a current that was too strong to even fight when I could stand.

Anyways, the reason I wanted to blog was because I’ve been contemplating about focussing more effort again on writing personal software projects.  I have a bunch becoming stale on my laptop, and would like to share them better.  In the past, on my previous blog, I would show snippets inside of blog posts, and I’ll probably do that again today to share something I wrote a while back.  However, this is not a very robust solution, and it makes it a pain to share bigger projects or to have others use it.  I have a VPS host, however the homepage I have on it is truly shameful.  Which brings up the question in this blog post: Does anyone have any suggestions for a nice layout for a homepage? It probably will be more organizational and less prose-based, as I can use my blog for prose.  Just a place to jot some details about myself, share various pieces of code (either in a darcs or a git repository), and put on some snippets with nice syntax highlighting.  Is the standard 3-column page still the hot meme, or is there something better?  And any decent suggestions on what color-scheme and typography to use?

So coming back to the source code snippet.  A while back, while working on some lua code, I wanted to have the ability to register lua code for the CTRL+C handler. The following piece of code does this by first registering a C function as a signal handler for SIGINT that sets a lua hook on the next line of lua call.  In the case that it takes too long to actually evaluate the debug hook, it temporarily sets the SIGINT back to SIG_DFL, such that two ctrl+c’s will kill the program as expected.

The use of it is fairly trivial, after loading the module with require 'signal', you simply call signal.set(function() ... some code ... end). If you want to reset the original signal handler that was set prior to your first signal.set you simply call it with no parameters.

/*------------------------------------------------------------------------------
 * signal.c - Allows for the registration of a lua signal handler for SIGINT.
 *
 * Authors: Christophe Poucet
 * License: BSD
 * Created: September 23, 2007
 *
 * Copyright 2007-2009 © Christophe Poucet. All Rights Reserved.
 *----------------------------------------------------------------------------*/

#include
#include "lua.h"

#include "lauxlib.h"
#include "lualib.h"

static lua_State *globalL = NULL;

static void (*old_handler)(int) = NULL;

static void laction(int i);

static void lstop (lua_State *L, lua_Debug *ar) {
  (void)ar;  /* unused arg. */
  lua_sethook(L, NULL, 0, 0);
  lua_getfield(L, LUA_REGISTRYINDEX, "handler");
  lua_pcall(L, 0, 0, 0);
  signal(SIGINT, laction);
}

static void laction (int i) {
  /* if another SIGINT occurs before lstop, terminate process(default action) */
  signal(i, SIG_DFL);
  lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1);
}

static int l_setsignal(lua_State *L) {
  globalL = L;
  luaL_checkany(L, 1);
  if (lua_isnil(L, 1)) {
    lua_pushnil(L);
    lua_setfield(L, LUA_REGISTRYINDEX, "handler");
    if (old_handler != NULL) {
      signal(SIGINT, old_handler);
      old_handler = NULL;
    }
  } else if (lua_isfunction(L, 1)) {
    lua_pushvalue(L, 1);
    lua_setfield(L, LUA_REGISTRYINDEX, "handler");
    if (old_handler != NULL) {
      signal(SIGINT, laction);
    } else {
      old_handler = signal(SIGINT, laction);
    }
  } else {
    luaL_error(L, "The argument should be either nil or a function");
  }
  return 0;
}

static const struct luaL_Reg s_signal [] = {
  {"set", l_setsignal},
  {NULL, NULL} /* sentinel */
};

int luaopen_signal(lua_State *L) {
  luaL_register(L, "signal", s_signal);
  return 1;
}




Chrome to be released soon

1 09 2008

As posted on blogoscoped, Google Chrome will be released soon, and I have to admit that I’m very excited about.  In the spirit of Google, Chrome takes what works and improves upon it in an incremental and robust fashion.  The cartoon is very telling to show what features it will include.

My favourite features for it are the independent tabs and better multithreading.  Whlie I think firefox is great, it’s starting to hang often for me as I have a bunch of tabs open, some with rather heavy web-apps.  One app hanging causes my entire browser to hang and breaks my work-flow.  Additionally, at times firefox crashes and then you’ve just lost everything you were busy with.