commit 2d9121e663848e1609ce7217c02d5b78d9b80b3b Author: Levi Pearson Date: Tue Jun 10 22:58:26 2014 -0600 Data structure slides and source code diff --git a/DenseNat.hs b/DenseNat.hs new file mode 100644 index 0000000..fe48e91 --- /dev/null +++ b/DenseNat.hs @@ -0,0 +1,22 @@ +module DenseNat where + +data Digit = Zero | One deriving (Show, Ord, Eq, Enum) + +type DenseNat = [Digit] -- increasing order of significance + +inc :: DenseNat -> DenseNat +inc [] = [One] +inc (Zero : ds) = One : ds +inc (One : ds) = Zero : inc ds -- carry + +dec :: DenseNat -> DenseNat +dec [One] = [] +dec (One : ds) = Zero : ds +dec (Zero : ds) = One : dec ds -- borrow + +add :: DenseNat -> DenseNat -> DenseNat +add ds [] = ds +add [] ds = ds +add (d : ds1) (Zero : ds2) = d : add ds1 ds2 +add (Zero : ds1) (d : ds2) = d : add ds1 ds2 +add (One : ds1) (One : ds2) = Zero : inc (add ds1 ds2) -- carry diff --git a/HAMT.hs b/HAMT.hs new file mode 100644 index 0000000..4d3eab3 --- /dev/null +++ b/HAMT.hs @@ -0,0 +1,97 @@ +module HAMT where + +import Data.Bits +import Data.Word +import Prelude hiding (lookup, (++)) + +import Data.Vector (Vector, singleton, unsafeDrop, unsafeIndex, + unsafeTake, unsafeUpdate, (++)) + +type Key = Word +type Bitmap = Word +type Shift = Int +type Subkey = Int -- we need to use this to do shifts, so an Int it is + +data HAMT a = Empty + | BitmapIndexed {-# UNPACK #-} !Bitmap !(Vector (HAMT a)) + | Leaf {-# UNPACK #-} !Key a + | Full {-# UNPACK #-} !(Vector (HAMT a)) + deriving (Show) + + +-- These architecture dependent constants + +bitsPerSubkey :: Int +bitsPerSubkey = floor . logBase 2 . fromIntegral . bitSize $ (undefined :: Word) + +subkeyMask :: Bitmap +subkeyMask = 1 `shiftL` bitsPerSubkey - 1 + +maskIndex :: Bitmap -> Bitmap -> Int +maskIndex b m = popCount (b .&. (m - 1)) + +mask :: Key -> Shift -> Bitmap +mask k s = shiftL 1 (subkey k s) + +{-# INLINE subkey #-} +subkey :: Key -> Shift -> Int +subkey k s = fromIntegral $ shiftR k s .&. subkeyMask + +empty :: HAMT a +empty = Empty + +lookup :: Key -> HAMT a -> Maybe a +lookup k t = lookup' k 0 t + +lookup' :: Key -> Shift -> HAMT a -> Maybe a +lookup' k s t + = case t of + Empty -> Nothing + Leaf kx x + | k == kx -> Just x + | otherwise -> Nothing + BitmapIndexed b v -> + let m = mask k s in + if b .&. m == 0 + then Nothing + else lookup' k (s+bitsPerSubkey) (unsafeIndex v (maskIndex b m)) + Full v -> lookup' k (s+bitsPerSubkey) (unsafeIndex v (subkey k s)) + +insert :: Key -> a -> HAMT a -> HAMT a +insert k v t = insert' k 0 v t + +insert' :: Key -> Shift -> a -> HAMT a -> HAMT a +insert' kx s x t + = case t of + Empty -> Leaf kx x + Leaf ky y + | ky == kx -> Leaf kx x + | otherwise -> + insert' kx s x $ BitmapIndexed (mask ky s) (singleton t) + BitmapIndexed b v -> {-# SCC "i-Bitmap" #-} + let m = mask kx s + i = maskIndex b m in + if b .&. m == 0 + then let l = Leaf kx x + v' = unsafeTake i v ++ singleton l ++ unsafeDrop i v + b' = b .|. m in + if b' == 0xFFFFFFFF + then Full v' + else BitmapIndexed b' v' + else {-# SCC "i-Bitmap-conflict" #-} + let st = unsafeIndex v i + st' = insert' kx (s+bitsPerSubkey) x st + v' = {-# SCC "i-Bitmap-update" #-} + unsafeUpdate v (singleton (i, st')) + in BitmapIndexed b v' + Full v -> {-# SCC "i-Full" #-} + let i = subkey kx s + st = unsafeIndex v i + st' = insert' kx (s+bitsPerSubkey) x st + v' = {-# SCC "i-Full-update" #-} + unsafeUpdate v (singleton (i, st')) + in Full v' + +-- lazy, but that's not necessarily a bad thing +fromList :: [(Key, a)] -> HAMT a +fromList = foldl (flip $ uncurry insert) empty diff --git a/HON.hs b/HON.hs new file mode 100644 index 0000000..317b273 --- /dev/null +++ b/HON.hs @@ -0,0 +1,216 @@ +module HigherOrderNested where + +{- Start with a standard data type +data Nat = Zero + | Succ Nat + deriving (Eq, Show) +-} + +-- Decompose it into separate types +data Zero = NZero + deriving (Eq, Show) + +data Succ nat = Succ nat + deriving (Eq, Show) + +-- And define a nested combining type +data Nats nat = NNil + | NCons nat (Nats (Succ nat)) + deriving (Eq, Show) + +{- Some examples of instances of this type: +NCons NZero NNil +NCons NZero (NCons (Succ NZero) NNil) +NCons NZero (NCons (Succ NZero) (NCons (Succ (Succ NZero)) NNil)) +-} + +{- The Stack type, analogous to Nat +data Stack a = Empty + | Push a (Stack a) + deriving (Eq, Show) +-} + +-- Decompose to separate types +data Empty a = Empty + deriving (Eq, Show) + +data Push stack a = Push a (stack a) + deriving (Eq, Show) + +-- Define a nested combining type +data Stacks stack a = SNil + | SCons (stack a) (Stacks (Push stack) a) + deriving (Eq, Show) + +{- Some examples: +SCons Empty SNil +SCons Empty (SCons (Push 'a' Empty) SNil) +SCons Empty (SCons (Push 'a' Empty) (SCons (Push 'a' (Push 'b' Empty)) SNil)) +-} + +{- Perfect Binary Leaf Trees +data Bush a = Leaf a + | Fork (Bush a) (Bush a) + deriving (Eq, Show) +-} + +-- Decomposed Perfect Binary Leaf Trees +data Leaf a = Leaf a + deriving (Eq, Show) +data Fork bush a = Fork (bush a) (bush a) + deriving (Eq, Show) + +{- Uses a zeroless 1-2 representation to eliminate overhead of numerical + nodes with no data. + + Counting: 1, 2, 11, 21, 12, 22, 111, 211, 121, 221, 112, 212, 122, 222 + + Note that only the first One constructor can directly store the + Leaf type; all others will use Fork^n Leaf, which guarantees that + our "Perfect" invariant holds. + + Each recursive instantiation of the type occurs with another level + of Fork wrapping the bush type; One uses that level directly, while + Two adds an extra Fork level to it. +-} +data RandomAccessList bush a + = Nil + | One (bush a) (RandomAccessList (Fork bush) a) + | Two (Fork bush a) (RandomAccessList (Fork bush) a) + deriving (Eq, Show) + +type IxSequence = RandomAccessList Leaf + +-- Consing to the front increments the number with a Leaf as data +cons :: a -> IxSequence a -> IxSequence a +cons a = incr (Leaf a) + +incr :: bush a + -> RandomAccessList bush a + -> RandomAccessList bush a +incr b Nil = One b Nil -- inc 0 = 1 +incr b1 (One b2 ds) = Two (Fork b1 b2) ds -- inc 1.. = 2.. +incr b1 (Two b2 ds) = One b1 (incr b2 ds) -- inc 2d.. = 1(inc d).. + +-- This is used to eliminate leading zeros +zero :: RandomAccessList (Fork bush) a + -> RandomAccessList bush a +zero Nil = Nil -- 0 = 0 +zero (One b ds) = Two b (zero ds) -- 01.. = 20. +zero (Two (Fork b1 b2) ds) = Two b1 (One b2 ds) -- 02.. = 21. + +-- Removing from the front decrements the number and removes a Leaf +front :: IxSequence a -> (a, IxSequence a) +front Nil = error "IxSequence empty" +front (One (Leaf a) ds) = (a, zero ds) -- dec 1.. = 0.. +front (Two (Fork (Leaf a) b) ts) = (a, One b ts) -- dec 2.. = 1.. + +fromList :: [a] -> IxSequence a +fromList = foldr cons Nil + +{- A recursive (top-down) solution +unleaf :: Leaf a -> [a] +unleaf (Leaf a) = [a] + +unfork :: (bush a -> [a]) -> Fork bush a -> [a] +unfork flatten (Fork l r) = flatten l ++ flatten r + +listify :: (bush a -> [a]) -> RandomAccessList bush a -> [a] +listify _ Nil = [] +listify flatten (One b ds) = flatten b ++ listify (unfork flatten) ds +listify flatten (Two b ds) = unfork flatten b ++ listify (unfork flatten) ds + +toList :: IxSequence a -> [a] +toList = listify unleaf +-} + +{- A recursive (top-down) solution with Type Classes +class Flatten bush where + flatten :: bush a -> [a] + +instance Flatten Leaf where + flatten (Leaf a) = [a] + +instance (Flatten bush) => Flatten (Fork bush) where + flatten (Fork l r) = flatten l ++ flatten r + +listify :: (Flatten bush) => RandomAccessList bush a -> [a] +listify Nil = [] +listify (One b ds) = flatten b ++ listify ds +listify (Two b ds) = flatten b ++ listify ds + +toList :: IxSequence a -> [a] +toList = listify +-} + +{- An iterative (bottom-up) solution; linear time bound -} +listify :: RandomAccessList bush a -> [bush a] +listify Nil = [] +listify (One b ds) = b : unforks (listify ds) +listify (Two b ds) = unforks (b : listify ds) + +unforks :: [Fork bush a] -> [bush a] +unforks [] = [] +unforks (Fork b1 b2 : ts) = b1 : b2 : unforks ts + +toList :: IxSequence a -> [a] +toList s = [a | Leaf a <- listify s] + +{- Examples: + +> fromList[1..5] +One (Leaf 1) + (Two (Fork (Fork (Leaf 2) (Leaf 3)) + (Fork (Leaf 4) (Leaf 5))) + Nil) + +12 = 1*1 + 2*2 = 1 + 4 = 5 + +> fromList [1..20] +Two (Fork (Leaf 1) (Leaf 2)) -- 0, 1 + (One (Fork (Leaf 3) (Leaf 4)) -- 2, 3 + (Two (Fork (Fork (Fork (Leaf 5) (Leaf 6)) -- 4, 11 + (Fork (Leaf 7) (Leaf 8))) + (Fork (Fork (Leaf 9) (Leaf 10)) + (Fork (Leaf 11) (Leaf 12)))) + (One (Fork (Fork (Fork (Leaf 13) (Leaf 14)) -- 12, 19 + (Fork (Leaf 15) (Leaf 16))) + (Fork (Fork (Leaf 17) (Leaf 18)) + (Fork (Leaf 19) (Leaf 20)))) + Nil))) + +2121 = 2*1 + 1*2 + 2*4 + 1*8 = 2 + 2 + 8 + 8 = 20 + +tree size at each rank is d*2^r + +-} + +{- +Searching: + +lookup 0 (One (Leaf x) _) = x +lookup 0 (One (Fork t1 t2) ds) = lookup 0 (One ) + +-} + +class FindB bush where + findB :: Int -> Int -> bush a -> a +instance FindB Leaf where + findB _ _ (Leaf x) = x +instance (FindB bush) => FindB (Fork bush) where + findB i s (Fork lb rb) + | i < s`div`2 = findB i (s`div`2) lb + | otherwise = findB i (s`div`2) rb + +find :: Int -> IxSequence a -> a +find = find' 0 + +find' :: (FindB bush) => Int -> Int + -> RandomAccessList bush a -> a +find' _ _ Nil = error "Not found" +find' r i (One b ds) + | i < 2^r = findB i (2^r) b + | otherwise = find' (i-2^r) (r+1) ds +find' r i (Two b ds) + | i < 2*2^r = findB i (2*2^r) b + | otherwise = find' (i-2*2^r) (r+1) ds diff --git a/Nested.hs b/Nested.hs new file mode 100644 index 0000000..dcde164 --- /dev/null +++ b/Nested.hs @@ -0,0 +1,31 @@ +module Nested where + +data Fork t = Fork t t + deriving (Show, Eq) + +data RandList t = Nil + | Zero (RandList (Fork t)) + | One t (RandList (Fork t)) + deriving (Show, Eq) + +cons :: a -> RandList a -> RandList a +cons x Nil = One x Nil +cons x1 (Zero ds) = One x1 ds +cons x1 (One x2 ds) = Zero (cons (Fork x1 x2) ds) + +front :: RandList a -> (a, RandList a) +front Nil = error "Empty RandList" +front (One x Nil) = (x, Nil) +front (One x ds) = (x, Zero ds) +front (Zero ds) = (x1, One x2 ds') + where (Fork x1 x2, ds') = front ds + +find :: Int -> RandList a -> a +find 0 Nil = error "Not found" +find 0 (One x _) = x +find i (One _ ds) = find (i-1) (Zero ds) +find i (Zero ds) = if i`mod`2 == 0 then x else y + where (Fork x y) = find (i`div`2) ds + +fromList :: [a] -> RandList a +fromList = foldr cons Nil diff --git a/Presn.md b/Presn.md new file mode 100644 index 0000000..6e1a866 --- /dev/null +++ b/Presn.md @@ -0,0 +1,1031 @@ +--- +title: Functional Data Structures +subtitle: Analysis and Implementation +author: Levi Pearson +... + +# Intro + +### Outline + +1. Basics of Functional Data Structures + * Immutability + * Persistent vs. Ephemeral + * Node Structure + * Data Sharing + +2. Analysis Techniques + * Amortization Overview + * Amortization with Persistence + +3. Design Techniques + * Lazy Rebuilding and Scheduling + * Numerical Representations + * Non-Uniform Structure + +4. Real-World Examples + * Zippers + * 2-3 Finger Trees + * Hash Array Mapped Tries + +### Motivation + +Functional data structures provide: + +* Relatively simple implementations +* Good worst-case performance bounds +* Persistence for free +* Ease of use in concurrent programs + +## Basics + +### Functional Data Structures? + +What do we mean by "Functional Data Structure"? + +* Built from immutable values +* Can be implemented in a purely functional language +* No in-place updates! + +Obviously not the data structures taught in Intro to Algorithms class... + +### Persistence + +Most imperative structures are *ephemeral*; updates destroy previous versions + +A data structure is *persistent* when old versions remain after updates + +* Developed in the context of imperative structures +* Imperative versions of persistent structures can be quite complex +* Adding persistence can change performance bounds +* Purely functional structures are automatically persistent! + +### Value of Persistence + +Why would we want persistence anyway? + +* Safe data sharing +* No locking required +* Restoration of previous states +* Interesting algorithms (e.g. in computational geometry) + +### Node-Based Structure + +Functional data structures share structural characteristics: + +* Built from *nodes* and *references* +* Nodes contain references: + * one or more references to other nodes + * one or more references to values +* A structure may be built from one or more *kinds* of node + +Examples: + +~~~ {.haskell} +data List a = Nil + | Cons a (List a) + +data Tree a = Leaf a + | Branch (Tree a) (Tree a) +~~~ + +### Data Sharing + +The fine-grained node structure has some benefits: + +* New versions of structures share the bulk of their data with old versions +* Copying on updates is limited to *nodes* rather than *values* +* Only the nodes on the path from the root to change point are copied. + +Also some drawbacks: + +* Extra overhead for references vs. in-place storage +* Locality of data can suffer + +# Analysis + +## Amortization + +### Structures Enable Algorithms + +* Data structures support *operations* on them +* Operations are implemented via *algorithms* +* Algorithms perform operations + +We bottom out at two primitive operations: + +* Construction of an ADT value +* Destructuring/Pattern Matching of an ADT value + +### Algorithm comparison + +We judge between algorithms based on analysis: + +* How many primitive operations are needed to complete the algorithm? +* How does the number of operations scale as the problem size grows? + +*Asymptotic analysis* gives an upper bound to the growth of operation + count as the size grows to infinity. + +### Worst Case + +Typical analysis is *worst-case*: + +* Find a function such that some constant times that function of the + input size is *always* greater than the actual count of operations + for any single operation. +* That function is a worst-case upper bound. +* The smallest upper bound is how we characterize the asymptotic + worst-case behavior. + +### Average Case + +Sometimes we consider *average-case*: + +* Find the upper bounds of all possible single operation cases. +* Model the statistical likelihood of the cases. +* Find the smallest upper bound of the weighted combination of cases. + +### Amortized + +An *amortized* analysis is different: + +* Find the upper bounds of all possible single operation cases. +* Consider the possible *sequences* of operations. +* Show that *expensive* operations always run infrequently enough to + distribute their cost among *cheap* operations + +### When To Amortize? + +A **worst-case** analysis is necessary when you have hard latency bounds, +e.g. real-time requirements. + +**Average** or **amortized** analysis can give a better sense of +*expected throughput* of an algorithm. + +**Amortized** analysis is useful when most operations are cheap, but +occasionally an expensive one must be performed. + +Examples: + +* Array doubling +* Rehashing +* Tree balancing + +Developed in the context of *ephemeral* data structures. + +### The Persistence Wrench + +An ephemeral structure has a single logical future: Once an operation +is performed, there's no going back. + +The amortization accounting process is straightforward: + +* Assign an extra *credit* cost to cheap cases. +* Prove that an expensive case only occurs after sufficient credit + has accumulated to pay for it. + +Persistent structures may have *multiple* logical futures. + +Consider a tree very close to needing an expensive rebalancing: + +~~~ {.Haskell} +-- tr1 refers to the teetering tree +let tr2 = insert foo tr1 -- causes a rebalance: expensive +let tr3 = insert foo tr1 -- causes another rebalance +let tr4 = insert foo tr1 -- causes yet another rebalance +~~~ + +How to account for that? You can't spend your savings more than once. + +## Functional Amortization + +### Tackling Persistence with Laziness + +Chris Okasaki discovered a way around the multiple-future problem: + +* Delay expensive operations via lazy evaluation +* Account for expensive operations via *debt* +* Savings can only be spent once, but over-paying on debt is ok + +This works because: + +* A suspended computation places a *thunk* in the new structure +* When a thunk is *forced*, it performs its calculation +* The value replaces the thunk *in place* + +The previous example, with lazy evaluation: + +~~~ {.Haskell} +-- tr1 refers to the teetering tree +let tr2 = insert foo tr1 -- creates a rebalancing thunk: cheap +let tr3 = insert bar tr2 -- forces rebalancing: expensive +let tr4 = insert baz tr2 -- sees previous rebalancing +~~~ + +### Outline of Amortized Lazy Analysis + +Like a layaway plan: + +1. Find a computation you can't afford to execute +2. Create a suspension for it, assign a value proportional to its shared cost +3. Pay the debt off a little at a time +4. When the debt is paid off, the suspension may be executed + +Consider each logical future as if it were the *only one*. + +Each future must fully pay before invoking any suspension. + +### Restoring Worst-Case Bounds + +Sometimes the latency budget requires a tight worst-case bound on all +operations. + +Worst-case bounds can be restored via *scheduling* + +* Execute suspended computations in bounded steps +* Execute the steps as they are paid off + +This has a small space and complexity overhead, but bounds the latency. + +### Using Analysis Results + +Sure that sounds like *fun*, but what's it good for? + +Different operations over structures (e.g. head-insert, tail-insert, +search) are necessary to implement higher-level algorithms. + +Choose the data structure that: + +* Provides good bounds for the operations your algorithm needs +* Provides the latency guarantees you need + +Other factors as well, of course + +# Implementation Patterns + +## Lazy Rebuilding + +### Queues, Take 1 + +The essence of the Queue: + +* `insert` adds an element at the end of the queue +* `remove` takes an element from the front of the queue + +How to represent it? What about a List? + +* Our `remove` operation is $O \left({1}\right)$ +* But `insert` is $O \left({n}\right)$! + +The standard trick: A pair of lists, `left` and `right`. + +* The `left` list holds the head of the queue +* The `right` list holds the tail, stored in reversed order + +When `left` becomes empty, reverse `right` and put it in `left`. + +### Queue Implementation + +~~~ {.Haskell} +module Queue1 where +import Prelude hiding (reverse, length) +import StrictList (head, tail, reverse) +data Queue a = Q { left :: StrictList a, leftLen :: !Int + , right :: StrictList a, rightLen :: !Int + } deriving (Show, Eq) +empty :: Queue a +empty = Q Empty 0 Empty 0 +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = Q l ll (e :$ r) (rl + 1) +remove :: Queue a -> (a, Queue a) +remove (Q Empty _ r rl) = remove (Q (reverse r) rl Empty 0) +remove (Q l ll r rl) = (head l, Q (tail l) (ll - 1) r rl) +~~~ + +### Queue Analysis + +Most operations are trivial: + +* `empty`: $O \left({1}\right)$ +* `insert`: $O \left({1}\right)$ +* `length`: $O \left({1}\right)$ + +`remove` will take some thought + +~~~ {.Haskell} +remove :: Queue a -> (a, Queue a) +remove (Q Empty _ r rl) = remove (Q (reverse r) rl Empty 0) +remove (Q l ll r rl) = (head l, Q (tail l) (ll - 1) r rl) +~~~ + +* When `left` is non-empty, `remove` takes $O \left({1}\right)$ +* But when empty, it takes $O \left({n}\right)$ due to the `reverse` + +### Amortizing the Queue + +* The `reverse` operation takes place only when `left` is empty + +* `left` is empty when: + * The entire queue is empty + * `n` values have been `insert`ed without a `remove` + * All previously reversed values have been `remove`d. + +* A `remove` after `n` `inserts` will take `n` operations. + +* Those `n` values must be `remove`d before another `reverse` can + occur. + +* So, assigning extra credit to each `insert` should offset the linear + cost of the `reverse`... + +### Not So Fast + +~~~ {.Haskell} +let q1 = foldr insert empty "Abracadabra" +let q2 = remove q1 -- Causes a reverse +let q3 = remove q1 -- Causes the same reverse +let q4 = remove q1 -- And again... +~~~ + +Oops. Persistence screws up our amortization scheme. + +Time to make it lazy. + +### Queue Take 2 - Paired Lazy Lists + +Goal: Reverse `right` *incrementally* before `left` becomes empty. + +Means: Periodically append `reverse right` to `left` using *guarded recursion*. + +~~~ {.Haskell} +-- rot l r [] = l ++ reverse r +rot [] r a = head r : a +rot l r a = head l : rot (tail l) (tail r) (head r : a) +~~~ + +This evaluates up to the (`:`) operation per call to `head` on the +result. + +### Queue Take 2 - Paired Lazy Lists (cont.) + +Forcing a tail: + +* performs one step of the `++` and one step of the `reverse`. +* may also force another tail; but only $\log n$ times. + +So worst case improves from $O\left({n}\right)$ to $O\left({\log n}\right)$. + +If we never let `right` be longer than `left`, by the time `rotate` +steps through the original `left`, the `reverse` will be complete. + +Amortized cost is therefore $O \left({1}\right)$ again, and safe for +persistence. + +### Full Implementation + +~~~ {.Haskell} +module Queue2 where +import Prelude hiding (length) +data Queue a = Q { left :: [a], leftLen :: !Int + , right :: [a], rightLen :: !Int + } deriving (Show, Eq) +empty :: Queue a +empty = Q [] 0 [] 0 +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = makeQ (Q l ll (e : r) (rl + 1)) +remove :: Queue a -> (a, Queue a) +remove (Q l ll r rl) = (head l, makeQ (Q (tail l) (ll-1) r rl)) +makeQ :: Queue a -> Queue a -- Preserve invariant: |R| <= |L| +makeQ q@(Q l ll r rl) + | rl <= ll = q + | otherwise = Q (rot l r []) (ll + rl) [] 0 +rot :: [a] -> [a] -> [a] -> [a] +rot [] r a = head r : a +rot l r a = head l : rot (tail l) (tail r) (head r : a) +~~~ + +## Numerical Representations + +### Arithmetic + +> Many people regard arithmetic as a trivial thing that children learn +> and computers do, but we will see that arithmetic is a fascinating +> topic with many interesting facets. It is important to make a +> thorough study of efficient methods for calculating with numbers, +> since arithmetic underlies so many computer applications. + +Donald E. Knuth, from "The Art of Computer Programming" + +### Nat and List + +Inductive definition of Naturals: + +~~~ {.Haskell} +data Nat = Zero + | Succ Nat +~~~ + +Inductive definition of Lists: + +~~~ {.Haskell} +data List a = Nil + | Cons a (List a) +~~~ + +Functions on `Nat` and `List` are analogous too: + +* `inc` of a `Nat` is like `cons` on a `List` +* `dec` of a `Nat` is like removing `head` of a `List` +* adding two `Nat`s is like combining two `List`s + +Lists are *numbers* that contain *values*. + +`Nat` and `List` implement a *unary* number system. + +### Positional Numbers - Definitions + +A *positional number* is written as a series of digits: +$$ +b_0 \dots b_{m-1} +$$ +The digit $b_0$ is the *least significant digit* + +The digit $b_{m-1}$ is the *most significant digit* + +Each digit $b_i$ has a *weight* $w_i$, so the value is: +$$ +\sum\limits_{i=0}^{m-1} b_i w_i +$$ +A number is *base* $B$ if $w_i=B^i$ and $D_i=\left\{0,\dots,B-1\right\}$. + +Usually weights are increasing sequences of powers and $D_i$ is the +same for every digit. + +### Redundancy + +A number system is *redundant* if there is more than one way to +represent some numbers. + +Take the system where $w_i = 2^i$ and $D_i = \left\{0,1,2\right\}$ + +Decimal $13$ can be written: + +* $1011$ +* $1201$ +* $122$ + +### Dense and Sparse Representations + +*Dense* representations are simple lists/sequences of digits, + including $0$. + +*Sparse* representations exclude $0$ digits, so must include either: + +* *rank* (the index in the sequence) or +* *weight* + +### Dense Example + +Straightforward list of binary digits, but least significant bit first + +~~~ {.Haskell} +data Digit = Zero | One +type DenseNat = [Digit] -- increasing order of significance +inc [] = [One] +inc (Zero : ds) = One : ds +inc (One : ds) = Zero : inc ds -- carry +dec [One] = [] +dec (One : ds) = Zero : ds +dec (Zero : ds) = One : dec ds -- borrow +add ds [] = ds +add [] ds = ds +add (d : ds1) (Zero : ds2) = d : add ds1 ds2 +add (Zero : ds1) (d : ds2) = d : add ds1 ds2 +add (One : ds1) (One : ds2) = Zero : inc (add ds1 ds2) -- carry +~~~ + +### Sparse Example + +Store a list of $d_i w_i$ values, so `Zero` digits won't appear + +~~~ {.Haskell} +type SparseNat = [Int] -- increasing list of powers of 2 +carry w [] = [w] +carry w ws@(w' : rest) + | w < w' = w : ws + | otherwise = carry (2 * w) rest +borrow w ws@(w' : rest) + | w < w' = rest + | otherwise = w : borrow (2 * w) ws +inc ws = carry 1 ws +dec ws = borrow 1 ws +add ws [] = ws +add [] ws = ws +add m@(w1 : ws1) n@(w2 : ws2) + | w1 < w2 = w1 : add ws1 n + | w2 < w1 = w2 : add m ws2 + | otherwise = carry (2 * w1) (add ws1 ws2) +~~~ + +### Binary Numerical Representations + +We can transform a positional number system into a data structure: + +* Replace sequence of digits with a sequence of trees +* Number and size of trees is determined by representation of numbers + +For example: + +* The binary representation of $73$ is $1001001$ +* A collection of size $73$ would contain three trees: + * size $1$ + * size $8$ + * size $64$ + +### Complete Binary Leaf Trees + +A binary leaf tree is either: + +* a `Leaf`, containing a value; or +* a `Fork`, containing two binary leaf trees + +A *complete* binary leaf tree has all `Leaf` nodes at the same rank. + +~~~ {.Haskell} +data BLT a = Leaf a + | Fork (BLT a) (BLT a) +~~~ + +### Binary Random-Access List + +Let's combine a dense binary representation with a `BLT` + +First, let's annotate the `BLT`s with their size: + +~~~ {.Haskell} +data BLT a = Leaf a + | Fork !Int (BLT a) (BLT a) deriving (Show) +~~~ + +Recall the types we used for `DenseNat`: + +~~~ {.Haskell} +data Digit = Zero | One + deriving (Show, Ord, Eq) +type DenseNat = [Digit] -- increasing order of significance +~~~ + +We'll adapt them to store a `BLT` on `One` digits: + +~~~ {.Haskell} +data Digit a = Zero | One (BLT a) + deriving (Show, Ord, Eq) +type RandList a = [Digit a] +~~~ + +### Some RandList Operations + +We had an `inc` operation for `Nat` + +~~~ {.Haskell} +inc :: DenseNat -> DenseNat +inc [] = [One] +inc (Zero : ds) = One : ds +inc (One : ds) = Zero : inc ds -- carry +~~~ + +We adapt it to `cons` for `RandList` + +~~~ {.Haskell} +cons :: a -> RandList a -> RandList a +cons x ts = insTree (Leaf x) ts + +insTree t [] = [One t] +insTree t (Zero : ts) = One t : ts +insTree t1 (One t2 : ts) = Zero : insTree (link t1 t2) ts + where link t1 t2 = Fork (size t1 + size t2) t1 t2 + size (Leaf _) = 1 + size (Fork n _ _ ) = n +~~~ + +### Some RandList Operations + +We had a `dec` operation for `Nat` + +~~~ {.Haskell} +dec :: DenseNat -> DenseNat +dec [One] = [] +dec (One : ds) = Zero : ds +dec (Zero : ds) = One : dec ds -- borrow +~~~ + +We adapt it to `tail` for `RandList` + +~~~ {.Haskell} +tail :: RandList a -> RandList a +tail ts = ts' where (_, ts') = borrowTree ts + +borrowTree [One t] = (t, [] ) +borrowTree (One t : ts) = (t, Zero : ts ) +borrowTree (Zero : ts) = (t1, One t2 : ts') + where (Fork _ t1 t2, ts') = borrowTree ts +~~~ + +### Properties of RandList + +We have arranged an $n$-length sequence as a $\log\left(n+1\right)$ +length list of trees of $\log n$ depth. + +Operations `cons`, `head`, and `tail` perform $O\left({1}\right)$ +work per digit, so their worst-case is $O\left({\log n}\right)$ + +Operations `lookup` and `update` take at most $O\left({\log n}\right)$ +to find the right tree and $O\left({\log n}\right)$ to find the right +element, for a total of $O\left({\log n}\right)$ worst-case time. + +For mostly-random access, this will be a significant improvement over +`List`'s $O\left({n}\right)$ worst-case performance. + +### Skew Binary Numbers + +Our standard binary representation's performance was hindered by +*cascading carries*. + +In the **skew binary number** representation: + +* The weight $w_i$ of the $i$th digit is $2^{i+1}-1$ +* The set of digits $D_i = \left\{0,1,2\right\}$ +* Only the lowest non-$0$ digit may be $2$ (for uniqueness) + +The number $92$ is $002101$ in this representation: + +\begin{align*} +\sum\limits_{i=0}^{5} b_i w_i &= +0+0+(2^{2+1}-1)\times{2}+(2^{3+1}-1)+0+(2^{5+1}-1) \\ +&= 14+15+63 \\ +&= 92 +\end{align*} + +### Skew Binary Number Operations + +What makes skew binary representation useful: + +* $w_i$ is $2^{i+1}-1$ +* $1+2\left(2^{i+1}-1\right) = 2^{i+2}-1$ + +This means that when the lowest non-$0$ digit is $2$, the `inc` +operation: + +* resets the $2$ to $0$ +* increments the next digit from $0$ to $1$ *or* $1$ to $2$ + +If there is no $2$, just increment the lowest digit from $0$ to $1$ or +from $1$ to $2$. These only take $O\left({1}\right)$ time! + +We must use a *sparse* representation to keep $O\left({1}\right)$ +access to the first non-$0$ digit. + +### Skew Binary Random Access List + +~~~ {.Haskell} +data BTree a = Leaf a | Node a (BTree a) (BTree a) +data Digit a = Digit !Int (BTree a) +type RList a = [Digit a] +cons x (Digit w1 t1 : Digit w2 t2 : ts') + | w1 == w2 = Digit (1 + w1 + w2) (Node x t1 t2) : ts' +cons x ts = Digit 1 (Leaf x) : ts +head (Digit 1 (Leaf x) : _) = x +head (Digit _ (Node x _ _) : _) = x +tail (Digit 1 (Leaf _) : ts) = ts +tail (Digit w (Node _ t1 t2) : ts) = + Digit w' t1 : Digit w' t2 : ts + where w' = w `div` 2 +lookup (Digit w t : ts) i + | i < w = lookupTree w t i + | otherwise = lookup ts (i - w) +lookupTree 1 (Leaf x) 0 = x +lookupTree _ (Node x _ _) 0 = x +lookupTree w (Node _ t1 t2) i + | i < w' = lookupTree w' t1 (i-1) + | otherwise = lookupTree w' t2 (i-1-w') + where w' = w `div` 2 +~~~ + +## Non-Uniform Structure + +### Random Access Lists + +Here is our first Random Access List type: + +~~~ {.Haskell} +data BLT a = Leaf a + | Fork (BLT a) (BLT a) +data Digit a = Zero | One (BLT a) +type RandList a = [Digit a] +~~~ + +It is possible to construct invalid members of this type: + +~~~ {.Haskell} +[One (Fork (Leaf 1) (Fork (Leaf 2) (Leaf 3)))] +~~~ + +Constraint violations: + +* All trees should be *complete* +* Incorrect height for its numerical representation + +### Random Access Lists (cont.) + +We can encode the fact that we only want *complete* trees of the +correct height by combining the structures and "pairing" the type +parameter on each recurrence: + +~~~ {.Haskell} +data Fork t = Fork t t -- this is just like (t,t) +data RandList t = Nil + | Zero (RandList (Fork t)) + | One t (RandList (Fork t)) +~~~ + +This is known as *polymorphic recursion* or *non-uniform data types* + +Some examples: + +~~~ {.Haskell} +Nil +One 1 Nil +Zero (One (Fork 1 2) Nil) +Zero (Zero (One (Fork (Fork 1 2) (Fork 3 4)) Nil)) +~~~ + +### Random Access List Implementation + +~~~ {.Haskell} +data Fork t = Fork t t +data RandList t = Nil + | Zero (RandList (Fork t)) + | One t (RandList (Fork t)) +cons :: a -> RandList a -> RandList a +cons x Nil = One x Nil +cons x1 (Zero ds) = One x1 ds +cons x1 (One x2 ds) = Zero (cons (Fork x1 x2) ds) +front :: RandList a -> (a, RandList a) +front Nil = error "Empty RandList" +front (One x Nil) = (x, Nil) +front (One x ds) = (x, Zero ds) +front (Zero ds) = (x1, One x2 ds') + where (Fork x1 x2, ds') = front ds +find :: Int -> RandList a -> a +find 0 Nil = error "Not found" +find 0 (One x _) = x +find i (One _ ds) = find (i-1) (Zero ds) +find i (Zero ds) = if i`mod`2 == 0 then x else y + where (Fork x y) = find (i`div`2) ds +~~~ + +### Other Numerical Representations + +We have only scratched the surface of numerical representations + +* Different tree structures at digits + * complete binary trees + * complete leaf trees + * binomial trees + * pennants + +* Different number systems + * zeroless binary + * redundant binary + * skew binary + * segmented binary + * higher bases + * fibonacci numbers + * factorials - random permutation algorithm + +# Real-World Structures + +## Zippers + +### What is a Zipper? + +Imagine you have a large, complex data structure... + +A zipper is a *complementary* structure that tracks a navigation path +through the primary structure. + +It contains: + +* a *focus point* that refers to the current point in the navigation +* the *path* of the traversal from the root to focus point + +Operators on a zipper: + +* navigation: `goUp`, `goDown`, `goLeft`, searching, etc. +* update: `replace` the focus point, `insertLeft`, etc. + +Traversing a structure with a zipper turns it *inside-out*, and moving +back to the head pulls it back together, like a *zipper*. + +### A Zipper on Rose Trees + +First, a tree structure, possibly representing a document: + +~~~ {.Haskell} +data Tree a = Item a | Section [Tree a] +~~~ + +We need a structure to record the path; we need to track steps up and +down the structure as well as the position in the list + +~~~ {.Haskell} +data Path a = Top | Node [Tree a] (Path a) [Tree a] +~~~ + +Finally, we need to combine the path with the focus point, which will +just be a subtree of the tree we are traversing: + +~~~ {.Haskell} +data Location a = Loc (Tree a) (Path a) +~~~ + +To build a zipper, we just combine the root of the tree with the `Top` +path constructor in a `Location` + +### A Zipper on Rose Trees - Navigation + +Basic navigation (error checking elided): + +~~~ {.Haskell} +data Path a = Top | Node [Tree a] (Path a) [Tree a] +data Location a = Loc (Tree a) (Path a) + +goDown (Loc (Section (t1:trees)) p) = + Loc t1 (Node [] p trees) + +goLeft (Loc t (Node (l:left) up right)) = + Loc l (Node left up (t:right)) + +goRight (Loc t (Node left up (r:right))) = + Loc r (Node (t:left) up right) + +goUp (Loc t (Node left up right)) = + Loc (Section (reverse left ++ t:right)) up +~~~ + +### A Zipper on Rose Trees - Update + +Modifying at the current position: + +~~~ {.Haskell} +data Path a = Top | Node [Tree a] (Path a) [Tree a] +data Location a = Loc (Tree a) (Path a) + +replace (Loc _ p) t = Loc t p + +insertRight (Loc t (Node left up right)) r = + Loc t (Node left up (r:right)) + +insertLeft (Loc t (Node left up right)) l = + Loc t (Node (l:left) up right) + +insertDown (Loc (Section sons) p) t1 = + Loc t1 (Node [] p sons) +~~~ + +### Crazy Zipper Facts + +The zipper structure for a given data structure can be derived +mechanically from its algebraic type signature. + +The zipper type signature is the *first derivative* (in the sense you +learned in Calculus) of its parent signature. + +Zippers are *one-hole contexts* for their parent type. The *focus +point* is the hole. + +Zippers can also be represented by *delimited continuations* of a +traversing process. These can be used with any `Traversable` without +knowing its concrete type at all! + +Continuation-capturing traversal functions invert the control +structure in a similar manner to how taking the derivative of the data +type inverts its data structure. + +## 2-3 Finger Trees + +### Not this time + +I lied. There's no time to talk about these. Sorry! + +But you should be prepared to read this paper about them now: + +http://www.cs.ox.ac.uk/people/ralf.hinze/publications/FingerTrees.pdf + +## Hash Array Mapped Tries + +### Downsides To This Stuff + +Way back at the beginning, I mentioned some downsides: + +* Extra overhead for references vs. in-place storage +* Locality of data can suffer + +These are constant-factor issues, but with the advances in the cache +structure of machines, they are an increasingly *large* constant +factor. + +Bad memory access patterns can make memory fetches take *orders of +magnitude* longer. + +The solution is to use *bigger* chunks of data; do more copying and +less pointer chasing. + +### Hash Array Mapped Trie + +Key features: + +* Instead of a branching factor of 2, use a factor of 32 or so. +* Each node (aside from leaf nodes) contains up to 32 elements. +* $n$ bits of the key index the array at the next level +* Bit population count is used to represent sparse arrays + +~~~ {.Haskell} +type Key = Word +type Bitmap = Word +data HAMT a = Empty + | BitmapIndexed !Bitmap !(Vector (HAMT a)) + | Leaf !Key a + | Full !(Vector (HAMT a)) +~~~ + +### Hash Array Mapped Trie - More Explanation + +A *trie* is a tree with a branching factor $k$, where $k$ is the size +of alphabet of key elements. + +A trie for caseless English words would have branching factor 26. + +The first symbol of the key determines the index to the first tree +level; second symbol to the next, etc. + +HAMT divides a key (vector index or hashed key) into bit fields: + +* a branching factor of 32 uses 5-bit fields +* A 32-bit key divides into 6 fields, with 2 bits remaining + +HAMT stores children sparsely: Bitmap determines child presence, +vector stores only present children. + +Clever bit-level math determines child vector index from bitmap. + +### Hash Array Mapped Trie - Lookup + +~~~ {.Haskell} +maskIdx b m = popCount (b .&. (m - 1)) +mask k s = shiftL 1 (subkey k s) + +subkeyMask = 1 `shiftL` bitsPerSubkey - 1 +subkey k s = fromIntegral $ shiftR k s .&. subkeyMask + +lookup :: Key -> HAMT a -> Maybe a +lookup k t = lookup' k 0 t +lookup' k s t + = case t of + Empty -> Nothing + Leaf kx x + | k == kx -> Just x + | otherwise -> Nothing + BitmapIndexed b v -> + let m = mask k s in + if b .&. m == 0 + then Nothing + else lookup' k (s+subkeyWidth) (v ! maskIdx b m) + Full v -> lookup' k (s+subkeyWidth) (v ! subkey k s) +~~~ + +# Conclusion + +### Functional Data Structure Fundamentals + +Functional data structures provide: + +* Safety via Immutability +* Efficient Persistence +* Clear & Simple Implementation + +Many are based around some core ideas: + +* Numerical Representations +* Lazy Amortization +* Non-Uniform Type Structure + +The field is still developing! + +Studying Functional Data Structures will teach you a lot about +functional languages. diff --git a/Queue1.hs b/Queue1.hs new file mode 100644 index 0000000..6aa27fd --- /dev/null +++ b/Queue1.hs @@ -0,0 +1,22 @@ +module Queue1 where + +import StrictList as L + +data Queue a = Q { left :: StrictList a + , leftLen :: !Int + , right :: StrictList a + , rightLen :: !Int + } deriving (Show, Eq) + +empty :: Queue a +empty = Q Empty 0 Empty 0 + +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl + +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = Q l ll (e :$ r) (rl + 1) + +remove :: Queue a -> (a, Queue a) +remove (Q Empty _ r rl) = remove (Q (L.reverse r) rl Empty 0) +remove (Q l ll r rl) = (L.head l, Q (L.tail l) (ll - 1) r rl) diff --git a/Queue2.hs b/Queue2.hs new file mode 100644 index 0000000..b493282 --- /dev/null +++ b/Queue2.hs @@ -0,0 +1,33 @@ +module Queue2 where + +import Prelude hiding (length) + +-- Invariant: |R| <= |L| +-- Invariant is preserved via the makeQ function + +data Queue a = Q { left :: [a] + , leftLen :: !Int + , right :: [a] + , rightLen :: !Int + } deriving (Show, Eq) + +empty :: Queue a +empty = Q [] 0 [] 0 + +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl + +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = makeQ (Q l ll (e : r) (rl + 1)) + +remove :: Queue a -> (a, Queue a) +remove (Q l ll r rl) = (head l, makeQ (Q (tail l) (ll - 1) r rl)) + +makeQ :: Queue a -> Queue a +makeQ q@(Q l ll r rl) + | rl <= ll = q + | otherwise = Q (rot l r []) (ll + rl) [] 0 + +rot :: [a] -> [a] -> [a] -> [a] +rot [] r a = head r : a +rot l r a = head l : rot (tail l) (tail r) (head r : a) diff --git a/Queue2.txt b/Queue2.txt new file mode 100644 index 0000000..1744f38 --- /dev/null +++ b/Queue2.txt @@ -0,0 +1,55 @@ +--Load Queue2.hs - lazy rebuilding queue +:l Queue2.hs + +--Start ghc-vis +:vis + +let x = empty +:view x + +--Insert a value - view new queue; we already triggered the reverse +let y = insert 1 x +:view y + +--Remove the value - forces the evaluation in y, but we discard the new tree +let (z,_) = remove y +print z +:update + +--Add another element to y; now there's one in the right list +let a = insert 2 y +:view a + +--Add one more, and we trigger the swap again +let b = insert 3 a +:view b + +--We can add 3 more and still not swap +let c = insert 6 $ insert 5 $ insert 4 b +:view c + +--One more, and now we trigger the swap +let d = insert 7 c +:view d + +--Let's remove one by one and see how evaluation progresses +let (e,f) = remove d +print e +:view f + +let (g,h) = remove f +print g +:view h + +let (i,j) = remove h +print i +:view j + +let (k,l) = remove j +print k +:view l + +let (m,n) = remove l +print m +:view n + diff --git a/Queue3.hs b/Queue3.hs new file mode 100644 index 0000000..3c7cd23 --- /dev/null +++ b/Queue3.hs @@ -0,0 +1 @@ +module Queue3 where diff --git a/RB1.hs b/RB1.hs new file mode 100644 index 0000000..0a8987b --- /dev/null +++ b/RB1.hs @@ -0,0 +1,81 @@ +module RedBlack1 where + +{- Version 1, 'untyped' -} +data Color = R | B deriving Show +data RB a = E | T Color (RB a) a (RB a) deriving Show + +{- Insertion and membership test as by Okasaki -} +insert :: Ord a => a -> RB a -> RB a +insert x s = + T B a z b + where + T _ a z b = ins s + ins E = T R E x E + ins s'@(T B a' y b') + | xy = balance a' y (ins b') + | otherwise = s' + ins s'@(T R a' y b') + | xy = T R a' y (ins b') + | otherwise = s' + +member :: Ord a => a -> RB a -> Bool +member _ E = False +member x (T _ a y b) + | x < y = member x a + | x > y = member x b + | otherwise = True + +{- balance: first equation is new, + to make it work with a weaker invariant -} +balance :: RB a -> a -> RB a -> RB a +balance (T R a x b) y (T R c z d) = T R (T B a x b) y (T B c z d) +balance (T R (T R a x b) y c) z d = T R (T B a x b) y (T B c z d) +balance (T R a x (T R b y c)) z d = T R (T B a x b) y (T B c z d) +balance a x (T R b y (T R c z d)) = T R (T B a x b) y (T B c z d) +balance a x (T R (T R b y c) z d) = T R (T B a x b) y (T B c z d) +balance a x b = T B a x b + +{- deletion a la SMK -} +delete :: Ord a => a -> RB a -> RB a +delete x t = + case del t of {T _ a y b -> T B a y b; _ -> E} + where + del E = E + del (T _ a y b) + | xy = delformRight a y b + | otherwise = app a b + delformLeft a@(T B _ _ _) y b = balleft (del a) y b + delformLeft a y b = T R (del a) y b + delformRight a y b@(T B _ _ _) = balright a y (del b) + delformRight a y b = T R a y (del b) + +balleft :: RB a -> a -> RB a -> RB a +balleft (T R a x b) y c = T R (T B a x b) y c +balleft bl x (T B a y b) = balance bl x (T R a y b) +balleft bl x (T R (T B a y b) z c) = T R (T B bl x a) y (balance b z (sub1 c)) + +balright :: RB a -> a -> RB a -> RB a +balright a x (T R b y c) = T R a x (T B b y c) +balright (T B a x b) y bl = balance (T R a x b) y bl +balright (T R a x (T B b y c)) z bl = T R (balance (sub1 a) x b) y (T B c z bl) + +sub1 :: RB a -> RB a +sub1 (T B a x b) = T R a x b +sub1 _ = error "invariance violation" + +app :: RB a -> RB a -> RB a +app E x = x +app x E = x +app (T R a x b) (T R c y d) = + case app b c of + T R b' z c' -> T R(T R a x b') z (T R c' y d) + bc -> T R a x (T R bc y d) +app (T B a x b) (T B c y d) = + case app b c of + T R b' z c' -> T R(T B a x b') z (T B c' y d) + bc -> balleft a x (T B bc y d) +app a (T R b x c) = T R (app a b) x c +app (T R a x b) c = T R a x (app b c) diff --git a/RB2.hs b/RB2.hs new file mode 100644 index 0000000..136b168 --- /dev/null +++ b/RB2.hs @@ -0,0 +1,179 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE UndecidableInstances #-} + +module RedBlack2 where + +data Unit a = E deriving Show +type Tr t a = (t a,a,t a) +data Red t a = C (t a) | R (Tr t a) + +instance (Show (t a), Show a) => Show (Red t a) + where showsPrec _ (C t) = shows t + showsPrec _ (R(a,b,c)) = + ("R("++) . shows a . (","++) . shows b . (","++) . shows c . (")"++) + +data AddLayer t a = B(Tr(Red t) a) deriving Show + +data RB t a = Base (t a) | Next (RB (AddLayer t) a) + +instance (Show (t a),Show a) => Show (RB t a) + where + show (Base t) = show t + show (Next t) = show t + +type Tree a = RB Unit a +empty :: Tree a +empty = Base E + +type RR t a = Red (Red t) a +type RL t a = Red (AddLayer t) a + +member :: Ord a => a -> Tree a -> Bool +member x t = rbmember x t (const False) + +rbmember :: Ord a => a -> RB t a -> (t a->Bool) -> Bool +rbmember _ (Base t) m = m t +rbmember x (Next u) m = rbmember x u (bmem x m) + +bmem :: Ord a => a -> (t a->Bool) -> AddLayer t a -> Bool +bmem x m (B(l,y,r)) + | xy = rmem x m r + | otherwise = True + +rmem :: Ord a => a -> (t a->Bool) -> Red t a->Bool +rmem _ m (C t) = m t +rmem x m (R(l,y,r)) + | xy = m r + | otherwise = True + +insert :: Ord a => a -> Tree a -> Tree a +insert = rbinsert + +class Insertion t where ins :: Ord a => a -> t a -> Red t a +instance Insertion Unit where ins x E = R(E,x,E) + +rbinsert :: (Ord a,Insertion t) => a -> RB t a -> RB t a +rbinsert x (Next t) = Next (rbinsert x t) +rbinsert x (Base t) = blacken(ins x t) + +blacken :: Red t a -> RB t a +blacken (C u) = Base u +blacken (R(a,x,b)) = Next(Base(B(C a,x,C b))) + +balanceL :: RR t a -> a -> Red t a -> RL t a +balanceL (R(R(a,x,b),y,c)) z d = R(B(C a,x,C b),y,B(c,z,d)) +balanceL (R(a,x,R(b,y,c))) z d = R(B(a,x,C b),y,B(C c,z,d)) +balanceL (R(C a,x,C b)) z d = C(B(R(a,x,b),z,d)) +balanceL (C a) x b = C(B(a,x,b)) + +balanceR :: Red t a -> a -> RR t a -> RL t a +balanceR a x (R(R(b,y,c),z,d)) = R(B(a,x,C b),y,B(C c,z,d)) +balanceR a x (R(b,y,R(c,z,d))) = R(B(a,x,b),y,B(C c,z,C d)) +balanceR a x (R(C b,y,C c)) = C(B(a,x,R(b,y,c))) +balanceR a x (C b) = C(B(a,x,b)) + +instance Insertion t => Insertion (AddLayer t) where + ins x t@(B(l,y,r)) + | xy = balance(C l) y (ins x r) + | otherwise = C t +instance Insertion t => Insertion (Red t) where + ins x (C t) = C(ins x t) + ins x t@(R(a,y,b)) + | xy = R(C a,y,ins x b) + | otherwise = C t + +balance :: RR t a -> a -> RR t a -> RL t a +balance (R a) y (R b) = R(B a,y,B b) +balance (C a) x b = balanceR a x b +balance a x (C b) = balanceL a x b + +class Append t where app :: t a -> t a -> Red t a + +instance Append Unit where app _ _ = C E + +instance Append t => Append (AddLayer t) where + app (B(a,x,b)) (B(c,y,d)) = threeformB a x (appRed b c) y d + +threeformB :: Red t a -> a -> RR t a -> a -> Red t a -> RL t a +threeformB a x (R(b,y,c)) z d = R(B(a,x,b),y,B(c,z,d)) +threeformB a x (C b) y c = balleftB (C a) x (B(b,y,c)) + +appRed :: Append t => Red t a -> Red t a -> RR t a +appRed (C x) (C y) = C(app x y) +appRed (C t) (R(a,x,b)) = R(app t a,x,C b) +appRed (R(a,x,b)) (C t) = R(C a,x,app b t) +appRed (R(a,x,b))(R(c,y,d)) = threeformR a x (app b c) y d + +threeformR:: t a -> a -> Red t a -> a -> t a -> RR t a +threeformR a x (R(b,y,c)) z d = R(R(a,x,b),y,R(c,z,d)) +threeformR a x (C b) y c = R(R(a,x,b),y,C c) + +balleft :: RR t a -> a -> RL t a -> RR (AddLayer t) a +balleft (R a) y c = R(C(B a),y,c) +balleft (C t) x (R(B(a,y,b),z,c)) = R(C(B(t,x,a)),y,balleftB (C b) z c) +balleft b x (C t) = C (balleftB b x t) + +balleftB :: RR t a -> a -> AddLayer t a -> RL t a +balleftB bl x (B y) = balance bl x (R y) + +balright :: RL t a -> a -> RR t a -> RR (AddLayer t) a +balright a x (R b) = R(a,x,C(B b)) +balright (R(a,x,B(b,y,c))) z (C d) = R(balrightB a x (C b),y,C(B(c,z,d))) +balright (C t) x b = C (balrightB t x b) + +balrightB :: AddLayer t a -> a -> RR t a -> RL t a +balrightB (B y) = balance (R y) + +class Append t => DelRed t where + delTup :: Ord a => a -> Tr t a -> Red t a + delLeft :: Ord a => a -> t a -> a -> Red t a -> RR t a + delRight :: Ord a => a -> Red t a -> a -> t a -> RR t a + +class Append t => Del t where + del :: Ord a => a -> AddLayer t a -> RR t a + +class (DelRed t, Del t) => Deletion t + +instance DelRed Unit where + delTup z t@(_,x,_) = if x==z then C E else R t + delLeft _ _ y b = R(C E,y,b) + delRight _ a y _ = R(a,y,C E) + +instance Deletion t => DelRed (AddLayer t) where + delTup z (a,x,b) + | zx = balrightB a x (del z b) + | otherwise = app a b + delLeft x a = balleft (del x a) + delRight x a y b = balright a y (del x b) + +instance DelRed t => Del t where + del z (B(a,x,b)) + | zx = delformRight b + | otherwise = appRed a b + where delformLeft(C t) = delLeft z t x b + delformLeft(R t) = R(delTup z t,x,b) + delformRight(C t) = delRight z a x t + delformRight(R t) = R(a,x,delTup z t) + +instance Deletion t => Deletion (AddLayer t) +instance Deletion Unit + +rbdelete :: (Ord a,Deletion t) => a -> RB (AddLayer t) a -> RB t a +rbdelete x (Next t) = Next (rbdelete x t) +rbdelete x (Base t) = blacken2 (del x t) + +blacken2 :: RR t a -> RB t a +blacken2 (C(C t)) = Base t +blacken2 (C(R(a,x,b))) = Next(Base(B(C a,x,C b))) +blacken2 (R p) = Next(Base(B p)) + +delete:: Ord a => a -> Tree a -> Tree a +delete x (Next u) = rbdelete x u +delete _ _ = empty diff --git a/RB3.hs b/RB3.hs new file mode 100644 index 0000000..ec1386a --- /dev/null +++ b/RB3.hs @@ -0,0 +1,204 @@ +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RankNTypes #-} + +module RedBlack3 where + +type Tr t a b = (t a b, a, t a b) +data Red t a b = C (t a b) | R (Tr t a b) +data Black a b = E | B (Tr (Red Black) a [b]) + +instance Show a => Show (Black a b) where + showsPrec _ E = ('E':) + showsPrec _ (B (a, x, b)) = ("B(" ++) . showRed a . ("," ++) . shows x . + ("," ++) . showRed b . (")" ++) + +showRed :: forall t a b. (Show (t a b), Show a) + => Red t a b + -> ShowS +showRed (C x) = shows x +showRed (R (a, x, b)) = ("R(" ++) . shows a . ("," ++) . shows x . ("," ++) . + shows b . (")" ++) + +type RR a b = Red (Red Black) a b + +inc :: Black a b + -> Black a [b] +inc = tickB + +{- tickB is the identity, + but it allows us to replace that bogus type variable -} + +tickB :: Black a b + -> Black a c +tickB E = E +tickB (B (a, x, b)) = B (tickR a, x, tickR b) + +tickR :: Red Black a b + -> Red Black a c +tickR (C t) = C (tickB t) +tickR (R (a, x, b)) = R (tickB a, x, tickB b) + +data Tree a = forall b . ENC (Black a b) + +instance Show a => Show (Tree a) + where show (ENC t) = show t + +empty :: Tree a +empty = ENC E + +insert :: Ord a + => a + -> Tree a + -> Tree a +insert x (ENC t) = ENC (blacken (insB x t)) + +blacken :: Red Black a b + -> Black a b +blacken (C u) = u +blacken (R (a, x, b)) = B (C (inc a), x, C (inc b)) + +insB :: Ord a + => a + -> Black a b + -> Red Black a b +insB x E = R (E, x, E) +insB x t@(B (a, y, b)) + | x < y = balanceL (insR x a) y b + | x > y = balanceR a y (insR x b) + | otherwise = C t + +insR :: Ord a + => a + -> Red Black a b + -> RR a b +insR x (C t) = C (insB x t) +insR x t@(R (a, y, b)) + | x < y = R (insB x a, y, C b) + | x > y = R (C a, y, insB x b) + | otherwise = C t + +balanceL :: RR a [b] + -> a + -> Red Black a [b] + -> Red Black a b +balanceL (R (R (a, x, b), y, c)) z d = R (B (C a, x, C b), y, B (c, z, d)) +balanceL (R (a, x, R (b, y, c))) z d = R (B (a, x, C b), y, B (C c, z, d)) +balanceL (R (C a, x, C b)) z d = C (B (R (a, x, b), z, d)) +balanceL (C a) x b = C (B (a, x, b)) + +balanceR :: Red Black a [b] + -> a + -> RR a [b] + -> Red Black a b +balanceR a x (R (R (b, y, c), z, d)) = R (B (a, x, C b), y, B (C c, z, d)) +balanceR a x (R (b, y, R (c, z, d))) = R (B (a, x, b), y, B (C c, z, C d)) +balanceR a x (R (C b, y, C c)) = C (B (a, x, R (b, y, c))) +balanceR a x (C b) = C (B (a, x, b)) + +delete :: Ord a + => a + -> Tree a + -> Tree a +delete x (ENC t) = + case delB x t of + R p -> ENC (B p) + C (R (a, x', b)) -> ENC (B (C a, x', C b)) + C (C q) -> ENC q + +delB :: Ord a + => a + -> Black a b + -> RR a [b] +delB _ E = C (C E) +delB x (B (a, y, b)) + | x < y = delfromL a + | x > y = delfromR b + | otherwise = appendR a b + where delfromL (R t) = R (delT x t, y, b) + delfromL (C E) = R (C E, y, b) + delfromL (C t) = balL (delB x t) y b + delfromR (R t) = R (a, y, delT x t) + delfromR (C E) = R (a, y, C E) + delfromR (C t) = balR a y (delB x t) + +delT :: Ord a + => a + -> Tr Black a b + -> Red Black a b +delT x t@(a,y,b) + | x < y = delfromL a + | x > y = delfromR b + | otherwise = append a b + where delfromL (B _) = balLeB (delB x a) y b + delfromL _ = R t + delfromR (B _) = balRiB a y (delB x b) + delfromR _ = R t + +balLeB :: RR a [b] + -> a + -> Black a b + -> Red Black a b +balLeB bl x (B y) = balance bl x (R y) + +balRiB :: Black a b + -> a + -> RR a [b] + -> Red Black a b +balRiB (B y) = balance (R y) + +balL :: RR a [b] + -> a + -> Red Black a b + -> RR a b +balL (R a) y c = R (C (B a), y, c) +balL (C t) x (R (B (a, y, b), z, c)) = R (C (B (t, x, a)), y, balLeB (C b) z c) +balL b x (C t) = C (balLeB b x t) + +balR :: Red Black a b + -> a + -> RR a [b] + -> RR a b +balR a x (R b) = R (a, x, C (B b)) +balR (R (a, x, B (b, y, c))) z (C d) = R (balRiB a x (C b), y, C (B (c, z, d))) +balR (C t) x b = C (balRiB t x b) + +balance :: RR a [b] + -> a + -> RR a [b] + -> Red Black a b +balance (R a) y (R b) = R (B a, y, B b) +balance (C a) x b = balanceR a x b +balance a x (C b) = balanceL a x b + +append :: Black a b + -> Black a b + -> Red Black a b +append (B (a, x, b)) (B (c, y, d)) = threeformB a x (appendR b c) y d +append _ _ = C E + +threeformB :: Red Black a [b] + -> a + -> RR a [b] + -> a + -> Red Black a [b] + -> Red Black a b +threeformB a x (R (b, y, c)) z d = R (B (a, x, b), y, B (c, z, d)) +threeformB a x (C b) y c = balLeB (C a) x (B (b, y, c)) + +appendR :: Red Black a b + -> Red Black a b + -> RR a b +appendR (C x) (C y) = C (append x y) +appendR (C t) (R (a, x, b)) = R (append t a, x, C b) +appendR (R (a, x, b)) (C t) = R (C a, x, append b t) +appendR (R (a, x, b)) (R (c, y, d)) = threeformR a x (append b c) y d + +threeformR :: Black a b + -> a + -> Red Black a b + -> a + -> Black a b + -> RR a b +threeformR a x (R (b, y, c)) z d = R (R (a, x, b), y, R (c, z, d)) +threeformR a x (C b) y c = R (R (a, x, b), y, C c) diff --git a/RB4.hs b/RB4.hs new file mode 100644 index 0000000..a611939 --- /dev/null +++ b/RB4.hs @@ -0,0 +1,131 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE StandaloneDeriving #-} + +module RedBlackTree where + +data Nat = Zero | Succ Nat deriving (Eq, Ord, Show) +type One = Succ Zero + +data RedBlack = Black | Red deriving (Eq, Ord, Show) + +-- red-black trees are rooted at a black node +data RedBlackTree a = forall n. T (Node Black n a) +deriving instance Show a => Show (RedBlackTree a) + +-- all paths from a node to a leaf have exactly n black nodes +data Node :: RedBlack -> Nat -> * -> * where + -- all leafs are black + Leaf :: Node Black One a + -- internal black nodes can have children of either color + B :: Node cL n a -> a -> Node cR n a -> Node Black (Succ n) a + -- internal red nodes can only have black children + R :: Node Black n a -> a -> Node Black n a -> Node Red n a +deriving instance Show a => Show (Node c n a) + +-- one-hole context for red-black trees +data Context :: Nat -> RedBlack -> Nat -> * -> * where + -- if we're at the root, the hole is a black node + Root :: Context n Black n a + -- we can go left or right from a red node hole, creating a hole for a black node + BC :: Bool -> a -> Node Black n a -> Context m Red n a -> Context m Black n a + -- we can go left or right from a black node hole, creating a hole for either + EC :: Bool -> a -> Node cY n a -> Context m Black (Succ n) a -> Context m cX n a + +data Zipper m a = forall c n. Zipper (Node c n a) (Context m c n a) + +-- create a zipper +unZip :: Node Black n a -> Zipper n a +unZip = flip Zipper Root + +-- destroy a zipper +zipUp :: Zipper m a -> Node Black m a +zipUp (Zipper x Root) = x +zipUp (Zipper x (BC goLeft a y c)) = zipUp $ Zipper (if goLeft then R x a y else R y a x) c +zipUp (Zipper x (EC goLeft a y c)) = zipUp $ Zipper (if goLeft then B x a y else B y a x) c + +-- locate the node that should contain a in the red-black tree +zipTo :: Ord a => a -> Zipper n a -> Zipper n a +zipTo _ z@(Zipper Leaf _) = z +zipTo a z@(Zipper (R l a' r) c) = case compare a a' of + EQ -> z + LT -> zipTo a $ Zipper l (BC True a' r c) + GT -> zipTo a $ Zipper r (BC False a' l c) +zipTo a z@(Zipper (B l a' r) c) = case compare a a' of + EQ -> z + LT -> zipTo a $ Zipper l (EC True a' r c) + GT -> zipTo a $ Zipper r (EC False a' l c) + +-- create a red-black tree +empty :: RedBlackTree a +empty = T Leaf + +-- insert a node into a red-black tree +-- (see http://en.wikipedia.org/wiki/Red%E2%80%93black_tree#Insertion) +insert :: Ord a => a -> RedBlackTree a -> RedBlackTree a +insert a t@(T root) = case zipTo a (unZip root) of + -- find matching leaf and replace with red node (pointing to two leaves) + Zipper Leaf c -> insertAt (R Leaf a Leaf) c + -- if it's already in the tree, there's no need to modify it + _ -> t + +insertAt :: Node Red n a -> Context m c n a -> RedBlackTree a +-- 1) new node is root => paint it black and done +insertAt (R l a r) Root = T $ B l a r +-- 2) new node's parent is black => done +insertAt x (EC b a y c) = T . zipUp $ Zipper x (EC b a y c) +-- 3) uncle is red => paint parent/uncle black, g'parent red. recurse on g'parent +insertAt x (BC pb pa py (EC gb ga (R ul ua ur) gc)) = insertAt g gc + where p = if pb then B x pa py else B py pa x + u = B ul ua ur + g = if gb then R p ga u else R u ga p +-- 4) node is between parent and g'parent => inner rotation +insertAt (R l a r) (BC False pa py pc@(EC True _ _ _)) = insertAt (R py pa l) (BC True a r pc) +insertAt (R l a r) (BC True pa py pc@(EC False _ _ _)) = insertAt (R r pa py) (BC False a l pc) +-- 5) otherwise => outer rotation +-- XXX: GHC seems unable to infer that gy is Black so I have to do both cases +-- explicitly, rather than +-- insertAt x (BC True pa py (EC True ga gy gc)) = +-- T . zipUp $ Zipper (B x pa $ R py ga gy) gc +-- insertAt x (BC False pa py (EC False ga gy gc)) = +-- T . zipUp $ Zipper (B (R gy ga py) pa x) gc +insertAt x (BC True pa py (EC True ga gy@Leaf gc)) = + T . zipUp $ Zipper (B x pa $ R py ga gy) gc +insertAt x (BC True pa py (EC True ga gy@B {} gc)) = + T . zipUp $ Zipper (B x pa $ R py ga gy) gc +insertAt x (BC False pa py (EC False ga gy@Leaf gc)) = + T . zipUp $ Zipper (B (R gy ga py) pa x) gc +insertAt x (BC False pa py (EC False ga gy@B {} gc)) = + T . zipUp $ Zipper (B (R gy ga py) pa x) gc + +-- can't derive, since we abstract over n, so we have to manually +-- check for identical structure +instance Eq a => Eq (RedBlackTree a) where + T Leaf == T Leaf = + True + + T (B l@B {} a r@B {}) == T (B l'@B {} a' r'@B {}) = + a == a' && T l == T l' && T r == T r' + + T (B (R ll la lr) a r@B {}) == T (B (R ll' la' lr') a' r'@B {}) = + a == a' && la == la' && T ll == T ll' && T lr == T lr' && T r == T r' + + T (B l@B {} a (R rl ra rr)) == T (B l'@B {} a' (R rl' ra' rr')) = + a == a' && ra == ra' && T l == T l' && T rl == T rl' && T rr == T rr' + + T (B (R ll la lr) a (R rl ra rr)) == T (B (R ll' la' lr') a' (R rl' ra' rr')) = + a == a' && la == la' && ra == ra' && + T ll == T ll' && T lr == T lr' && T rl == T rl' && T rr == T rr' + _ == _ = + False + +-- can't derive, since B abstracts over child node colors, so +-- manually check for identical structure +instance Eq a => Eq (Node c n a) where + Leaf == Leaf = True + R l a r == R l' a' r' = a == a' && l == l' && r == r' + b@B {} == b'@B {} = T b == T b' + _ == _ = False diff --git a/RandList.hs b/RandList.hs new file mode 100644 index 0000000..8d5eae9 --- /dev/null +++ b/RandList.hs @@ -0,0 +1,85 @@ +module RandList + ( RandList + , empty + , isEmpty + , cons + , head + , tail + , lookup + , update + ) where + +import Prelude hiding (head, lookup, tail) + +data BLT a = Leaf a + | Fork !Int (BLT a) (BLT a) + deriving (Eq, Show) + +data Digit a = Zero + | One (BLT a) + deriving (Eq, Show) + +type RandList a = [Digit a] + +empty :: RandList a +empty = [] + +isEmpty :: RandList a -> Bool +isEmpty = null + +cons :: a -> RandList a -> RandList a +cons x = insTree (Leaf x) + +head :: RandList a -> a +head ts = x where (Leaf x, _) = borrowTree ts + +tail :: RandList a -> RandList a +tail ts = ts' where (_, ts') = borrowTree ts + +lookup :: RandList a -> Int -> a +lookup [] _ = error "Bad Index" +lookup (Zero : ts) i = lookup ts i +lookup (One t : ts) i + | i < size t = lookupTree t i + | otherwise = lookup ts (i - size t) + +update :: RandList a -> Int -> a -> RandList a +update [] _ _ = error "Bad Index" +update (Zero : ts) i y = Zero : update ts i y +update (One t : ts) i y + | i < size t = One (updateTree t i y) : ts + | otherwise = One t : update ts (i - size t) y + +-- Utility functions + +size :: BLT a -> Int +size (Leaf _) = 1 +size (Fork n _ _ ) = n + +link :: BLT a -> BLT a -> BLT a +link t1 t2 = Fork (size t1 + size t2) t1 t2 + +insTree :: BLT a -> RandList a -> RandList a +insTree t [] = [One t] +insTree t (Zero : ts) = One t : ts +insTree t1 (One t2 : ts) = Zero : insTree (link t1 t2) ts + +borrowTree :: RandList a -> (BLT a, RandList a) +borrowTree [One t] = (t, []) +borrowTree (One t : ts) = (t, Zero : ts) +borrowTree (Zero : ts) = (t1, One t2 : ts') + where (Fork _ t1 t2, ts') = borrowTree ts + +lookupTree :: BLT a -> Int -> a +lookupTree (Leaf x) 0 = x +lookupTree (Leaf _) _ = error "Bad Index" +lookupTree (Fork n t1 t2) i + | i < (n `div` 2) = lookupTree t1 i + | otherwise = lookupTree t2 (i - n `div` 2) + +updateTree :: BLT a -> Int -> a -> BLT a +updateTree (Leaf _) 0 y = Leaf y +updateTree (Leaf _) _ _ = error "Bad Index" +updateTree (Fork n t1 t2) i y + | i < (n `div` 2) = Fork n (updateTree t1 i y) t2 + | otherwise = Fork n t1 (updateTree t2 (i - n `div` 2) y) diff --git a/SkewNat.hs b/SkewNat.hs new file mode 100644 index 0000000..6ec9119 --- /dev/null +++ b/SkewNat.hs @@ -0,0 +1,13 @@ +module SkewNat where + +type SkewNat = [Int] -- increasing list of powers of 2^{k+1}-1 + +inc :: SkewNat -> SkewNat +inc ws@(w1 : w2 : rest) + | w1 == w2 = 1 + w1 + w2 : rest + | otherwise = 1 : ws +inc ws = 1 : ws + +dec :: SkewNat -> SkewNat +dec (1 : ws) = ws +dec (w : ws) = (w `div` 2) : (w `div` 2) : ws diff --git a/SkewRandList.hs b/SkewRandList.hs new file mode 100644 index 0000000..1c661a4 --- /dev/null +++ b/SkewRandList.hs @@ -0,0 +1,61 @@ +module SkewRandList + ( RList + , cons + , head + , tail + , lookup + , update ) where + +import Prelude hiding (head, lookup, tail) + +data BTree a = Leaf a + | Node a (BTree a) (BTree a) + deriving (Show, Eq) + +data Digit a = Digit !Int (BTree a) + deriving (Show, Eq) + +type RList a = [Digit a] + +cons :: a -> RList a -> RList a +cons x (Digit w1 t1 : Digit w2 t2 : ts') + | w1 == w2 = Digit (1 + w1 + w2) (Node x t1 t2) : ts' +cons x ts = Digit 1 (Leaf x) : ts + +head :: RList a -> a +head (Digit 1 (Leaf x) : _) = x +head (Digit _ (Node x _ _) : _) = x + +tail :: RList a -> RList a +tail (Digit 1 (Leaf _) : ts) = ts +tail (Digit w (Node _ t1 t2) : ts) = + Digit w' t1 : Digit w' t2 : ts + where w' = w `div` 2 + +lookup :: RList a -> Int -> a +lookup (Digit w t : ts) i + | i < w = lookupTree w t i + | otherwise = lookup ts (i - w) + +update :: RList a -> Int -> a -> RList a +update (Digit w t : ts) i y + | i < w = Digit w (updateTree w t i y) : ts + | otherwise = Digit w t : update ts (i-w) y + +lookupTree :: Int -> BTree a -> Int -> a +lookupTree 1 (Leaf x) 0 = x +lookupTree _ (Node x _ _) 0 = x +lookupTree w (Node _ t1 t2) i + | i < w' = lookupTree w' t1 (i-1) + | otherwise = lookupTree w' t2 (i-1-w') + where w' = w `div` 2 + +updateTree :: Int -> BTree a -> Int -> a -> BTree a +updateTree 1 (Leaf _) 0 y = Leaf y +updateTree _ (Node _ t1 t2) 0 y = Node y t1 t2 +updateTree w (Node x t1 t2) i y + | i < (w`div`2) = + Node x (updateTree w' t1 (i-1) y) t2 + | otherwise = + Node x t1 (updateTree w' t2 (i-1-w') y) + where w' = w `div` 2 diff --git a/SparseNat.hs b/SparseNat.hs new file mode 100644 index 0000000..9d1f10f --- /dev/null +++ b/SparseNat.hs @@ -0,0 +1,28 @@ +module SparseNat where + +type SparseNat = [Int] -- increasing list of powers of 2 + +carry :: Int -> SparseNat -> SparseNat +carry w [] = [w] +carry w ws@(w' : rest) + | w < w' = w : ws + | otherwise = carry (2 * w) rest + +borrow :: Int -> SparseNat -> SparseNat +borrow w ws@(w' : rest) + | w < w' = rest + | otherwise = w : borrow (2 * w) ws + +inc :: SparseNat -> SparseNat +inc = carry 1 + +dec :: SparseNat -> SparseNat +dec = borrow 1 + +add :: SparseNat -> SparseNat -> SparseNat +add ws [] = ws +add [] ws = ws +add m@(w1 : ws1) n@(w2 : ws2) + | w1 < w2 = w1 : add ws1 n + | w2 < w1 = w2 : add m ws2 + | otherwise = carry (2 * w1) (add ws1 ws2) diff --git a/StrictList.hs b/StrictList.hs new file mode 100644 index 0000000..c89a935 --- /dev/null +++ b/StrictList.hs @@ -0,0 +1,30 @@ +module StrictList ( + StrictList(..) + , foldl + , reverse + , head + , tail + ) where + +import Prelude hiding (foldl, head, reverse, tail) + +data StrictList a = !a :$ !(StrictList a) | Empty + deriving (Show, Eq) + +infixr 5 :$ + +foldl :: (b -> a -> b) -> b -> StrictList a -> b +foldl f = lgo + where lgo z Empty = z + lgo z (x :$ xs) = let z' = f z x in z' `seq` lgo z' xs + +reverse :: StrictList a -> StrictList a +reverse = foldl (flip (:$)) Empty + +head :: StrictList a -> a +head (x :$ _ ) = x +head Empty = error "Empty StrictList has no head" + +tail :: StrictList a -> StrictList a +tail (_ :$ xs) = xs +tail Empty = error "Empty StrictList has no tail" diff --git a/Zipper.hs b/Zipper.hs new file mode 100644 index 0000000..a1a3ddf --- /dev/null +++ b/Zipper.hs @@ -0,0 +1,51 @@ +module Zipper where + +data Tree a = Item a + | Section [Tree a] + deriving (Show, Eq) + +data Path a = Top + | Node [Tree a] (Path a) [Tree a] + deriving (Show, Eq) + +data Location a = Loc (Tree a) (Path a) + deriving (Show, Eq) + +goLeft :: Location a -> Location a +goLeft (Loc _ Top) = error "Can't go left of top" +goLeft (Loc t (Node (l:left) up right)) = Loc l (Node left up (t:right)) +goLeft (Loc _ (Node [] _ _)) = error "Can't go left of first" + +goRight :: Location a -> Location a +goRight (Loc _ Top) = error "Can't go right of top" +goRight (Loc t (Node left up (r:right))) = Loc r (Node (t:left) up right) +goRight _ = error "Can't go right of last" + +goUp :: Location a -> Location a +goUp (Loc _ Top) = error "Can't go up from top" +goUp (Loc t (Node left up right)) = Loc (Section (reverse left ++ t:right)) up + +goDown :: Location a -> Location a +goDown (Loc (Item _) _) = error "Can't go down from item" +goDown (Loc (Section (t1:trees)) p) = Loc t1 (Node [] p trees) + +nth :: Location a -> Int -> Location a +nth loc = go + where go 1 = goDown loc + go n | n > 0 = goRight (go (n-1)) + | otherwise = error "nth expects a positive int" + +replace :: Location a -> Tree a -> Location a +replace (Loc _ p) t = Loc t p + +insertRight :: Location a -> Tree a -> Location a +insertRight (Loc _ Top) _ = error "Can't insert right of top" +insertRight (Loc t (Node left up right)) r = Loc t (Node left up (r:right)) + +insertLeft :: Location a -> Tree a -> Location a +insertLeft (Loc _ Top) _ = error "Can't insert left of top" +insertLeft (Loc t (Node left up right)) l = Loc t (Node (l:left) up right) + +insertDown :: Location a -> Tree a -> Location a +insertDown (Loc (Item _) _) _ = error "Can't insert down of an item" +insertDown (Loc (Section sons) p) t1 = Loc t1 (Node [] p sons)