From 2d9121e663848e1609ce7217c02d5b78d9b80b3b Mon Sep 17 00:00:00 2001 From: Levi Pearson Date: Tue, 10 Jun 2014 22:58:26 -0600 Subject: [PATCH] Data structure slides and source code --- DenseNat.hs | 22 + HAMT.hs | 97 +++++ HON.hs | 216 ++++++++++ Nested.hs | 31 ++ Presn.md | 1031 +++++++++++++++++++++++++++++++++++++++++++++++ Queue1.hs | 22 + Queue2.hs | 33 ++ Queue2.txt | 55 +++ Queue3.hs | 1 + RB1.hs | 81 ++++ RB2.hs | 179 ++++++++ RB3.hs | 204 ++++++++++ RB4.hs | 131 ++++++ RandList.hs | 85 ++++ SkewNat.hs | 13 + SkewRandList.hs | 61 +++ SparseNat.hs | 28 ++ StrictList.hs | 30 ++ Zipper.hs | 51 +++ 19 files changed, 2371 insertions(+) create mode 100644 DenseNat.hs create mode 100644 HAMT.hs create mode 100644 HON.hs create mode 100644 Nested.hs create mode 100644 Presn.md create mode 100644 Queue1.hs create mode 100644 Queue2.hs create mode 100644 Queue2.txt create mode 100644 Queue3.hs create mode 100644 RB1.hs create mode 100644 RB2.hs create mode 100644 RB3.hs create mode 100644 RB4.hs create mode 100644 RandList.hs create mode 100644 SkewNat.hs create mode 100644 SkewRandList.hs create mode 100644 SparseNat.hs create mode 100644 StrictList.hs create mode 100644 Zipper.hs diff --git a/DenseNat.hs b/DenseNat.hs new file mode 100644 index 0000000..fe48e91 --- /dev/null +++ b/DenseNat.hs @@ -0,0 +1,22 @@ +module DenseNat where + +data Digit = Zero | One deriving (Show, Ord, Eq, Enum) + +type DenseNat = [Digit] -- increasing order of significance + +inc :: DenseNat -> DenseNat +inc [] = [One] +inc (Zero : ds) = One : ds +inc (One : ds) = Zero : inc ds -- carry + +dec :: DenseNat -> DenseNat +dec [One] = [] +dec (One : ds) = Zero : ds +dec (Zero : ds) = One : dec ds -- borrow + +add :: DenseNat -> DenseNat -> DenseNat +add ds [] = ds +add [] ds = ds +add (d : ds1) (Zero : ds2) = d : add ds1 ds2 +add (Zero : ds1) (d : ds2) = d : add ds1 ds2 +add (One : ds1) (One : ds2) = Zero : inc (add ds1 ds2) -- carry diff --git a/HAMT.hs b/HAMT.hs new file mode 100644 index 0000000..4d3eab3 --- /dev/null +++ b/HAMT.hs @@ -0,0 +1,97 @@ +module HAMT where + +import Data.Bits +import Data.Word +import Prelude hiding (lookup, (++)) + +import Data.Vector (Vector, singleton, unsafeDrop, unsafeIndex, + unsafeTake, unsafeUpdate, (++)) + +type Key = Word +type Bitmap = Word +type Shift = Int +type Subkey = Int -- we need to use this to do shifts, so an Int it is + +data HAMT a = Empty + | BitmapIndexed {-# UNPACK #-} !Bitmap !(Vector (HAMT a)) + | Leaf {-# UNPACK #-} !Key a + | Full {-# UNPACK #-} !(Vector (HAMT a)) + deriving (Show) + + +-- These architecture dependent constants + +bitsPerSubkey :: Int +bitsPerSubkey = floor . logBase 2 . fromIntegral . bitSize $ (undefined :: Word) + +subkeyMask :: Bitmap +subkeyMask = 1 `shiftL` bitsPerSubkey - 1 + +maskIndex :: Bitmap -> Bitmap -> Int +maskIndex b m = popCount (b .&. (m - 1)) + +mask :: Key -> Shift -> Bitmap +mask k s = shiftL 1 (subkey k s) + +{-# INLINE subkey #-} +subkey :: Key -> Shift -> Int +subkey k s = fromIntegral $ shiftR k s .&. subkeyMask + +empty :: HAMT a +empty = Empty + +lookup :: Key -> HAMT a -> Maybe a +lookup k t = lookup' k 0 t + +lookup' :: Key -> Shift -> HAMT a -> Maybe a +lookup' k s t + = case t of + Empty -> Nothing + Leaf kx x + | k == kx -> Just x + | otherwise -> Nothing + BitmapIndexed b v -> + let m = mask k s in + if b .&. m == 0 + then Nothing + else lookup' k (s+bitsPerSubkey) (unsafeIndex v (maskIndex b m)) + Full v -> lookup' k (s+bitsPerSubkey) (unsafeIndex v (subkey k s)) + +insert :: Key -> a -> HAMT a -> HAMT a +insert k v t = insert' k 0 v t + +insert' :: Key -> Shift -> a -> HAMT a -> HAMT a +insert' kx s x t + = case t of + Empty -> Leaf kx x + Leaf ky y + | ky == kx -> Leaf kx x + | otherwise -> + insert' kx s x $ BitmapIndexed (mask ky s) (singleton t) + BitmapIndexed b v -> {-# SCC "i-Bitmap" #-} + let m = mask kx s + i = maskIndex b m in + if b .&. m == 0 + then let l = Leaf kx x + v' = unsafeTake i v ++ singleton l ++ unsafeDrop i v + b' = b .|. m in + if b' == 0xFFFFFFFF + then Full v' + else BitmapIndexed b' v' + else {-# SCC "i-Bitmap-conflict" #-} + let st = unsafeIndex v i + st' = insert' kx (s+bitsPerSubkey) x st + v' = {-# SCC "i-Bitmap-update" #-} + unsafeUpdate v (singleton (i, st')) + in BitmapIndexed b v' + Full v -> {-# SCC "i-Full" #-} + let i = subkey kx s + st = unsafeIndex v i + st' = insert' kx (s+bitsPerSubkey) x st + v' = {-# SCC "i-Full-update" #-} + unsafeUpdate v (singleton (i, st')) + in Full v' + +-- lazy, but that's not necessarily a bad thing +fromList :: [(Key, a)] -> HAMT a +fromList = foldl (flip $ uncurry insert) empty diff --git a/HON.hs b/HON.hs new file mode 100644 index 0000000..317b273 --- /dev/null +++ b/HON.hs @@ -0,0 +1,216 @@ +module HigherOrderNested where + +{- Start with a standard data type +data Nat = Zero + | Succ Nat + deriving (Eq, Show) +-} + +-- Decompose it into separate types +data Zero = NZero + deriving (Eq, Show) + +data Succ nat = Succ nat + deriving (Eq, Show) + +-- And define a nested combining type +data Nats nat = NNil + | NCons nat (Nats (Succ nat)) + deriving (Eq, Show) + +{- Some examples of instances of this type: +NCons NZero NNil +NCons NZero (NCons (Succ NZero) NNil) +NCons NZero (NCons (Succ NZero) (NCons (Succ (Succ NZero)) NNil)) +-} + +{- The Stack type, analogous to Nat +data Stack a = Empty + | Push a (Stack a) + deriving (Eq, Show) +-} + +-- Decompose to separate types +data Empty a = Empty + deriving (Eq, Show) + +data Push stack a = Push a (stack a) + deriving (Eq, Show) + +-- Define a nested combining type +data Stacks stack a = SNil + | SCons (stack a) (Stacks (Push stack) a) + deriving (Eq, Show) + +{- Some examples: +SCons Empty SNil +SCons Empty (SCons (Push 'a' Empty) SNil) +SCons Empty (SCons (Push 'a' Empty) (SCons (Push 'a' (Push 'b' Empty)) SNil)) +-} + +{- Perfect Binary Leaf Trees +data Bush a = Leaf a + | Fork (Bush a) (Bush a) + deriving (Eq, Show) +-} + +-- Decomposed Perfect Binary Leaf Trees +data Leaf a = Leaf a + deriving (Eq, Show) +data Fork bush a = Fork (bush a) (bush a) + deriving (Eq, Show) + +{- Uses a zeroless 1-2 representation to eliminate overhead of numerical + nodes with no data. + + Counting: 1, 2, 11, 21, 12, 22, 111, 211, 121, 221, 112, 212, 122, 222 + + Note that only the first One constructor can directly store the + Leaf type; all others will use Fork^n Leaf, which guarantees that + our "Perfect" invariant holds. + + Each recursive instantiation of the type occurs with another level + of Fork wrapping the bush type; One uses that level directly, while + Two adds an extra Fork level to it. +-} +data RandomAccessList bush a + = Nil + | One (bush a) (RandomAccessList (Fork bush) a) + | Two (Fork bush a) (RandomAccessList (Fork bush) a) + deriving (Eq, Show) + +type IxSequence = RandomAccessList Leaf + +-- Consing to the front increments the number with a Leaf as data +cons :: a -> IxSequence a -> IxSequence a +cons a = incr (Leaf a) + +incr :: bush a + -> RandomAccessList bush a + -> RandomAccessList bush a +incr b Nil = One b Nil -- inc 0 = 1 +incr b1 (One b2 ds) = Two (Fork b1 b2) ds -- inc 1.. = 2.. +incr b1 (Two b2 ds) = One b1 (incr b2 ds) -- inc 2d.. = 1(inc d).. + +-- This is used to eliminate leading zeros +zero :: RandomAccessList (Fork bush) a + -> RandomAccessList bush a +zero Nil = Nil -- 0 = 0 +zero (One b ds) = Two b (zero ds) -- 01.. = 20. +zero (Two (Fork b1 b2) ds) = Two b1 (One b2 ds) -- 02.. = 21. + +-- Removing from the front decrements the number and removes a Leaf +front :: IxSequence a -> (a, IxSequence a) +front Nil = error "IxSequence empty" +front (One (Leaf a) ds) = (a, zero ds) -- dec 1.. = 0.. +front (Two (Fork (Leaf a) b) ts) = (a, One b ts) -- dec 2.. = 1.. + +fromList :: [a] -> IxSequence a +fromList = foldr cons Nil + +{- A recursive (top-down) solution +unleaf :: Leaf a -> [a] +unleaf (Leaf a) = [a] + +unfork :: (bush a -> [a]) -> Fork bush a -> [a] +unfork flatten (Fork l r) = flatten l ++ flatten r + +listify :: (bush a -> [a]) -> RandomAccessList bush a -> [a] +listify _ Nil = [] +listify flatten (One b ds) = flatten b ++ listify (unfork flatten) ds +listify flatten (Two b ds) = unfork flatten b ++ listify (unfork flatten) ds + +toList :: IxSequence a -> [a] +toList = listify unleaf +-} + +{- A recursive (top-down) solution with Type Classes +class Flatten bush where + flatten :: bush a -> [a] + +instance Flatten Leaf where + flatten (Leaf a) = [a] + +instance (Flatten bush) => Flatten (Fork bush) where + flatten (Fork l r) = flatten l ++ flatten r + +listify :: (Flatten bush) => RandomAccessList bush a -> [a] +listify Nil = [] +listify (One b ds) = flatten b ++ listify ds +listify (Two b ds) = flatten b ++ listify ds + +toList :: IxSequence a -> [a] +toList = listify +-} + +{- An iterative (bottom-up) solution; linear time bound -} +listify :: RandomAccessList bush a -> [bush a] +listify Nil = [] +listify (One b ds) = b : unforks (listify ds) +listify (Two b ds) = unforks (b : listify ds) + +unforks :: [Fork bush a] -> [bush a] +unforks [] = [] +unforks (Fork b1 b2 : ts) = b1 : b2 : unforks ts + +toList :: IxSequence a -> [a] +toList s = [a | Leaf a <- listify s] + +{- Examples: + +> fromList[1..5] +One (Leaf 1) + (Two (Fork (Fork (Leaf 2) (Leaf 3)) + (Fork (Leaf 4) (Leaf 5))) + Nil) + +12 = 1*1 + 2*2 = 1 + 4 = 5 + +> fromList [1..20] +Two (Fork (Leaf 1) (Leaf 2)) -- 0, 1 + (One (Fork (Leaf 3) (Leaf 4)) -- 2, 3 + (Two (Fork (Fork (Fork (Leaf 5) (Leaf 6)) -- 4, 11 + (Fork (Leaf 7) (Leaf 8))) + (Fork (Fork (Leaf 9) (Leaf 10)) + (Fork (Leaf 11) (Leaf 12)))) + (One (Fork (Fork (Fork (Leaf 13) (Leaf 14)) -- 12, 19 + (Fork (Leaf 15) (Leaf 16))) + (Fork (Fork (Leaf 17) (Leaf 18)) + (Fork (Leaf 19) (Leaf 20)))) + Nil))) + +2121 = 2*1 + 1*2 + 2*4 + 1*8 = 2 + 2 + 8 + 8 = 20 + +tree size at each rank is d*2^r + +-} + +{- +Searching: + +lookup 0 (One (Leaf x) _) = x +lookup 0 (One (Fork t1 t2) ds) = lookup 0 (One ) + +-} + +class FindB bush where + findB :: Int -> Int -> bush a -> a +instance FindB Leaf where + findB _ _ (Leaf x) = x +instance (FindB bush) => FindB (Fork bush) where + findB i s (Fork lb rb) + | i < s`div`2 = findB i (s`div`2) lb + | otherwise = findB i (s`div`2) rb + +find :: Int -> IxSequence a -> a +find = find' 0 + +find' :: (FindB bush) => Int -> Int + -> RandomAccessList bush a -> a +find' _ _ Nil = error "Not found" +find' r i (One b ds) + | i < 2^r = findB i (2^r) b + | otherwise = find' (i-2^r) (r+1) ds +find' r i (Two b ds) + | i < 2*2^r = findB i (2*2^r) b + | otherwise = find' (i-2*2^r) (r+1) ds diff --git a/Nested.hs b/Nested.hs new file mode 100644 index 0000000..dcde164 --- /dev/null +++ b/Nested.hs @@ -0,0 +1,31 @@ +module Nested where + +data Fork t = Fork t t + deriving (Show, Eq) + +data RandList t = Nil + | Zero (RandList (Fork t)) + | One t (RandList (Fork t)) + deriving (Show, Eq) + +cons :: a -> RandList a -> RandList a +cons x Nil = One x Nil +cons x1 (Zero ds) = One x1 ds +cons x1 (One x2 ds) = Zero (cons (Fork x1 x2) ds) + +front :: RandList a -> (a, RandList a) +front Nil = error "Empty RandList" +front (One x Nil) = (x, Nil) +front (One x ds) = (x, Zero ds) +front (Zero ds) = (x1, One x2 ds') + where (Fork x1 x2, ds') = front ds + +find :: Int -> RandList a -> a +find 0 Nil = error "Not found" +find 0 (One x _) = x +find i (One _ ds) = find (i-1) (Zero ds) +find i (Zero ds) = if i`mod`2 == 0 then x else y + where (Fork x y) = find (i`div`2) ds + +fromList :: [a] -> RandList a +fromList = foldr cons Nil diff --git a/Presn.md b/Presn.md new file mode 100644 index 0000000..6e1a866 --- /dev/null +++ b/Presn.md @@ -0,0 +1,1031 @@ +--- +title: Functional Data Structures +subtitle: Analysis and Implementation +author: Levi Pearson +... + +# Intro + +### Outline + +1. Basics of Functional Data Structures + * Immutability + * Persistent vs. Ephemeral + * Node Structure + * Data Sharing + +2. Analysis Techniques + * Amortization Overview + * Amortization with Persistence + +3. Design Techniques + * Lazy Rebuilding and Scheduling + * Numerical Representations + * Non-Uniform Structure + +4. Real-World Examples + * Zippers + * 2-3 Finger Trees + * Hash Array Mapped Tries + +### Motivation + +Functional data structures provide: + +* Relatively simple implementations +* Good worst-case performance bounds +* Persistence for free +* Ease of use in concurrent programs + +## Basics + +### Functional Data Structures? + +What do we mean by "Functional Data Structure"? + +* Built from immutable values +* Can be implemented in a purely functional language +* No in-place updates! + +Obviously not the data structures taught in Intro to Algorithms class... + +### Persistence + +Most imperative structures are *ephemeral*; updates destroy previous versions + +A data structure is *persistent* when old versions remain after updates + +* Developed in the context of imperative structures +* Imperative versions of persistent structures can be quite complex +* Adding persistence can change performance bounds +* Purely functional structures are automatically persistent! + +### Value of Persistence + +Why would we want persistence anyway? + +* Safe data sharing +* No locking required +* Restoration of previous states +* Interesting algorithms (e.g. in computational geometry) + +### Node-Based Structure + +Functional data structures share structural characteristics: + +* Built from *nodes* and *references* +* Nodes contain references: + * one or more references to other nodes + * one or more references to values +* A structure may be built from one or more *kinds* of node + +Examples: + +~~~ {.haskell} +data List a = Nil + | Cons a (List a) + +data Tree a = Leaf a + | Branch (Tree a) (Tree a) +~~~ + +### Data Sharing + +The fine-grained node structure has some benefits: + +* New versions of structures share the bulk of their data with old versions +* Copying on updates is limited to *nodes* rather than *values* +* Only the nodes on the path from the root to change point are copied. + +Also some drawbacks: + +* Extra overhead for references vs. in-place storage +* Locality of data can suffer + +# Analysis + +## Amortization + +### Structures Enable Algorithms + +* Data structures support *operations* on them +* Operations are implemented via *algorithms* +* Algorithms perform operations + +We bottom out at two primitive operations: + +* Construction of an ADT value +* Destructuring/Pattern Matching of an ADT value + +### Algorithm comparison + +We judge between algorithms based on analysis: + +* How many primitive operations are needed to complete the algorithm? +* How does the number of operations scale as the problem size grows? + +*Asymptotic analysis* gives an upper bound to the growth of operation + count as the size grows to infinity. + +### Worst Case + +Typical analysis is *worst-case*: + +* Find a function such that some constant times that function of the + input size is *always* greater than the actual count of operations + for any single operation. +* That function is a worst-case upper bound. +* The smallest upper bound is how we characterize the asymptotic + worst-case behavior. + +### Average Case + +Sometimes we consider *average-case*: + +* Find the upper bounds of all possible single operation cases. +* Model the statistical likelihood of the cases. +* Find the smallest upper bound of the weighted combination of cases. + +### Amortized + +An *amortized* analysis is different: + +* Find the upper bounds of all possible single operation cases. +* Consider the possible *sequences* of operations. +* Show that *expensive* operations always run infrequently enough to + distribute their cost among *cheap* operations + +### When To Amortize? + +A **worst-case** analysis is necessary when you have hard latency bounds, +e.g. real-time requirements. + +**Average** or **amortized** analysis can give a better sense of +*expected throughput* of an algorithm. + +**Amortized** analysis is useful when most operations are cheap, but +occasionally an expensive one must be performed. + +Examples: + +* Array doubling +* Rehashing +* Tree balancing + +Developed in the context of *ephemeral* data structures. + +### The Persistence Wrench + +An ephemeral structure has a single logical future: Once an operation +is performed, there's no going back. + +The amortization accounting process is straightforward: + +* Assign an extra *credit* cost to cheap cases. +* Prove that an expensive case only occurs after sufficient credit + has accumulated to pay for it. + +Persistent structures may have *multiple* logical futures. + +Consider a tree very close to needing an expensive rebalancing: + +~~~ {.Haskell} +-- tr1 refers to the teetering tree +let tr2 = insert foo tr1 -- causes a rebalance: expensive +let tr3 = insert foo tr1 -- causes another rebalance +let tr4 = insert foo tr1 -- causes yet another rebalance +~~~ + +How to account for that? You can't spend your savings more than once. + +## Functional Amortization + +### Tackling Persistence with Laziness + +Chris Okasaki discovered a way around the multiple-future problem: + +* Delay expensive operations via lazy evaluation +* Account for expensive operations via *debt* +* Savings can only be spent once, but over-paying on debt is ok + +This works because: + +* A suspended computation places a *thunk* in the new structure +* When a thunk is *forced*, it performs its calculation +* The value replaces the thunk *in place* + +The previous example, with lazy evaluation: + +~~~ {.Haskell} +-- tr1 refers to the teetering tree +let tr2 = insert foo tr1 -- creates a rebalancing thunk: cheap +let tr3 = insert bar tr2 -- forces rebalancing: expensive +let tr4 = insert baz tr2 -- sees previous rebalancing +~~~ + +### Outline of Amortized Lazy Analysis + +Like a layaway plan: + +1. Find a computation you can't afford to execute +2. Create a suspension for it, assign a value proportional to its shared cost +3. Pay the debt off a little at a time +4. When the debt is paid off, the suspension may be executed + +Consider each logical future as if it were the *only one*. + +Each future must fully pay before invoking any suspension. + +### Restoring Worst-Case Bounds + +Sometimes the latency budget requires a tight worst-case bound on all +operations. + +Worst-case bounds can be restored via *scheduling* + +* Execute suspended computations in bounded steps +* Execute the steps as they are paid off + +This has a small space and complexity overhead, but bounds the latency. + +### Using Analysis Results + +Sure that sounds like *fun*, but what's it good for? + +Different operations over structures (e.g. head-insert, tail-insert, +search) are necessary to implement higher-level algorithms. + +Choose the data structure that: + +* Provides good bounds for the operations your algorithm needs +* Provides the latency guarantees you need + +Other factors as well, of course + +# Implementation Patterns + +## Lazy Rebuilding + +### Queues, Take 1 + +The essence of the Queue: + +* `insert` adds an element at the end of the queue +* `remove` takes an element from the front of the queue + +How to represent it? What about a List? + +* Our `remove` operation is $O \left({1}\right)$ +* But `insert` is $O \left({n}\right)$! + +The standard trick: A pair of lists, `left` and `right`. + +* The `left` list holds the head of the queue +* The `right` list holds the tail, stored in reversed order + +When `left` becomes empty, reverse `right` and put it in `left`. + +### Queue Implementation + +~~~ {.Haskell} +module Queue1 where +import Prelude hiding (reverse, length) +import StrictList (head, tail, reverse) +data Queue a = Q { left :: StrictList a, leftLen :: !Int + , right :: StrictList a, rightLen :: !Int + } deriving (Show, Eq) +empty :: Queue a +empty = Q Empty 0 Empty 0 +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = Q l ll (e :$ r) (rl + 1) +remove :: Queue a -> (a, Queue a) +remove (Q Empty _ r rl) = remove (Q (reverse r) rl Empty 0) +remove (Q l ll r rl) = (head l, Q (tail l) (ll - 1) r rl) +~~~ + +### Queue Analysis + +Most operations are trivial: + +* `empty`: $O \left({1}\right)$ +* `insert`: $O \left({1}\right)$ +* `length`: $O \left({1}\right)$ + +`remove` will take some thought + +~~~ {.Haskell} +remove :: Queue a -> (a, Queue a) +remove (Q Empty _ r rl) = remove (Q (reverse r) rl Empty 0) +remove (Q l ll r rl) = (head l, Q (tail l) (ll - 1) r rl) +~~~ + +* When `left` is non-empty, `remove` takes $O \left({1}\right)$ +* But when empty, it takes $O \left({n}\right)$ due to the `reverse` + +### Amortizing the Queue + +* The `reverse` operation takes place only when `left` is empty + +* `left` is empty when: + * The entire queue is empty + * `n` values have been `insert`ed without a `remove` + * All previously reversed values have been `remove`d. + +* A `remove` after `n` `inserts` will take `n` operations. + +* Those `n` values must be `remove`d before another `reverse` can + occur. + +* So, assigning extra credit to each `insert` should offset the linear + cost of the `reverse`... + +### Not So Fast + +~~~ {.Haskell} +let q1 = foldr insert empty "Abracadabra" +let q2 = remove q1 -- Causes a reverse +let q3 = remove q1 -- Causes the same reverse +let q4 = remove q1 -- And again... +~~~ + +Oops. Persistence screws up our amortization scheme. + +Time to make it lazy. + +### Queue Take 2 - Paired Lazy Lists + +Goal: Reverse `right` *incrementally* before `left` becomes empty. + +Means: Periodically append `reverse right` to `left` using *guarded recursion*. + +~~~ {.Haskell} +-- rot l r [] = l ++ reverse r +rot [] r a = head r : a +rot l r a = head l : rot (tail l) (tail r) (head r : a) +~~~ + +This evaluates up to the (`:`) operation per call to `head` on the +result. + +### Queue Take 2 - Paired Lazy Lists (cont.) + +Forcing a tail: + +* performs one step of the `++` and one step of the `reverse`. +* may also force another tail; but only $\log n$ times. + +So worst case improves from $O\left({n}\right)$ to $O\left({\log n}\right)$. + +If we never let `right` be longer than `left`, by the time `rotate` +steps through the original `left`, the `reverse` will be complete. + +Amortized cost is therefore $O \left({1}\right)$ again, and safe for +persistence. + +### Full Implementation + +~~~ {.Haskell} +module Queue2 where +import Prelude hiding (length) +data Queue a = Q { left :: [a], leftLen :: !Int + , right :: [a], rightLen :: !Int + } deriving (Show, Eq) +empty :: Queue a +empty = Q [] 0 [] 0 +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = makeQ (Q l ll (e : r) (rl + 1)) +remove :: Queue a -> (a, Queue a) +remove (Q l ll r rl) = (head l, makeQ (Q (tail l) (ll-1) r rl)) +makeQ :: Queue a -> Queue a -- Preserve invariant: |R| <= |L| +makeQ q@(Q l ll r rl) + | rl <= ll = q + | otherwise = Q (rot l r []) (ll + rl) [] 0 +rot :: [a] -> [a] -> [a] -> [a] +rot [] r a = head r : a +rot l r a = head l : rot (tail l) (tail r) (head r : a) +~~~ + +## Numerical Representations + +### Arithmetic + +> Many people regard arithmetic as a trivial thing that children learn +> and computers do, but we will see that arithmetic is a fascinating +> topic with many interesting facets. It is important to make a +> thorough study of efficient methods for calculating with numbers, +> since arithmetic underlies so many computer applications. + +Donald E. Knuth, from "The Art of Computer Programming" + +### Nat and List + +Inductive definition of Naturals: + +~~~ {.Haskell} +data Nat = Zero + | Succ Nat +~~~ + +Inductive definition of Lists: + +~~~ {.Haskell} +data List a = Nil + | Cons a (List a) +~~~ + +Functions on `Nat` and `List` are analogous too: + +* `inc` of a `Nat` is like `cons` on a `List` +* `dec` of a `Nat` is like removing `head` of a `List` +* adding two `Nat`s is like combining two `List`s + +Lists are *numbers* that contain *values*. + +`Nat` and `List` implement a *unary* number system. + +### Positional Numbers - Definitions + +A *positional number* is written as a series of digits: +$$ +b_0 \dots b_{m-1} +$$ +The digit $b_0$ is the *least significant digit* + +The digit $b_{m-1}$ is the *most significant digit* + +Each digit $b_i$ has a *weight* $w_i$, so the value is: +$$ +\sum\limits_{i=0}^{m-1} b_i w_i +$$ +A number is *base* $B$ if $w_i=B^i$ and $D_i=\left\{0,\dots,B-1\right\}$. + +Usually weights are increasing sequences of powers and $D_i$ is the +same for every digit. + +### Redundancy + +A number system is *redundant* if there is more than one way to +represent some numbers. + +Take the system where $w_i = 2^i$ and $D_i = \left\{0,1,2\right\}$ + +Decimal $13$ can be written: + +* $1011$ +* $1201$ +* $122$ + +### Dense and Sparse Representations + +*Dense* representations are simple lists/sequences of digits, + including $0$. + +*Sparse* representations exclude $0$ digits, so must include either: + +* *rank* (the index in the sequence) or +* *weight* + +### Dense Example + +Straightforward list of binary digits, but least significant bit first + +~~~ {.Haskell} +data Digit = Zero | One +type DenseNat = [Digit] -- increasing order of significance +inc [] = [One] +inc (Zero : ds) = One : ds +inc (One : ds) = Zero : inc ds -- carry +dec [One] = [] +dec (One : ds) = Zero : ds +dec (Zero : ds) = One : dec ds -- borrow +add ds [] = ds +add [] ds = ds +add (d : ds1) (Zero : ds2) = d : add ds1 ds2 +add (Zero : ds1) (d : ds2) = d : add ds1 ds2 +add (One : ds1) (One : ds2) = Zero : inc (add ds1 ds2) -- carry +~~~ + +### Sparse Example + +Store a list of $d_i w_i$ values, so `Zero` digits won't appear + +~~~ {.Haskell} +type SparseNat = [Int] -- increasing list of powers of 2 +carry w [] = [w] +carry w ws@(w' : rest) + | w < w' = w : ws + | otherwise = carry (2 * w) rest +borrow w ws@(w' : rest) + | w < w' = rest + | otherwise = w : borrow (2 * w) ws +inc ws = carry 1 ws +dec ws = borrow 1 ws +add ws [] = ws +add [] ws = ws +add m@(w1 : ws1) n@(w2 : ws2) + | w1 < w2 = w1 : add ws1 n + | w2 < w1 = w2 : add m ws2 + | otherwise = carry (2 * w1) (add ws1 ws2) +~~~ + +### Binary Numerical Representations + +We can transform a positional number system into a data structure: + +* Replace sequence of digits with a sequence of trees +* Number and size of trees is determined by representation of numbers + +For example: + +* The binary representation of $73$ is $1001001$ +* A collection of size $73$ would contain three trees: + * size $1$ + * size $8$ + * size $64$ + +### Complete Binary Leaf Trees + +A binary leaf tree is either: + +* a `Leaf`, containing a value; or +* a `Fork`, containing two binary leaf trees + +A *complete* binary leaf tree has all `Leaf` nodes at the same rank. + +~~~ {.Haskell} +data BLT a = Leaf a + | Fork (BLT a) (BLT a) +~~~ + +### Binary Random-Access List + +Let's combine a dense binary representation with a `BLT` + +First, let's annotate the `BLT`s with their size: + +~~~ {.Haskell} +data BLT a = Leaf a + | Fork !Int (BLT a) (BLT a) deriving (Show) +~~~ + +Recall the types we used for `DenseNat`: + +~~~ {.Haskell} +data Digit = Zero | One + deriving (Show, Ord, Eq) +type DenseNat = [Digit] -- increasing order of significance +~~~ + +We'll adapt them to store a `BLT` on `One` digits: + +~~~ {.Haskell} +data Digit a = Zero | One (BLT a) + deriving (Show, Ord, Eq) +type RandList a = [Digit a] +~~~ + +### Some RandList Operations + +We had an `inc` operation for `Nat` + +~~~ {.Haskell} +inc :: DenseNat -> DenseNat +inc [] = [One] +inc (Zero : ds) = One : ds +inc (One : ds) = Zero : inc ds -- carry +~~~ + +We adapt it to `cons` for `RandList` + +~~~ {.Haskell} +cons :: a -> RandList a -> RandList a +cons x ts = insTree (Leaf x) ts + +insTree t [] = [One t] +insTree t (Zero : ts) = One t : ts +insTree t1 (One t2 : ts) = Zero : insTree (link t1 t2) ts + where link t1 t2 = Fork (size t1 + size t2) t1 t2 + size (Leaf _) = 1 + size (Fork n _ _ ) = n +~~~ + +### Some RandList Operations + +We had a `dec` operation for `Nat` + +~~~ {.Haskell} +dec :: DenseNat -> DenseNat +dec [One] = [] +dec (One : ds) = Zero : ds +dec (Zero : ds) = One : dec ds -- borrow +~~~ + +We adapt it to `tail` for `RandList` + +~~~ {.Haskell} +tail :: RandList a -> RandList a +tail ts = ts' where (_, ts') = borrowTree ts + +borrowTree [One t] = (t, [] ) +borrowTree (One t : ts) = (t, Zero : ts ) +borrowTree (Zero : ts) = (t1, One t2 : ts') + where (Fork _ t1 t2, ts') = borrowTree ts +~~~ + +### Properties of RandList + +We have arranged an $n$-length sequence as a $\log\left(n+1\right)$ +length list of trees of $\log n$ depth. + +Operations `cons`, `head`, and `tail` perform $O\left({1}\right)$ +work per digit, so their worst-case is $O\left({\log n}\right)$ + +Operations `lookup` and `update` take at most $O\left({\log n}\right)$ +to find the right tree and $O\left({\log n}\right)$ to find the right +element, for a total of $O\left({\log n}\right)$ worst-case time. + +For mostly-random access, this will be a significant improvement over +`List`'s $O\left({n}\right)$ worst-case performance. + +### Skew Binary Numbers + +Our standard binary representation's performance was hindered by +*cascading carries*. + +In the **skew binary number** representation: + +* The weight $w_i$ of the $i$th digit is $2^{i+1}-1$ +* The set of digits $D_i = \left\{0,1,2\right\}$ +* Only the lowest non-$0$ digit may be $2$ (for uniqueness) + +The number $92$ is $002101$ in this representation: + +\begin{align*} +\sum\limits_{i=0}^{5} b_i w_i &= +0+0+(2^{2+1}-1)\times{2}+(2^{3+1}-1)+0+(2^{5+1}-1) \\ +&= 14+15+63 \\ +&= 92 +\end{align*} + +### Skew Binary Number Operations + +What makes skew binary representation useful: + +* $w_i$ is $2^{i+1}-1$ +* $1+2\left(2^{i+1}-1\right) = 2^{i+2}-1$ + +This means that when the lowest non-$0$ digit is $2$, the `inc` +operation: + +* resets the $2$ to $0$ +* increments the next digit from $0$ to $1$ *or* $1$ to $2$ + +If there is no $2$, just increment the lowest digit from $0$ to $1$ or +from $1$ to $2$. These only take $O\left({1}\right)$ time! + +We must use a *sparse* representation to keep $O\left({1}\right)$ +access to the first non-$0$ digit. + +### Skew Binary Random Access List + +~~~ {.Haskell} +data BTree a = Leaf a | Node a (BTree a) (BTree a) +data Digit a = Digit !Int (BTree a) +type RList a = [Digit a] +cons x (Digit w1 t1 : Digit w2 t2 : ts') + | w1 == w2 = Digit (1 + w1 + w2) (Node x t1 t2) : ts' +cons x ts = Digit 1 (Leaf x) : ts +head (Digit 1 (Leaf x) : _) = x +head (Digit _ (Node x _ _) : _) = x +tail (Digit 1 (Leaf _) : ts) = ts +tail (Digit w (Node _ t1 t2) : ts) = + Digit w' t1 : Digit w' t2 : ts + where w' = w `div` 2 +lookup (Digit w t : ts) i + | i < w = lookupTree w t i + | otherwise = lookup ts (i - w) +lookupTree 1 (Leaf x) 0 = x +lookupTree _ (Node x _ _) 0 = x +lookupTree w (Node _ t1 t2) i + | i < w' = lookupTree w' t1 (i-1) + | otherwise = lookupTree w' t2 (i-1-w') + where w' = w `div` 2 +~~~ + +## Non-Uniform Structure + +### Random Access Lists + +Here is our first Random Access List type: + +~~~ {.Haskell} +data BLT a = Leaf a + | Fork (BLT a) (BLT a) +data Digit a = Zero | One (BLT a) +type RandList a = [Digit a] +~~~ + +It is possible to construct invalid members of this type: + +~~~ {.Haskell} +[One (Fork (Leaf 1) (Fork (Leaf 2) (Leaf 3)))] +~~~ + +Constraint violations: + +* All trees should be *complete* +* Incorrect height for its numerical representation + +### Random Access Lists (cont.) + +We can encode the fact that we only want *complete* trees of the +correct height by combining the structures and "pairing" the type +parameter on each recurrence: + +~~~ {.Haskell} +data Fork t = Fork t t -- this is just like (t,t) +data RandList t = Nil + | Zero (RandList (Fork t)) + | One t (RandList (Fork t)) +~~~ + +This is known as *polymorphic recursion* or *non-uniform data types* + +Some examples: + +~~~ {.Haskell} +Nil +One 1 Nil +Zero (One (Fork 1 2) Nil) +Zero (Zero (One (Fork (Fork 1 2) (Fork 3 4)) Nil)) +~~~ + +### Random Access List Implementation + +~~~ {.Haskell} +data Fork t = Fork t t +data RandList t = Nil + | Zero (RandList (Fork t)) + | One t (RandList (Fork t)) +cons :: a -> RandList a -> RandList a +cons x Nil = One x Nil +cons x1 (Zero ds) = One x1 ds +cons x1 (One x2 ds) = Zero (cons (Fork x1 x2) ds) +front :: RandList a -> (a, RandList a) +front Nil = error "Empty RandList" +front (One x Nil) = (x, Nil) +front (One x ds) = (x, Zero ds) +front (Zero ds) = (x1, One x2 ds') + where (Fork x1 x2, ds') = front ds +find :: Int -> RandList a -> a +find 0 Nil = error "Not found" +find 0 (One x _) = x +find i (One _ ds) = find (i-1) (Zero ds) +find i (Zero ds) = if i`mod`2 == 0 then x else y + where (Fork x y) = find (i`div`2) ds +~~~ + +### Other Numerical Representations + +We have only scratched the surface of numerical representations + +* Different tree structures at digits + * complete binary trees + * complete leaf trees + * binomial trees + * pennants + +* Different number systems + * zeroless binary + * redundant binary + * skew binary + * segmented binary + * higher bases + * fibonacci numbers + * factorials - random permutation algorithm + +# Real-World Structures + +## Zippers + +### What is a Zipper? + +Imagine you have a large, complex data structure... + +A zipper is a *complementary* structure that tracks a navigation path +through the primary structure. + +It contains: + +* a *focus point* that refers to the current point in the navigation +* the *path* of the traversal from the root to focus point + +Operators on a zipper: + +* navigation: `goUp`, `goDown`, `goLeft`, searching, etc. +* update: `replace` the focus point, `insertLeft`, etc. + +Traversing a structure with a zipper turns it *inside-out*, and moving +back to the head pulls it back together, like a *zipper*. + +### A Zipper on Rose Trees + +First, a tree structure, possibly representing a document: + +~~~ {.Haskell} +data Tree a = Item a | Section [Tree a] +~~~ + +We need a structure to record the path; we need to track steps up and +down the structure as well as the position in the list + +~~~ {.Haskell} +data Path a = Top | Node [Tree a] (Path a) [Tree a] +~~~ + +Finally, we need to combine the path with the focus point, which will +just be a subtree of the tree we are traversing: + +~~~ {.Haskell} +data Location a = Loc (Tree a) (Path a) +~~~ + +To build a zipper, we just combine the root of the tree with the `Top` +path constructor in a `Location` + +### A Zipper on Rose Trees - Navigation + +Basic navigation (error checking elided): + +~~~ {.Haskell} +data Path a = Top | Node [Tree a] (Path a) [Tree a] +data Location a = Loc (Tree a) (Path a) + +goDown (Loc (Section (t1:trees)) p) = + Loc t1 (Node [] p trees) + +goLeft (Loc t (Node (l:left) up right)) = + Loc l (Node left up (t:right)) + +goRight (Loc t (Node left up (r:right))) = + Loc r (Node (t:left) up right) + +goUp (Loc t (Node left up right)) = + Loc (Section (reverse left ++ t:right)) up +~~~ + +### A Zipper on Rose Trees - Update + +Modifying at the current position: + +~~~ {.Haskell} +data Path a = Top | Node [Tree a] (Path a) [Tree a] +data Location a = Loc (Tree a) (Path a) + +replace (Loc _ p) t = Loc t p + +insertRight (Loc t (Node left up right)) r = + Loc t (Node left up (r:right)) + +insertLeft (Loc t (Node left up right)) l = + Loc t (Node (l:left) up right) + +insertDown (Loc (Section sons) p) t1 = + Loc t1 (Node [] p sons) +~~~ + +### Crazy Zipper Facts + +The zipper structure for a given data structure can be derived +mechanically from its algebraic type signature. + +The zipper type signature is the *first derivative* (in the sense you +learned in Calculus) of its parent signature. + +Zippers are *one-hole contexts* for their parent type. The *focus +point* is the hole. + +Zippers can also be represented by *delimited continuations* of a +traversing process. These can be used with any `Traversable` without +knowing its concrete type at all! + +Continuation-capturing traversal functions invert the control +structure in a similar manner to how taking the derivative of the data +type inverts its data structure. + +## 2-3 Finger Trees + +### Not this time + +I lied. There's no time to talk about these. Sorry! + +But you should be prepared to read this paper about them now: + +http://www.cs.ox.ac.uk/people/ralf.hinze/publications/FingerTrees.pdf + +## Hash Array Mapped Tries + +### Downsides To This Stuff + +Way back at the beginning, I mentioned some downsides: + +* Extra overhead for references vs. in-place storage +* Locality of data can suffer + +These are constant-factor issues, but with the advances in the cache +structure of machines, they are an increasingly *large* constant +factor. + +Bad memory access patterns can make memory fetches take *orders of +magnitude* longer. + +The solution is to use *bigger* chunks of data; do more copying and +less pointer chasing. + +### Hash Array Mapped Trie + +Key features: + +* Instead of a branching factor of 2, use a factor of 32 or so. +* Each node (aside from leaf nodes) contains up to 32 elements. +* $n$ bits of the key index the array at the next level +* Bit population count is used to represent sparse arrays + +~~~ {.Haskell} +type Key = Word +type Bitmap = Word +data HAMT a = Empty + | BitmapIndexed !Bitmap !(Vector (HAMT a)) + | Leaf !Key a + | Full !(Vector (HAMT a)) +~~~ + +### Hash Array Mapped Trie - More Explanation + +A *trie* is a tree with a branching factor $k$, where $k$ is the size +of alphabet of key elements. + +A trie for caseless English words would have branching factor 26. + +The first symbol of the key determines the index to the first tree +level; second symbol to the next, etc. + +HAMT divides a key (vector index or hashed key) into bit fields: + +* a branching factor of 32 uses 5-bit fields +* A 32-bit key divides into 6 fields, with 2 bits remaining + +HAMT stores children sparsely: Bitmap determines child presence, +vector stores only present children. + +Clever bit-level math determines child vector index from bitmap. + +### Hash Array Mapped Trie - Lookup + +~~~ {.Haskell} +maskIdx b m = popCount (b .&. (m - 1)) +mask k s = shiftL 1 (subkey k s) + +subkeyMask = 1 `shiftL` bitsPerSubkey - 1 +subkey k s = fromIntegral $ shiftR k s .&. subkeyMask + +lookup :: Key -> HAMT a -> Maybe a +lookup k t = lookup' k 0 t +lookup' k s t + = case t of + Empty -> Nothing + Leaf kx x + | k == kx -> Just x + | otherwise -> Nothing + BitmapIndexed b v -> + let m = mask k s in + if b .&. m == 0 + then Nothing + else lookup' k (s+subkeyWidth) (v ! maskIdx b m) + Full v -> lookup' k (s+subkeyWidth) (v ! subkey k s) +~~~ + +# Conclusion + +### Functional Data Structure Fundamentals + +Functional data structures provide: + +* Safety via Immutability +* Efficient Persistence +* Clear & Simple Implementation + +Many are based around some core ideas: + +* Numerical Representations +* Lazy Amortization +* Non-Uniform Type Structure + +The field is still developing! + +Studying Functional Data Structures will teach you a lot about +functional languages. diff --git a/Queue1.hs b/Queue1.hs new file mode 100644 index 0000000..6aa27fd --- /dev/null +++ b/Queue1.hs @@ -0,0 +1,22 @@ +module Queue1 where + +import StrictList as L + +data Queue a = Q { left :: StrictList a + , leftLen :: !Int + , right :: StrictList a + , rightLen :: !Int + } deriving (Show, Eq) + +empty :: Queue a +empty = Q Empty 0 Empty 0 + +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl + +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = Q l ll (e :$ r) (rl + 1) + +remove :: Queue a -> (a, Queue a) +remove (Q Empty _ r rl) = remove (Q (L.reverse r) rl Empty 0) +remove (Q l ll r rl) = (L.head l, Q (L.tail l) (ll - 1) r rl) diff --git a/Queue2.hs b/Queue2.hs new file mode 100644 index 0000000..b493282 --- /dev/null +++ b/Queue2.hs @@ -0,0 +1,33 @@ +module Queue2 where + +import Prelude hiding (length) + +-- Invariant: |R| <= |L| +-- Invariant is preserved via the makeQ function + +data Queue a = Q { left :: [a] + , leftLen :: !Int + , right :: [a] + , rightLen :: !Int + } deriving (Show, Eq) + +empty :: Queue a +empty = Q [] 0 [] 0 + +length :: Queue a -> Int +length (Q _ ll _ rl) = ll + rl + +insert :: a -> Queue a -> Queue a +insert e (Q l ll r rl) = makeQ (Q l ll (e : r) (rl + 1)) + +remove :: Queue a -> (a, Queue a) +remove (Q l ll r rl) = (head l, makeQ (Q (tail l) (ll - 1) r rl)) + +makeQ :: Queue a -> Queue a +makeQ q@(Q l ll r rl) + | rl <= ll = q + | otherwise = Q (rot l r []) (ll + rl) [] 0 + +rot :: [a] -> [a] -> [a] -> [a] +rot [] r a = head r : a +rot l r a = head l : rot (tail l) (tail r) (head r : a) diff --git a/Queue2.txt b/Queue2.txt new file mode 100644 index 0000000..1744f38 --- /dev/null +++ b/Queue2.txt @@ -0,0 +1,55 @@ +--Load Queue2.hs - lazy rebuilding queue +:l Queue2.hs + +--Start ghc-vis +:vis + +let x = empty +:view x + +--Insert a value - view new queue; we already triggered the reverse +let y = insert 1 x +:view y + +--Remove the value - forces the evaluation in y, but we discard the new tree +let (z,_) = remove y +print z +:update + +--Add another element to y; now there's one in the right list +let a = insert 2 y +:view a + +--Add one more, and we trigger the swap again +let b = insert 3 a +:view b + +--We can add 3 more and still not swap +let c = insert 6 $ insert 5 $ insert 4 b +:view c + +--One more, and now we trigger the swap +let d = insert 7 c +:view d + +--Let's remove one by one and see how evaluation progresses +let (e,f) = remove d +print e +:view f + +let (g,h) = remove f +print g +:view h + +let (i,j) = remove h +print i +:view j + +let (k,l) = remove j +print k +:view l + +let (m,n) = remove l +print m +:view n + diff --git a/Queue3.hs b/Queue3.hs new file mode 100644 index 0000000..3c7cd23 --- /dev/null +++ b/Queue3.hs @@ -0,0 +1 @@ +module Queue3 where diff --git a/RB1.hs b/RB1.hs new file mode 100644 index 0000000..0a8987b --- /dev/null +++ b/RB1.hs @@ -0,0 +1,81 @@ +module RedBlack1 where + +{- Version 1, 'untyped' -} +data Color = R | B deriving Show +data RB a = E | T Color (RB a) a (RB a) deriving Show + +{- Insertion and membership test as by Okasaki -} +insert :: Ord a => a -> RB a -> RB a +insert x s = + T B a z b + where + T _ a z b = ins s + ins E = T R E x E + ins s'@(T B a' y b') + | xy = balance a' y (ins b') + | otherwise = s' + ins s'@(T R a' y b') + | xy = T R a' y (ins b') + | otherwise = s' + +member :: Ord a => a -> RB a -> Bool +member _ E = False +member x (T _ a y b) + | x < y = member x a + | x > y = member x b + | otherwise = True + +{- balance: first equation is new, + to make it work with a weaker invariant -} +balance :: RB a -> a -> RB a -> RB a +balance (T R a x b) y (T R c z d) = T R (T B a x b) y (T B c z d) +balance (T R (T R a x b) y c) z d = T R (T B a x b) y (T B c z d) +balance (T R a x (T R b y c)) z d = T R (T B a x b) y (T B c z d) +balance a x (T R b y (T R c z d)) = T R (T B a x b) y (T B c z d) +balance a x (T R (T R b y c) z d) = T R (T B a x b) y (T B c z d) +balance a x b = T B a x b + +{- deletion a la SMK -} +delete :: Ord a => a -> RB a -> RB a +delete x t = + case del t of {T _ a y b -> T B a y b; _ -> E} + where + del E = E + del (T _ a y b) + | xy = delformRight a y b + | otherwise = app a b + delformLeft a@(T B _ _ _) y b = balleft (del a) y b + delformLeft a y b = T R (del a) y b + delformRight a y b@(T B _ _ _) = balright a y (del b) + delformRight a y b = T R a y (del b) + +balleft :: RB a -> a -> RB a -> RB a +balleft (T R a x b) y c = T R (T B a x b) y c +balleft bl x (T B a y b) = balance bl x (T R a y b) +balleft bl x (T R (T B a y b) z c) = T R (T B bl x a) y (balance b z (sub1 c)) + +balright :: RB a -> a -> RB a -> RB a +balright a x (T R b y c) = T R a x (T B b y c) +balright (T B a x b) y bl = balance (T R a x b) y bl +balright (T R a x (T B b y c)) z bl = T R (balance (sub1 a) x b) y (T B c z bl) + +sub1 :: RB a -> RB a +sub1 (T B a x b) = T R a x b +sub1 _ = error "invariance violation" + +app :: RB a -> RB a -> RB a +app E x = x +app x E = x +app (T R a x b) (T R c y d) = + case app b c of + T R b' z c' -> T R(T R a x b') z (T R c' y d) + bc -> T R a x (T R bc y d) +app (T B a x b) (T B c y d) = + case app b c of + T R b' z c' -> T R(T B a x b') z (T B c' y d) + bc -> balleft a x (T B bc y d) +app a (T R b x c) = T R (app a b) x c +app (T R a x b) c = T R a x (app b c) diff --git a/RB2.hs b/RB2.hs new file mode 100644 index 0000000..136b168 --- /dev/null +++ b/RB2.hs @@ -0,0 +1,179 @@ +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE UndecidableInstances #-} + +module RedBlack2 where + +data Unit a = E deriving Show +type Tr t a = (t a,a,t a) +data Red t a = C (t a) | R (Tr t a) + +instance (Show (t a), Show a) => Show (Red t a) + where showsPrec _ (C t) = shows t + showsPrec _ (R(a,b,c)) = + ("R("++) . shows a . (","++) . shows b . (","++) . shows c . (")"++) + +data AddLayer t a = B(Tr(Red t) a) deriving Show + +data RB t a = Base (t a) | Next (RB (AddLayer t) a) + +instance (Show (t a),Show a) => Show (RB t a) + where + show (Base t) = show t + show (Next t) = show t + +type Tree a = RB Unit a +empty :: Tree a +empty = Base E + +type RR t a = Red (Red t) a +type RL t a = Red (AddLayer t) a + +member :: Ord a => a -> Tree a -> Bool +member x t = rbmember x t (const False) + +rbmember :: Ord a => a -> RB t a -> (t a->Bool) -> Bool +rbmember _ (Base t) m = m t +rbmember x (Next u) m = rbmember x u (bmem x m) + +bmem :: Ord a => a -> (t a->Bool) -> AddLayer t a -> Bool +bmem x m (B(l,y,r)) + | xy = rmem x m r + | otherwise = True + +rmem :: Ord a => a -> (t a->Bool) -> Red t a->Bool +rmem _ m (C t) = m t +rmem x m (R(l,y,r)) + | xy = m r + | otherwise = True + +insert :: Ord a => a -> Tree a -> Tree a +insert = rbinsert + +class Insertion t where ins :: Ord a => a -> t a -> Red t a +instance Insertion Unit where ins x E = R(E,x,E) + +rbinsert :: (Ord a,Insertion t) => a -> RB t a -> RB t a +rbinsert x (Next t) = Next (rbinsert x t) +rbinsert x (Base t) = blacken(ins x t) + +blacken :: Red t a -> RB t a +blacken (C u) = Base u +blacken (R(a,x,b)) = Next(Base(B(C a,x,C b))) + +balanceL :: RR t a -> a -> Red t a -> RL t a +balanceL (R(R(a,x,b),y,c)) z d = R(B(C a,x,C b),y,B(c,z,d)) +balanceL (R(a,x,R(b,y,c))) z d = R(B(a,x,C b),y,B(C c,z,d)) +balanceL (R(C a,x,C b)) z d = C(B(R(a,x,b),z,d)) +balanceL (C a) x b = C(B(a,x,b)) + +balanceR :: Red t a -> a -> RR t a -> RL t a +balanceR a x (R(R(b,y,c),z,d)) = R(B(a,x,C b),y,B(C c,z,d)) +balanceR a x (R(b,y,R(c,z,d))) = R(B(a,x,b),y,B(C c,z,C d)) +balanceR a x (R(C b,y,C c)) = C(B(a,x,R(b,y,c))) +balanceR a x (C b) = C(B(a,x,b)) + +instance Insertion t => Insertion (AddLayer t) where + ins x t@(B(l,y,r)) + | xy = balance(C l) y (ins x r) + | otherwise = C t +instance Insertion t => Insertion (Red t) where + ins x (C t) = C(ins x t) + ins x t@(R(a,y,b)) + | xy = R(C a,y,ins x b) + | otherwise = C t + +balance :: RR t a -> a -> RR t a -> RL t a +balance (R a) y (R b) = R(B a,y,B b) +balance (C a) x b = balanceR a x b +balance a x (C b) = balanceL a x b + +class Append t where app :: t a -> t a -> Red t a + +instance Append Unit where app _ _ = C E + +instance Append t => Append (AddLayer t) where + app (B(a,x,b)) (B(c,y,d)) = threeformB a x (appRed b c) y d + +threeformB :: Red t a -> a -> RR t a -> a -> Red t a -> RL t a +threeformB a x (R(b,y,c)) z d = R(B(a,x,b),y,B(c,z,d)) +threeformB a x (C b) y c = balleftB (C a) x (B(b,y,c)) + +appRed :: Append t => Red t a -> Red t a -> RR t a +appRed (C x) (C y) = C(app x y) +appRed (C t) (R(a,x,b)) = R(app t a,x,C b) +appRed (R(a,x,b)) (C t) = R(C a,x,app b t) +appRed (R(a,x,b))(R(c,y,d)) = threeformR a x (app b c) y d + +threeformR:: t a -> a -> Red t a -> a -> t a -> RR t a +threeformR a x (R(b,y,c)) z d = R(R(a,x,b),y,R(c,z,d)) +threeformR a x (C b) y c = R(R(a,x,b),y,C c) + +balleft :: RR t a -> a -> RL t a -> RR (AddLayer t) a +balleft (R a) y c = R(C(B a),y,c) +balleft (C t) x (R(B(a,y,b),z,c)) = R(C(B(t,x,a)),y,balleftB (C b) z c) +balleft b x (C t) = C (balleftB b x t) + +balleftB :: RR t a -> a -> AddLayer t a -> RL t a +balleftB bl x (B y) = balance bl x (R y) + +balright :: RL t a -> a -> RR t a -> RR (AddLayer t) a +balright a x (R b) = R(a,x,C(B b)) +balright (R(a,x,B(b,y,c))) z (C d) = R(balrightB a x (C b),y,C(B(c,z,d))) +balright (C t) x b = C (balrightB t x b) + +balrightB :: AddLayer t a -> a -> RR t a -> RL t a +balrightB (B y) = balance (R y) + +class Append t => DelRed t where + delTup :: Ord a => a -> Tr t a -> Red t a + delLeft :: Ord a => a -> t a -> a -> Red t a -> RR t a + delRight :: Ord a => a -> Red t a -> a -> t a -> RR t a + +class Append t => Del t where + del :: Ord a => a -> AddLayer t a -> RR t a + +class (DelRed t, Del t) => Deletion t + +instance DelRed Unit where + delTup z t@(_,x,_) = if x==z then C E else R t + delLeft _ _ y b = R(C E,y,b) + delRight _ a y _ = R(a,y,C E) + +instance Deletion t => DelRed (AddLayer t) where + delTup z (a,x,b) + | zx = balrightB a x (del z b) + | otherwise = app a b + delLeft x a = balleft (del x a) + delRight x a y b = balright a y (del x b) + +instance DelRed t => Del t where + del z (B(a,x,b)) + | zx = delformRight b + | otherwise = appRed a b + where delformLeft(C t) = delLeft z t x b + delformLeft(R t) = R(delTup z t,x,b) + delformRight(C t) = delRight z a x t + delformRight(R t) = R(a,x,delTup z t) + +instance Deletion t => Deletion (AddLayer t) +instance Deletion Unit + +rbdelete :: (Ord a,Deletion t) => a -> RB (AddLayer t) a -> RB t a +rbdelete x (Next t) = Next (rbdelete x t) +rbdelete x (Base t) = blacken2 (del x t) + +blacken2 :: RR t a -> RB t a +blacken2 (C(C t)) = Base t +blacken2 (C(R(a,x,b))) = Next(Base(B(C a,x,C b))) +blacken2 (R p) = Next(Base(B p)) + +delete:: Ord a => a -> Tree a -> Tree a +delete x (Next u) = rbdelete x u +delete _ _ = empty diff --git a/RB3.hs b/RB3.hs new file mode 100644 index 0000000..ec1386a --- /dev/null +++ b/RB3.hs @@ -0,0 +1,204 @@ +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE RankNTypes #-} + +module RedBlack3 where + +type Tr t a b = (t a b, a, t a b) +data Red t a b = C (t a b) | R (Tr t a b) +data Black a b = E | B (Tr (Red Black) a [b]) + +instance Show a => Show (Black a b) where + showsPrec _ E = ('E':) + showsPrec _ (B (a, x, b)) = ("B(" ++) . showRed a . ("," ++) . shows x . + ("," ++) . showRed b . (")" ++) + +showRed :: forall t a b. (Show (t a b), Show a) + => Red t a b + -> ShowS +showRed (C x) = shows x +showRed (R (a, x, b)) = ("R(" ++) . shows a . ("," ++) . shows x . ("," ++) . + shows b . (")" ++) + +type RR a b = Red (Red Black) a b + +inc :: Black a b + -> Black a [b] +inc = tickB + +{- tickB is the identity, + but it allows us to replace that bogus type variable -} + +tickB :: Black a b + -> Black a c +tickB E = E +tickB (B (a, x, b)) = B (tickR a, x, tickR b) + +tickR :: Red Black a b + -> Red Black a c +tickR (C t) = C (tickB t) +tickR (R (a, x, b)) = R (tickB a, x, tickB b) + +data Tree a = forall b . ENC (Black a b) + +instance Show a => Show (Tree a) + where show (ENC t) = show t + +empty :: Tree a +empty = ENC E + +insert :: Ord a + => a + -> Tree a + -> Tree a +insert x (ENC t) = ENC (blacken (insB x t)) + +blacken :: Red Black a b + -> Black a b +blacken (C u) = u +blacken (R (a, x, b)) = B (C (inc a), x, C (inc b)) + +insB :: Ord a + => a + -> Black a b + -> Red Black a b +insB x E = R (E, x, E) +insB x t@(B (a, y, b)) + | x < y = balanceL (insR x a) y b + | x > y = balanceR a y (insR x b) + | otherwise = C t + +insR :: Ord a + => a + -> Red Black a b + -> RR a b +insR x (C t) = C (insB x t) +insR x t@(R (a, y, b)) + | x < y = R (insB x a, y, C b) + | x > y = R (C a, y, insB x b) + | otherwise = C t + +balanceL :: RR a [b] + -> a + -> Red Black a [b] + -> Red Black a b +balanceL (R (R (a, x, b), y, c)) z d = R (B (C a, x, C b), y, B (c, z, d)) +balanceL (R (a, x, R (b, y, c))) z d = R (B (a, x, C b), y, B (C c, z, d)) +balanceL (R (C a, x, C b)) z d = C (B (R (a, x, b), z, d)) +balanceL (C a) x b = C (B (a, x, b)) + +balanceR :: Red Black a [b] + -> a + -> RR a [b] + -> Red Black a b +balanceR a x (R (R (b, y, c), z, d)) = R (B (a, x, C b), y, B (C c, z, d)) +balanceR a x (R (b, y, R (c, z, d))) = R (B (a, x, b), y, B (C c, z, C d)) +balanceR a x (R (C b, y, C c)) = C (B (a, x, R (b, y, c))) +balanceR a x (C b) = C (B (a, x, b)) + +delete :: Ord a + => a + -> Tree a + -> Tree a +delete x (ENC t) = + case delB x t of + R p -> ENC (B p) + C (R (a, x', b)) -> ENC (B (C a, x', C b)) + C (C q) -> ENC q + +delB :: Ord a + => a + -> Black a b + -> RR a [b] +delB _ E = C (C E) +delB x (B (a, y, b)) + | x < y = delfromL a + | x > y = delfromR b + | otherwise = appendR a b + where delfromL (R t) = R (delT x t, y, b) + delfromL (C E) = R (C E, y, b) + delfromL (C t) = balL (delB x t) y b + delfromR (R t) = R (a, y, delT x t) + delfromR (C E) = R (a, y, C E) + delfromR (C t) = balR a y (delB x t) + +delT :: Ord a + => a + -> Tr Black a b + -> Red Black a b +delT x t@(a,y,b) + | x < y = delfromL a + | x > y = delfromR b + | otherwise = append a b + where delfromL (B _) = balLeB (delB x a) y b + delfromL _ = R t + delfromR (B _) = balRiB a y (delB x b) + delfromR _ = R t + +balLeB :: RR a [b] + -> a + -> Black a b + -> Red Black a b +balLeB bl x (B y) = balance bl x (R y) + +balRiB :: Black a b + -> a + -> RR a [b] + -> Red Black a b +balRiB (B y) = balance (R y) + +balL :: RR a [b] + -> a + -> Red Black a b + -> RR a b +balL (R a) y c = R (C (B a), y, c) +balL (C t) x (R (B (a, y, b), z, c)) = R (C (B (t, x, a)), y, balLeB (C b) z c) +balL b x (C t) = C (balLeB b x t) + +balR :: Red Black a b + -> a + -> RR a [b] + -> RR a b +balR a x (R b) = R (a, x, C (B b)) +balR (R (a, x, B (b, y, c))) z (C d) = R (balRiB a x (C b), y, C (B (c, z, d))) +balR (C t) x b = C (balRiB t x b) + +balance :: RR a [b] + -> a + -> RR a [b] + -> Red Black a b +balance (R a) y (R b) = R (B a, y, B b) +balance (C a) x b = balanceR a x b +balance a x (C b) = balanceL a x b + +append :: Black a b + -> Black a b + -> Red Black a b +append (B (a, x, b)) (B (c, y, d)) = threeformB a x (appendR b c) y d +append _ _ = C E + +threeformB :: Red Black a [b] + -> a + -> RR a [b] + -> a + -> Red Black a [b] + -> Red Black a b +threeformB a x (R (b, y, c)) z d = R (B (a, x, b), y, B (c, z, d)) +threeformB a x (C b) y c = balLeB (C a) x (B (b, y, c)) + +appendR :: Red Black a b + -> Red Black a b + -> RR a b +appendR (C x) (C y) = C (append x y) +appendR (C t) (R (a, x, b)) = R (append t a, x, C b) +appendR (R (a, x, b)) (C t) = R (C a, x, append b t) +appendR (R (a, x, b)) (R (c, y, d)) = threeformR a x (append b c) y d + +threeformR :: Black a b + -> a + -> Red Black a b + -> a + -> Black a b + -> RR a b +threeformR a x (R (b, y, c)) z d = R (R (a, x, b), y, R (c, z, d)) +threeformR a x (C b) y c = R (R (a, x, b), y, C c) diff --git a/RB4.hs b/RB4.hs new file mode 100644 index 0000000..a611939 --- /dev/null +++ b/RB4.hs @@ -0,0 +1,131 @@ +{-# LANGUAGE DataKinds #-} +{-# LANGUAGE ExistentialQuantification #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE StandaloneDeriving #-} + +module RedBlackTree where + +data Nat = Zero | Succ Nat deriving (Eq, Ord, Show) +type One = Succ Zero + +data RedBlack = Black | Red deriving (Eq, Ord, Show) + +-- red-black trees are rooted at a black node +data RedBlackTree a = forall n. T (Node Black n a) +deriving instance Show a => Show (RedBlackTree a) + +-- all paths from a node to a leaf have exactly n black nodes +data Node :: RedBlack -> Nat -> * -> * where + -- all leafs are black + Leaf :: Node Black One a + -- internal black nodes can have children of either color + B :: Node cL n a -> a -> Node cR n a -> Node Black (Succ n) a + -- internal red nodes can only have black children + R :: Node Black n a -> a -> Node Black n a -> Node Red n a +deriving instance Show a => Show (Node c n a) + +-- one-hole context for red-black trees +data Context :: Nat -> RedBlack -> Nat -> * -> * where + -- if we're at the root, the hole is a black node + Root :: Context n Black n a + -- we can go left or right from a red node hole, creating a hole for a black node + BC :: Bool -> a -> Node Black n a -> Context m Red n a -> Context m Black n a + -- we can go left or right from a black node hole, creating a hole for either + EC :: Bool -> a -> Node cY n a -> Context m Black (Succ n) a -> Context m cX n a + +data Zipper m a = forall c n. Zipper (Node c n a) (Context m c n a) + +-- create a zipper +unZip :: Node Black n a -> Zipper n a +unZip = flip Zipper Root + +-- destroy a zipper +zipUp :: Zipper m a -> Node Black m a +zipUp (Zipper x Root) = x +zipUp (Zipper x (BC goLeft a y c)) = zipUp $ Zipper (if goLeft then R x a y else R y a x) c +zipUp (Zipper x (EC goLeft a y c)) = zipUp $ Zipper (if goLeft then B x a y else B y a x) c + +-- locate the node that should contain a in the red-black tree +zipTo :: Ord a => a -> Zipper n a -> Zipper n a +zipTo _ z@(Zipper Leaf _) = z +zipTo a z@(Zipper (R l a' r) c) = case compare a a' of + EQ -> z + LT -> zipTo a $ Zipper l (BC True a' r c) + GT -> zipTo a $ Zipper r (BC False a' l c) +zipTo a z@(Zipper (B l a' r) c) = case compare a a' of + EQ -> z + LT -> zipTo a $ Zipper l (EC True a' r c) + GT -> zipTo a $ Zipper r (EC False a' l c) + +-- create a red-black tree +empty :: RedBlackTree a +empty = T Leaf + +-- insert a node into a red-black tree +-- (see http://en.wikipedia.org/wiki/Red%E2%80%93black_tree#Insertion) +insert :: Ord a => a -> RedBlackTree a -> RedBlackTree a +insert a t@(T root) = case zipTo a (unZip root) of + -- find matching leaf and replace with red node (pointing to two leaves) + Zipper Leaf c -> insertAt (R Leaf a Leaf) c + -- if it's already in the tree, there's no need to modify it + _ -> t + +insertAt :: Node Red n a -> Context m c n a -> RedBlackTree a +-- 1) new node is root => paint it black and done +insertAt (R l a r) Root = T $ B l a r +-- 2) new node's parent is black => done +insertAt x (EC b a y c) = T . zipUp $ Zipper x (EC b a y c) +-- 3) uncle is red => paint parent/uncle black, g'parent red. recurse on g'parent +insertAt x (BC pb pa py (EC gb ga (R ul ua ur) gc)) = insertAt g gc + where p = if pb then B x pa py else B py pa x + u = B ul ua ur + g = if gb then R p ga u else R u ga p +-- 4) node is between parent and g'parent => inner rotation +insertAt (R l a r) (BC False pa py pc@(EC True _ _ _)) = insertAt (R py pa l) (BC True a r pc) +insertAt (R l a r) (BC True pa py pc@(EC False _ _ _)) = insertAt (R r pa py) (BC False a l pc) +-- 5) otherwise => outer rotation +-- XXX: GHC seems unable to infer that gy is Black so I have to do both cases +-- explicitly, rather than +-- insertAt x (BC True pa py (EC True ga gy gc)) = +-- T . zipUp $ Zipper (B x pa $ R py ga gy) gc +-- insertAt x (BC False pa py (EC False ga gy gc)) = +-- T . zipUp $ Zipper (B (R gy ga py) pa x) gc +insertAt x (BC True pa py (EC True ga gy@Leaf gc)) = + T . zipUp $ Zipper (B x pa $ R py ga gy) gc +insertAt x (BC True pa py (EC True ga gy@B {} gc)) = + T . zipUp $ Zipper (B x pa $ R py ga gy) gc +insertAt x (BC False pa py (EC False ga gy@Leaf gc)) = + T . zipUp $ Zipper (B (R gy ga py) pa x) gc +insertAt x (BC False pa py (EC False ga gy@B {} gc)) = + T . zipUp $ Zipper (B (R gy ga py) pa x) gc + +-- can't derive, since we abstract over n, so we have to manually +-- check for identical structure +instance Eq a => Eq (RedBlackTree a) where + T Leaf == T Leaf = + True + + T (B l@B {} a r@B {}) == T (B l'@B {} a' r'@B {}) = + a == a' && T l == T l' && T r == T r' + + T (B (R ll la lr) a r@B {}) == T (B (R ll' la' lr') a' r'@B {}) = + a == a' && la == la' && T ll == T ll' && T lr == T lr' && T r == T r' + + T (B l@B {} a (R rl ra rr)) == T (B l'@B {} a' (R rl' ra' rr')) = + a == a' && ra == ra' && T l == T l' && T rl == T rl' && T rr == T rr' + + T (B (R ll la lr) a (R rl ra rr)) == T (B (R ll' la' lr') a' (R rl' ra' rr')) = + a == a' && la == la' && ra == ra' && + T ll == T ll' && T lr == T lr' && T rl == T rl' && T rr == T rr' + _ == _ = + False + +-- can't derive, since B abstracts over child node colors, so +-- manually check for identical structure +instance Eq a => Eq (Node c n a) where + Leaf == Leaf = True + R l a r == R l' a' r' = a == a' && l == l' && r == r' + b@B {} == b'@B {} = T b == T b' + _ == _ = False diff --git a/RandList.hs b/RandList.hs new file mode 100644 index 0000000..8d5eae9 --- /dev/null +++ b/RandList.hs @@ -0,0 +1,85 @@ +module RandList + ( RandList + , empty + , isEmpty + , cons + , head + , tail + , lookup + , update + ) where + +import Prelude hiding (head, lookup, tail) + +data BLT a = Leaf a + | Fork !Int (BLT a) (BLT a) + deriving (Eq, Show) + +data Digit a = Zero + | One (BLT a) + deriving (Eq, Show) + +type RandList a = [Digit a] + +empty :: RandList a +empty = [] + +isEmpty :: RandList a -> Bool +isEmpty = null + +cons :: a -> RandList a -> RandList a +cons x = insTree (Leaf x) + +head :: RandList a -> a +head ts = x where (Leaf x, _) = borrowTree ts + +tail :: RandList a -> RandList a +tail ts = ts' where (_, ts') = borrowTree ts + +lookup :: RandList a -> Int -> a +lookup [] _ = error "Bad Index" +lookup (Zero : ts) i = lookup ts i +lookup (One t : ts) i + | i < size t = lookupTree t i + | otherwise = lookup ts (i - size t) + +update :: RandList a -> Int -> a -> RandList a +update [] _ _ = error "Bad Index" +update (Zero : ts) i y = Zero : update ts i y +update (One t : ts) i y + | i < size t = One (updateTree t i y) : ts + | otherwise = One t : update ts (i - size t) y + +-- Utility functions + +size :: BLT a -> Int +size (Leaf _) = 1 +size (Fork n _ _ ) = n + +link :: BLT a -> BLT a -> BLT a +link t1 t2 = Fork (size t1 + size t2) t1 t2 + +insTree :: BLT a -> RandList a -> RandList a +insTree t [] = [One t] +insTree t (Zero : ts) = One t : ts +insTree t1 (One t2 : ts) = Zero : insTree (link t1 t2) ts + +borrowTree :: RandList a -> (BLT a, RandList a) +borrowTree [One t] = (t, []) +borrowTree (One t : ts) = (t, Zero : ts) +borrowTree (Zero : ts) = (t1, One t2 : ts') + where (Fork _ t1 t2, ts') = borrowTree ts + +lookupTree :: BLT a -> Int -> a +lookupTree (Leaf x) 0 = x +lookupTree (Leaf _) _ = error "Bad Index" +lookupTree (Fork n t1 t2) i + | i < (n `div` 2) = lookupTree t1 i + | otherwise = lookupTree t2 (i - n `div` 2) + +updateTree :: BLT a -> Int -> a -> BLT a +updateTree (Leaf _) 0 y = Leaf y +updateTree (Leaf _) _ _ = error "Bad Index" +updateTree (Fork n t1 t2) i y + | i < (n `div` 2) = Fork n (updateTree t1 i y) t2 + | otherwise = Fork n t1 (updateTree t2 (i - n `div` 2) y) diff --git a/SkewNat.hs b/SkewNat.hs new file mode 100644 index 0000000..6ec9119 --- /dev/null +++ b/SkewNat.hs @@ -0,0 +1,13 @@ +module SkewNat where + +type SkewNat = [Int] -- increasing list of powers of 2^{k+1}-1 + +inc :: SkewNat -> SkewNat +inc ws@(w1 : w2 : rest) + | w1 == w2 = 1 + w1 + w2 : rest + | otherwise = 1 : ws +inc ws = 1 : ws + +dec :: SkewNat -> SkewNat +dec (1 : ws) = ws +dec (w : ws) = (w `div` 2) : (w `div` 2) : ws diff --git a/SkewRandList.hs b/SkewRandList.hs new file mode 100644 index 0000000..1c661a4 --- /dev/null +++ b/SkewRandList.hs @@ -0,0 +1,61 @@ +module SkewRandList + ( RList + , cons + , head + , tail + , lookup + , update ) where + +import Prelude hiding (head, lookup, tail) + +data BTree a = Leaf a + | Node a (BTree a) (BTree a) + deriving (Show, Eq) + +data Digit a = Digit !Int (BTree a) + deriving (Show, Eq) + +type RList a = [Digit a] + +cons :: a -> RList a -> RList a +cons x (Digit w1 t1 : Digit w2 t2 : ts') + | w1 == w2 = Digit (1 + w1 + w2) (Node x t1 t2) : ts' +cons x ts = Digit 1 (Leaf x) : ts + +head :: RList a -> a +head (Digit 1 (Leaf x) : _) = x +head (Digit _ (Node x _ _) : _) = x + +tail :: RList a -> RList a +tail (Digit 1 (Leaf _) : ts) = ts +tail (Digit w (Node _ t1 t2) : ts) = + Digit w' t1 : Digit w' t2 : ts + where w' = w `div` 2 + +lookup :: RList a -> Int -> a +lookup (Digit w t : ts) i + | i < w = lookupTree w t i + | otherwise = lookup ts (i - w) + +update :: RList a -> Int -> a -> RList a +update (Digit w t : ts) i y + | i < w = Digit w (updateTree w t i y) : ts + | otherwise = Digit w t : update ts (i-w) y + +lookupTree :: Int -> BTree a -> Int -> a +lookupTree 1 (Leaf x) 0 = x +lookupTree _ (Node x _ _) 0 = x +lookupTree w (Node _ t1 t2) i + | i < w' = lookupTree w' t1 (i-1) + | otherwise = lookupTree w' t2 (i-1-w') + where w' = w `div` 2 + +updateTree :: Int -> BTree a -> Int -> a -> BTree a +updateTree 1 (Leaf _) 0 y = Leaf y +updateTree _ (Node _ t1 t2) 0 y = Node y t1 t2 +updateTree w (Node x t1 t2) i y + | i < (w`div`2) = + Node x (updateTree w' t1 (i-1) y) t2 + | otherwise = + Node x t1 (updateTree w' t2 (i-1-w') y) + where w' = w `div` 2 diff --git a/SparseNat.hs b/SparseNat.hs new file mode 100644 index 0000000..9d1f10f --- /dev/null +++ b/SparseNat.hs @@ -0,0 +1,28 @@ +module SparseNat where + +type SparseNat = [Int] -- increasing list of powers of 2 + +carry :: Int -> SparseNat -> SparseNat +carry w [] = [w] +carry w ws@(w' : rest) + | w < w' = w : ws + | otherwise = carry (2 * w) rest + +borrow :: Int -> SparseNat -> SparseNat +borrow w ws@(w' : rest) + | w < w' = rest + | otherwise = w : borrow (2 * w) ws + +inc :: SparseNat -> SparseNat +inc = carry 1 + +dec :: SparseNat -> SparseNat +dec = borrow 1 + +add :: SparseNat -> SparseNat -> SparseNat +add ws [] = ws +add [] ws = ws +add m@(w1 : ws1) n@(w2 : ws2) + | w1 < w2 = w1 : add ws1 n + | w2 < w1 = w2 : add m ws2 + | otherwise = carry (2 * w1) (add ws1 ws2) diff --git a/StrictList.hs b/StrictList.hs new file mode 100644 index 0000000..c89a935 --- /dev/null +++ b/StrictList.hs @@ -0,0 +1,30 @@ +module StrictList ( + StrictList(..) + , foldl + , reverse + , head + , tail + ) where + +import Prelude hiding (foldl, head, reverse, tail) + +data StrictList a = !a :$ !(StrictList a) | Empty + deriving (Show, Eq) + +infixr 5 :$ + +foldl :: (b -> a -> b) -> b -> StrictList a -> b +foldl f = lgo + where lgo z Empty = z + lgo z (x :$ xs) = let z' = f z x in z' `seq` lgo z' xs + +reverse :: StrictList a -> StrictList a +reverse = foldl (flip (:$)) Empty + +head :: StrictList a -> a +head (x :$ _ ) = x +head Empty = error "Empty StrictList has no head" + +tail :: StrictList a -> StrictList a +tail (_ :$ xs) = xs +tail Empty = error "Empty StrictList has no tail" diff --git a/Zipper.hs b/Zipper.hs new file mode 100644 index 0000000..a1a3ddf --- /dev/null +++ b/Zipper.hs @@ -0,0 +1,51 @@ +module Zipper where + +data Tree a = Item a + | Section [Tree a] + deriving (Show, Eq) + +data Path a = Top + | Node [Tree a] (Path a) [Tree a] + deriving (Show, Eq) + +data Location a = Loc (Tree a) (Path a) + deriving (Show, Eq) + +goLeft :: Location a -> Location a +goLeft (Loc _ Top) = error "Can't go left of top" +goLeft (Loc t (Node (l:left) up right)) = Loc l (Node left up (t:right)) +goLeft (Loc _ (Node [] _ _)) = error "Can't go left of first" + +goRight :: Location a -> Location a +goRight (Loc _ Top) = error "Can't go right of top" +goRight (Loc t (Node left up (r:right))) = Loc r (Node (t:left) up right) +goRight _ = error "Can't go right of last" + +goUp :: Location a -> Location a +goUp (Loc _ Top) = error "Can't go up from top" +goUp (Loc t (Node left up right)) = Loc (Section (reverse left ++ t:right)) up + +goDown :: Location a -> Location a +goDown (Loc (Item _) _) = error "Can't go down from item" +goDown (Loc (Section (t1:trees)) p) = Loc t1 (Node [] p trees) + +nth :: Location a -> Int -> Location a +nth loc = go + where go 1 = goDown loc + go n | n > 0 = goRight (go (n-1)) + | otherwise = error "nth expects a positive int" + +replace :: Location a -> Tree a -> Location a +replace (Loc _ p) t = Loc t p + +insertRight :: Location a -> Tree a -> Location a +insertRight (Loc _ Top) _ = error "Can't insert right of top" +insertRight (Loc t (Node left up right)) r = Loc t (Node left up (r:right)) + +insertLeft :: Location a -> Tree a -> Location a +insertLeft (Loc _ Top) _ = error "Can't insert left of top" +insertLeft (Loc t (Node left up right)) l = Loc t (Node (l:left) up right) + +insertDown :: Location a -> Tree a -> Location a +insertDown (Loc (Item _) _) _ = error "Can't insert down of an item" +insertDown (Loc (Section sons) p) t1 = Loc t1 (Node [] p sons)