diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Data/Algorithm/Patience.hs | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/src/Data/Algorithm/Patience.hs b/src/Data/Algorithm/Patience.hs new file mode 100644 index 0000000..f987659 --- /dev/null +++ b/src/Data/Algorithm/Patience.hs @@ -0,0 +1,154 @@ +{-# LANGUAGE + DeriveDataTypeable + , ViewPatterns + , CPP #-} +-- | Implements \"patience diff\" and the patience algorithm for the longest +-- increasing subsequence problem. +module Data.Algorithm.Patience + ( -- * Patience diff + diff + , Item(..), itemChar, itemValue + -- * Longest increasing subsequence + , longestIncreasing + ) where +import qualified Data.Sequence as S +import Data.Sequence ( (<|), (|>), (><), ViewL(..), ViewR(..) ) +import qualified Data.Foldable as F +import qualified Data.Map as M +import qualified Data.IntMap as IM + +import Data.List +import Data.Ord + +import Data.Typeable ( Typeable ) +import Data.Data ( Data ) + +-- If key xi is in the map, move it to xf while adjusting the value with f. +adjMove :: (a -> a) -> Int -> Int -> IM.IntMap a -> IM.IntMap a +adjMove f xi xf m = case IM.updateLookupWithKey (\_ _ -> Nothing) xi m of + (Just v, mm) -> IM.insert xf (f v) mm + (Nothing, _) -> m + +-- A "card" is an integer value (with annotation) plus a "backpointer" to +-- a card in the previous pile, if any. +data Card a = Card Int a (Maybe (Card a)) + +-- | Given: a list of distinct integers. Picks a subset of the integers +-- in the same order, i.e. a subsequence, with the property that +-- +-- * it is monotonically increasing, and +-- +-- * it is at least as long as any other such subsequence. +-- +-- This function uses patience sort: +-- <http://en.wikipedia.org/wiki/Patience_sorting>. +-- For implementation reasons, the actual list returned is the reverse of +-- the subsequence. +-- +-- You can pair each integer with an arbitrary annotation, which will be +-- carried through the algorithm. +longestIncreasing :: [(Int,a)] -> [(Int,a)] +longestIncreasing = extract . foldl' ins IM.empty where + -- Insert a card into the proper pile. + -- type Pile a = [Card a] + -- type Piles a = IM.IntMap (Pile a) -- keyed by smallest element + ins m (x,a) = + let (lt, gt) = IM.split x m + prev = (head . fst) `fmap` IM.maxView lt + new = Card x a prev + in case IM.minViewWithKey gt of + Nothing -> IM.insert x [new] m -- new pile + Just ((k,_),_) -> adjMove (new:) k x m -- top of old pile + -- Walk the backpointers, starting at the top card of the + -- highest-keyed pile. + extract (IM.maxView -> Just (c,_)) = walk $ head c + extract _ = [] + walk (Card x a c) = (x,a) : maybe [] walk c + +-- Elements whose second component appears exactly once. +unique :: (Ord t) => S.Seq (a,t) -> M.Map t a +unique = M.mapMaybe id . F.foldr ins M.empty where + ins (a,x) = M.insertWith' (\_ _ -> Nothing) x (Just a) + +-- Given two sequences of numbered "lines", returns a list of points +-- where unique lines match up. +solveLCS :: (Ord t) => S.Seq (Int,t) -> S.Seq (Int,t) -> [(Int,Int)] +solveLCS ma mb = + let xs = M.elems $ M.intersectionWith (,) (unique ma) (unique mb) + in longestIncreasing $ sortBy (comparing snd) xs + +-- Type for decomposing a diff problem. We either have two +-- lines that match, or a recursive subproblem. +data Piece a + = Match a a + | Diff (S.Seq a) (S.Seq a) + deriving (Show) + +-- Subdivides a diff problem according to the indices of matching lines. +chop :: S.Seq t -> S.Seq t -> [(Int,Int)] -> [Piece t] +chop xs ys [] + | S.null xs && S.null ys = [] + | otherwise = [Diff xs ys] +chop xs ys ((nx,ny):ns) = + let (xsr, S.viewl -> (x :< xse)) = S.splitAt nx xs + (ysr, S.viewl -> (y :< yse)) = S.splitAt ny ys + in Diff xse yse : Match x y : chop xsr ysr ns + +-- Zip a list with a Seq. +zipLS :: [a] -> S.Seq b -> S.Seq (a, b) +#if MIN_VERSION_containers(0,3,0) +zipLS = S.zip . S.fromList +#else +zipLS xs = S.fromList . zip xs . F.toList +#endif + +-- Number the elements of a Seq. +number :: S.Seq t -> S.Seq (Int,t) +number xs = zipLS [0..S.length xs - 1] xs + +-- | An element of a computed difference. +data Item t + = Old t -- ^ Value taken from the \"old\" list, i.e. left argument to 'diff' + | New t -- ^ Value taken from the \"new\" list, i.e. right argument to 'diff' + | Both t t -- ^ Value taken from both lists. Both values are provided, in case + -- your type has a non-structural definition of equality. + deriving (Eq, Ord, Show, Read, Typeable, Data) + +instance Functor Item where + fmap f (Old x ) = Old (f x) + fmap f (New x ) = New (f x) + fmap f (Both x y) = Both (f x) (f y) + +-- | The difference between two lists, according to the +-- \"patience diff\" algorithm. +diff :: (Ord t) => [t] -> [t] -> [Item t] +diff xsl ysl = F.toList $ go (S.fromList xsl) (S.fromList ysl) where + -- Handle common elements at the beginning / end. + go (S.viewl -> (x :< xs)) (S.viewl -> (y :< ys)) + | x == y = Both x y <| go xs ys + go (S.viewr -> (xs :> x)) (S.viewr -> (ys :> y)) + | x == y = go xs ys |> Both x y + -- Find an increasing sequence of matching unique lines, then + -- subdivide at those points and recurse. + go xs ys = case chop xs ys $ solveLCS (number xs) (number ys) of + -- If we fail to subdivide, just record the chunk as is. + [Diff _ _] -> fmap Old xs >< fmap New ys + ps -> recur ps + + -- Apply the algorithm recursively to a decomposed problem. + -- The decomposition list is in reversed order. + recur [] = S.empty + recur (Match x y : ps) = recur ps |> Both x y + recur (Diff xs ys : ps) = recur ps >< go xs ys + +-- | The character @\'-\'@ or @\'+\'@ or @\' \'@ for 'Old' or 'New' or 'Both' respectively. +itemChar :: Item t -> Char +itemChar (Old _ ) = '-' +itemChar (New _ ) = '+' +itemChar (Both _ _) = ' ' + +-- | The value from an 'Item'. For 'Both', returns the \"old\" value. +itemValue :: Item t -> t +itemValue (Old x ) = x +itemValue (New x ) = x +itemValue (Both x _) = x |