{-# LANGUAGE CPP, BangPatterns, PatternGuards, DeriveDataTypeable #-}

module Codec.Archive.Tar.Index.StringTable (

    StringTable,
    lookup,
    index,
    construct,

    StringTableBuilder,
    empty,
    insert,
    inserts,
    finalise,
    unfinalise,

    serialise,
    serialiseSize,
    deserialiseV1,
    deserialiseV2,

#ifdef TESTS
    prop_valid,
    prop_sorted,
    prop_finalise_unfinalise,
    prop_serialise_deserialise,
    prop_serialiseSize,
#endif
 ) where

import Data.Typeable (Typeable)

import Prelude   hiding (lookup, id)
import Data.List hiding (lookup, insert)
import Data.Function (on)
import Data.Word (Word32)
import Data.Int  (Int32)
import Data.Bits
import Data.Monoid (Monoid(..))
#if (MIN_VERSION_base(4,5,0))
import Data.Monoid ((<>))
#endif
import Control.Exception (assert)

import qualified Data.Array.Unboxed as A
import           Data.Array.Unboxed ((!))
#if MIN_VERSION_containers(0,5,0)
import qualified Data.Map.Strict        as Map
import           Data.Map.Strict (Map)
#else
import qualified Data.Map               as Map
import           Data.Map (Map)
#endif
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Lazy   as LBS
#if MIN_VERSION_bytestring(0,10,2) || defined(MIN_VERSION_bytestring_builder)
import Data.ByteString.Builder          as BS
import Data.ByteString.Builder.Extra    as BS (byteStringCopy)
#else
import Data.ByteString.Lazy.Builder     as BS
import Data.ByteString.Lazy.Builder.Extras as BS (byteStringCopy)
#endif


-- | An effecient mapping from strings to a dense set of integers.
--
data StringTable id = StringTable
         {-# UNPACK #-} !BS.ByteString           -- all strings concatenated
         {-# UNPACK #-} !(A.UArray Int32 Word32) -- string offset table
         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string index to id table
         {-# UNPACK #-} !(A.UArray Int32 Int32)  -- string id to index table
  deriving (Int -> StringTable id -> ShowS
[StringTable id] -> ShowS
StringTable id -> String
(Int -> StringTable id -> ShowS)
-> (StringTable id -> String)
-> ([StringTable id] -> ShowS)
-> Show (StringTable id)
forall id. Int -> StringTable id -> ShowS
forall id. [StringTable id] -> ShowS
forall id. StringTable id -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [StringTable id] -> ShowS
$cshowList :: forall id. [StringTable id] -> ShowS
show :: StringTable id -> String
$cshow :: forall id. StringTable id -> String
showsPrec :: Int -> StringTable id -> ShowS
$cshowsPrec :: forall id. Int -> StringTable id -> ShowS
Show, Typeable)

instance (Eq id, Enum id) => Eq (StringTable id) where
  tbl1 :: StringTable id
tbl1 == :: StringTable id -> StringTable id -> Bool
== tbl2 :: StringTable id
tbl2 = StringTable id -> StringTableBuilder id
forall id. Enum id => StringTable id -> StringTableBuilder id
unfinalise StringTable id
tbl1 StringTableBuilder id -> StringTableBuilder id -> Bool
forall a. Eq a => a -> a -> Bool
== StringTable id -> StringTableBuilder id
forall id. Enum id => StringTable id -> StringTableBuilder id
unfinalise StringTable id
tbl2

-- | Look up a string in the token table. If the string is present, return
-- its corresponding index.
--
lookup :: Enum id => StringTable id -> BS.ByteString -> Maybe id
lookup :: StringTable id -> ByteString -> Maybe id
lookup (StringTable bs :: ByteString
bs offsets :: UArray Int32 Word32
offsets ids :: UArray Int32 Int32
ids _ixs :: UArray Int32 Int32
_ixs) str :: ByteString
str =
    Int32 -> Int32 -> ByteString -> Maybe id
forall a. Enum a => Int32 -> Int32 -> ByteString -> Maybe a
binarySearch 0 (Int32
topBoundInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
-1) ByteString
str
  where
    (0, topBound :: Int32
topBound) = UArray Int32 Word32 -> (Int32, Int32)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int32 Word32
offsets

    binarySearch :: Int32 -> Int32 -> ByteString -> Maybe a
binarySearch !Int32
a !Int32
b !ByteString
key
      | Int32
a Int32 -> Int32 -> Bool
forall a. Ord a => a -> a -> Bool
> Int32
b     = Maybe a
forall a. Maybe a
Nothing
      | Bool
otherwise = case ByteString -> ByteString -> Ordering
forall a. Ord a => a -> a -> Ordering
compare ByteString
key (ByteString -> UArray Int32 Word32 -> Int32 -> ByteString
index' ByteString
bs UArray Int32 Word32
offsets Int32
mid) of
          LT -> Int32 -> Int32 -> ByteString -> Maybe a
binarySearch Int32
a (Int32
midInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
-1) ByteString
key
          EQ -> a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> a -> Maybe a
forall a b. (a -> b) -> a -> b
$! Int -> a
forall a. Enum a => Int -> a
toEnum (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (UArray Int32 Int32
ids UArray Int32 Int32 -> Int32 -> Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Int32
mid))
          GT -> Int32 -> Int32 -> ByteString -> Maybe a
binarySearch (Int32
midInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+1) Int32
b ByteString
key
      where mid :: Int32
mid = (Int32
a Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+ Int32
b) Int32 -> Int32 -> Int32
forall a. Integral a => a -> a -> a
`div` 2

index' :: BS.ByteString -> A.UArray Int32 Word32 -> Int32 -> BS.ByteString
index' :: ByteString -> UArray Int32 Word32 -> Int32 -> ByteString
index' bs :: ByteString
bs offsets :: UArray Int32 Word32
offsets i :: Int32
i = Int -> ByteString -> ByteString
BS.unsafeTake Int
len (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
BS.unsafeDrop Int
start (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
bs
  where
    start, end, len :: Int
    start :: Int
start = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (UArray Int32 Word32
offsets UArray Int32 Word32 -> Int32 -> Word32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Int32
i)
    end :: Int
end   = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (UArray Int32 Word32
offsets UArray Int32 Word32 -> Int32 -> Word32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! (Int32
iInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+1))
    len :: Int
len   = Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start


-- | Given the index of a string in the table, return the string.
--
index :: Enum id => StringTable id -> id -> BS.ByteString
index :: StringTable id -> id -> ByteString
index (StringTable bs :: ByteString
bs offsets :: UArray Int32 Word32
offsets _ids :: UArray Int32 Int32
_ids ixs :: UArray Int32 Int32
ixs) =
    ByteString -> UArray Int32 Word32 -> Int32 -> ByteString
index' ByteString
bs UArray Int32 Word32
offsets (Int32 -> ByteString) -> (id -> Int32) -> id -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (UArray Int32 Int32
ixs UArray Int32 Int32 -> Int32 -> Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!) (Int32 -> Int32) -> (id -> Int32) -> id -> Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int32) -> (id -> Int) -> id -> Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. id -> Int
forall a. Enum a => a -> Int
fromEnum


-- | Given a list of strings, construct a 'StringTable' mapping those strings
-- to a dense set of integers. Also return the ids for all the strings used
-- in the construction.
--
construct :: Enum id => [BS.ByteString] -> StringTable id
construct :: [ByteString] -> StringTable id
construct = StringTableBuilder id -> StringTable id
forall id. Enum id => StringTableBuilder id -> StringTable id
finalise (StringTableBuilder id -> StringTable id)
-> ([ByteString] -> StringTableBuilder id)
-> [ByteString]
-> StringTable id
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (StringTableBuilder id -> ByteString -> StringTableBuilder id)
-> StringTableBuilder id -> [ByteString] -> StringTableBuilder id
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\tbl :: StringTableBuilder id
tbl s :: ByteString
s -> (StringTableBuilder id, id) -> StringTableBuilder id
forall a b. (a, b) -> a
fst (ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
forall id.
Enum id =>
ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
insert ByteString
s StringTableBuilder id
tbl)) StringTableBuilder id
forall id. StringTableBuilder id
empty


data StringTableBuilder id = StringTableBuilder
                                              !(Map BS.ByteString id)
                               {-# UNPACK #-} !Word32
  deriving (StringTableBuilder id -> StringTableBuilder id -> Bool
(StringTableBuilder id -> StringTableBuilder id -> Bool)
-> (StringTableBuilder id -> StringTableBuilder id -> Bool)
-> Eq (StringTableBuilder id)
forall id.
Eq id =>
StringTableBuilder id -> StringTableBuilder id -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: StringTableBuilder id -> StringTableBuilder id -> Bool
$c/= :: forall id.
Eq id =>
StringTableBuilder id -> StringTableBuilder id -> Bool
== :: StringTableBuilder id -> StringTableBuilder id -> Bool
$c== :: forall id.
Eq id =>
StringTableBuilder id -> StringTableBuilder id -> Bool
Eq, Int -> StringTableBuilder id -> ShowS
[StringTableBuilder id] -> ShowS
StringTableBuilder id -> String
(Int -> StringTableBuilder id -> ShowS)
-> (StringTableBuilder id -> String)
-> ([StringTableBuilder id] -> ShowS)
-> Show (StringTableBuilder id)
forall id. Show id => Int -> StringTableBuilder id -> ShowS
forall id. Show id => [StringTableBuilder id] -> ShowS
forall id. Show id => StringTableBuilder id -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [StringTableBuilder id] -> ShowS
$cshowList :: forall id. Show id => [StringTableBuilder id] -> ShowS
show :: StringTableBuilder id -> String
$cshow :: forall id. Show id => StringTableBuilder id -> String
showsPrec :: Int -> StringTableBuilder id -> ShowS
$cshowsPrec :: forall id. Show id => Int -> StringTableBuilder id -> ShowS
Show, Typeable)

empty :: StringTableBuilder id
empty :: StringTableBuilder id
empty = Map ByteString id -> Word32 -> StringTableBuilder id
forall id. Map ByteString id -> Word32 -> StringTableBuilder id
StringTableBuilder Map ByteString id
forall k a. Map k a
Map.empty 0

insert :: Enum id => BS.ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
insert :: ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
insert str :: ByteString
str builder :: StringTableBuilder id
builder@(StringTableBuilder smap :: Map ByteString id
smap nextid :: Word32
nextid) =
    case ByteString -> Map ByteString id -> Maybe id
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup ByteString
str Map ByteString id
smap of
      Just id :: id
id -> (StringTableBuilder id
builder, id
id)
      Nothing -> let !id :: id
id   = Int -> id
forall a. Enum a => Int -> a
toEnum (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
nextid)
                     !smap' :: Map ByteString id
smap' = ByteString -> id -> Map ByteString id -> Map ByteString id
forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert ByteString
str id
id Map ByteString id
smap
                   in (Map ByteString id -> Word32 -> StringTableBuilder id
forall id. Map ByteString id -> Word32 -> StringTableBuilder id
StringTableBuilder Map ByteString id
smap' (Word32
nextidWord32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+1), id
id)

inserts :: Enum id => [BS.ByteString] -> StringTableBuilder id -> (StringTableBuilder id, [id])
inserts :: [ByteString]
-> StringTableBuilder id -> (StringTableBuilder id, [id])
inserts bss :: [ByteString]
bss builder :: StringTableBuilder id
builder = (StringTableBuilder id
 -> ByteString -> (StringTableBuilder id, id))
-> StringTableBuilder id
-> [ByteString]
-> (StringTableBuilder id, [id])
forall (t :: * -> *) a b c.
Traversable t =>
(a -> b -> (a, c)) -> a -> t b -> (a, t c)
mapAccumL ((ByteString
 -> StringTableBuilder id -> (StringTableBuilder id, id))
-> StringTableBuilder id
-> ByteString
-> (StringTableBuilder id, id)
forall a b c. (a -> b -> c) -> b -> a -> c
flip ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
forall id.
Enum id =>
ByteString -> StringTableBuilder id -> (StringTableBuilder id, id)
insert) StringTableBuilder id
builder [ByteString]
bss

finalise :: Enum id => StringTableBuilder id -> StringTable id
finalise :: StringTableBuilder id -> StringTable id
finalise (StringTableBuilder smap :: Map ByteString id
smap _) =
    (ByteString
-> UArray Int32 Word32
-> UArray Int32 Int32
-> UArray Int32 Int32
-> StringTable id
forall id.
ByteString
-> UArray Int32 Word32
-> UArray Int32 Int32
-> UArray Int32 Int32
-> StringTable id
StringTable ByteString
strs UArray Int32 Word32
offsets UArray Int32 Int32
ids UArray Int32 Int32
ixs)
  where
    strs :: ByteString
strs    = [ByteString] -> ByteString
BS.concat (Map ByteString id -> [ByteString]
forall k a. Map k a -> [k]
Map.keys Map ByteString id
smap)
    offsets :: UArray Int32 Word32
offsets = (Int32, Int32) -> [Word32] -> UArray Int32 Word32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Map ByteString id -> Int
forall k a. Map k a -> Int
Map.size Map ByteString id
smap))
            ([Word32] -> UArray Int32 Word32)
-> ([ByteString] -> [Word32])
-> [ByteString]
-> UArray Int32 Word32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word32 -> ByteString -> Word32)
-> Word32 -> [ByteString] -> [Word32]
forall b a. (b -> a -> b) -> b -> [a] -> [b]
scanl (\off :: Word32
off str :: ByteString
str -> Word32
off Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
BS.length ByteString
str)) 0
            ([ByteString] -> UArray Int32 Word32)
-> [ByteString] -> UArray Int32 Word32
forall a b. (a -> b) -> a -> b
$ Map ByteString id -> [ByteString]
forall k a. Map k a -> [k]
Map.keys Map ByteString id
smap
    ids :: UArray Int32 Int32
ids     = (Int32, Int32) -> [Int32] -> UArray Int32 Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Map ByteString id -> Int
forall k a. Map k a -> Int
Map.size Map ByteString id
smap) Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 1)
            ([Int32] -> UArray Int32 Int32)
-> ([id] -> [Int32]) -> [id] -> UArray Int32 Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (id -> Int32) -> [id] -> [Int32]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int32) -> (id -> Int) -> id -> Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. id -> Int
forall a. Enum a => a -> Int
fromEnum)
            ([id] -> UArray Int32 Int32) -> [id] -> UArray Int32 Int32
forall a b. (a -> b) -> a -> b
$ Map ByteString id -> [id]
forall k a. Map k a -> [a]
Map.elems Map ByteString id
smap
    ixs :: UArray Int32 Int32
ixs     = (Int32, Int32) -> [(Int32, Int32)] -> UArray Int32 Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
A.array (UArray Int32 Int32 -> (Int32, Int32)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int32 Int32
ids) [ (Int32
id,Int32
ix) | (ix :: Int32
ix,id :: Int32
id) <- UArray Int32 Int32 -> [(Int32, Int32)]
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> [(i, e)]
A.assocs UArray Int32 Int32
ids ]

unfinalise :: Enum id => StringTable id -> StringTableBuilder id
unfinalise :: StringTable id -> StringTableBuilder id
unfinalise (StringTable strs :: ByteString
strs offsets :: UArray Int32 Word32
offsets ids :: UArray Int32 Int32
ids _) =
    Map ByteString id -> Word32 -> StringTableBuilder id
forall id. Map ByteString id -> Word32 -> StringTableBuilder id
StringTableBuilder Map ByteString id
smap Word32
nextid
  where
    smap :: Map ByteString id
smap   = [(ByteString, id)] -> Map ByteString id
forall k a. Eq k => [(k, a)] -> Map k a
Map.fromAscList
               [ (ByteString -> UArray Int32 Word32 -> Int32 -> ByteString
index' ByteString
strs UArray Int32 Word32
offsets Int32
ix, Int -> id
forall a. Enum a => Int -> a
toEnum (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (UArray Int32 Int32
ids UArray Int32 Int32 -> Int32 -> Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
! Int32
ix)))
               | Int32
ix <- [0..Int32
h] ]
    (0,h :: Int32
h)  = UArray Int32 Int32 -> (Int32, Int32)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int32 Int32
ids
    nextid :: Word32
nextid = Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int32
hInt32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
+1)


-------------------------
-- (de)serialisation
--

serialise :: StringTable id -> BS.Builder
serialise :: StringTable id -> Builder
serialise (StringTable strs :: ByteString
strs offs :: UArray Int32 Word32
offs ids :: UArray Int32 Int32
ids ixs :: UArray Int32 Int32
ixs) =
      let (_, !Int32
ixEnd) = UArray Int32 Word32 -> (Int32, Int32)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int32 Word32
offs in

      Word32 -> Builder
BS.word32BE (Int -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int
BS.length ByteString
strs))
   Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Word32 -> Builder
BS.word32BE (Int32 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
ixEnd Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ 1)
   Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> ByteString -> Builder
BS.byteStringCopy ByteString
strs
   Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Word32 -> Builder -> Builder) -> Builder -> [Word32] -> Builder
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\n :: Word32
n r :: Builder
r -> Word32 -> Builder
BS.word32BE Word32
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
r) Builder
forall a. Monoid a => a
mempty (UArray Int32 Word32 -> [Word32]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
A.elems UArray Int32 Word32
offs)
   Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Int32 -> Builder -> Builder) -> Builder -> [Int32] -> Builder
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\n :: Int32
n r :: Builder
r -> Int32 -> Builder
BS.int32BE  Int32
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
r) Builder
forall a. Monoid a => a
mempty (UArray Int32 Int32 -> [Int32]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
A.elems UArray Int32 Int32
ids)
   Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> (Int32 -> Builder -> Builder) -> Builder -> [Int32] -> Builder
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\n :: Int32
n r :: Builder
r -> Int32 -> Builder
BS.int32BE  Int32
n Builder -> Builder -> Builder
forall a. Semigroup a => a -> a -> a
<> Builder
r) Builder
forall a. Monoid a => a
mempty (UArray Int32 Int32 -> [Int32]
forall (a :: * -> * -> *) e i. (IArray a e, Ix i) => a i e -> [e]
A.elems UArray Int32 Int32
ixs)

serialiseSize :: StringTable id -> Int
serialiseSize :: StringTable id -> Int
serialiseSize (StringTable strs :: ByteString
strs offs :: UArray Int32 Word32
offs _ids :: UArray Int32 Int32
_ids _ixs :: UArray Int32 Int32
_ixs) =
    let (_, !Int32
ixEnd) = UArray Int32 Word32 -> (Int32, Int32)
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> (i, i)
A.bounds UArray Int32 Word32
offs
     in 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2
      Int -> Int -> Int
forall a. Num a => a -> a -> a
+ ByteString -> Int
BS.length ByteString
strs
      Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
ixEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)
      Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 8 Int -> Int -> Int
forall a. Num a => a -> a -> a
*  Int32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int32
ixEnd

deserialiseV1 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseV1 :: ByteString -> Maybe (StringTable id, ByteString)
deserialiseV1 bs :: ByteString
bs
  | ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 8
  , let lenStrs :: Int
lenStrs = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word32
readWord32BE ByteString
bs 0)
        lenArr :: Int
lenArr  = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word32
readWord32BE ByteString
bs 4)
        lenTotal :: Int
lenTotal= 8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lenStrs Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
lenArr
  , ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
lenTotal
  , let strs :: ByteString
strs = Int -> ByteString -> ByteString
BS.take Int
lenStrs (Int -> ByteString -> ByteString
BS.drop 8 ByteString
bs)
        arr :: UArray Int32 Word32
arr  = (Int32, Int32) -> [(Int32, Word32)] -> UArray Int32 Word32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
A.array (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 1)
                       [ (Int32
i, ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
off)
                       | (i :: Int32
i, off :: Int
off) <- [Int32] -> [Int] -> [(Int32, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [0 .. Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 1]
                                         [Int
offArrS,Int
offArrSInt -> Int -> Int
forall a. Num a => a -> a -> a
+4 .. Int
offArrE]
                       ]
        ids :: UArray Int32 Int32
ids  = (Int32, Int32) -> [(Int32, Int32)] -> UArray Int32 Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
A.array (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 1)
                       [ (Int32
i,Int32
i) | Int32
i <- [0 .. Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 1] ]
        ixs :: UArray Int32 Int32
ixs  = UArray Int32 Int32
ids -- two identity mappings
        offArrS :: Int
offArrS = 8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lenStrs
        offArrE :: Int
offArrE = Int
offArrS Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
lenArr Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1
        !stringTable :: StringTable id
stringTable = ByteString
-> UArray Int32 Word32
-> UArray Int32 Int32
-> UArray Int32 Int32
-> StringTable id
forall id.
ByteString
-> UArray Int32 Word32
-> UArray Int32 Int32
-> UArray Int32 Int32
-> StringTable id
StringTable ByteString
strs UArray Int32 Word32
arr UArray Int32 Int32
ids UArray Int32 Int32
ixs
        !bs' :: ByteString
bs'         = Int -> ByteString -> ByteString
BS.drop Int
lenTotal ByteString
bs
  = (StringTable id, ByteString) -> Maybe (StringTable id, ByteString)
forall a. a -> Maybe a
Just (StringTable id
forall id. StringTable id
stringTable, ByteString
bs')

  | Bool
otherwise
  = Maybe (StringTable id, ByteString)
forall a. Maybe a
Nothing

deserialiseV2 :: BS.ByteString -> Maybe (StringTable id, BS.ByteString)
deserialiseV2 :: ByteString -> Maybe (StringTable id, ByteString)
deserialiseV2 bs :: ByteString
bs
  | ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 8
  , let lenStrs :: Int
lenStrs = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word32
readWord32BE ByteString
bs 0)
        lenArr :: Int
lenArr  = Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word32
readWord32BE ByteString
bs 4)
        lenTotal :: Int
lenTotal= 8                   -- the two length prefixes
                Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lenStrs
                Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
lenArr
                Int -> Int -> Int
forall a. Num a => a -> a -> a
+(4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
lenArr Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)) Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2 -- offsets array is 1 longer
  , ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
lenTotal
  , let strs :: ByteString
strs = Int -> ByteString -> ByteString
BS.take Int
lenStrs (Int -> ByteString -> ByteString
BS.drop 8 ByteString
bs)
        offs :: UArray Int32 Word32
offs = (Int32, Int32) -> [Word32] -> UArray Int32 Word32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 1)
                           [ ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
off
                           | Int
off <- Int -> [Int]
offsets Int
offsOff ]
        -- the second two arrays are 1 shorter
        ids :: UArray Int32 Int32
ids  = (Int32, Int32) -> [Int32] -> UArray Int32 Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 2)
                           [ ByteString -> Int -> Int32
readInt32BE ByteString
bs Int
off
                           | Int
off <- Int -> [Int]
offsets Int
idsOff ]
        ixs :: UArray Int32 Int32
ixs  = (Int32, Int32) -> [Int32] -> UArray Int32 Int32
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [e] -> a i e
A.listArray (0, Int -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
lenArr Int32 -> Int32 -> Int32
forall a. Num a => a -> a -> a
- 2)
                           [ ByteString -> Int -> Int32
readInt32BE ByteString
bs Int
off
                           | Int
off <- Int -> [Int]
offsets Int
ixsOff ]
        offsOff :: Int
offsOff = 8 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
lenStrs
        idsOff :: Int
idsOff  = Int
offsOff Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
lenArr
        ixsOff :: Int
ixsOff  = Int
idsOff  Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
lenArrInt -> Int -> Int
forall a. Num a => a -> a -> a
-1)
        offsets :: Int -> [Int]
offsets from :: Int
from = [Int
from,Int
fromInt -> Int -> Int
forall a. Num a => a -> a -> a
+4 .. Int
from Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* (Int
lenArr Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1)]
        !stringTable :: StringTable id
stringTable = ByteString
-> UArray Int32 Word32
-> UArray Int32 Int32
-> UArray Int32 Int32
-> StringTable id
forall id.
ByteString
-> UArray Int32 Word32
-> UArray Int32 Int32
-> UArray Int32 Int32
-> StringTable id
StringTable ByteString
strs UArray Int32 Word32
offs UArray Int32 Int32
ids UArray Int32 Int32
ixs
        !bs' :: ByteString
bs'         = Int -> ByteString -> ByteString
BS.drop Int
lenTotal ByteString
bs
  = (StringTable id, ByteString) -> Maybe (StringTable id, ByteString)
forall a. a -> Maybe a
Just (StringTable id
forall id. StringTable id
stringTable, ByteString
bs')

  | Bool
otherwise
  = Maybe (StringTable id, ByteString)
forall a. Maybe a
Nothing

readInt32BE :: BS.ByteString -> Int -> Int32
readInt32BE :: ByteString -> Int -> Int32
readInt32BE bs :: ByteString
bs i :: Int
i = Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word32
readWord32BE ByteString
bs Int
i)

readWord32BE :: BS.ByteString -> Int -> Word32
readWord32BE :: ByteString -> Int -> Word32
readWord32BE bs :: ByteString
bs i :: Int
i =
    Bool -> Word32 -> Word32
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0 Bool -> Bool -> Bool
&& Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+3 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= ByteString -> Int
BS.length ByteString
bs Int -> Int -> Int
forall a. Num a => a -> a -> a
- 1) (Word32 -> Word32) -> Word32 -> Word32
forall a b. (a -> b) -> a -> b
$
    Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 0)) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` 24
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 1)) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` 16
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2)) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`shiftL` 8
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (ByteString -> Int -> Word8
BS.unsafeIndex ByteString
bs (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 3))

#ifdef TESTS

prop_valid :: [BS.ByteString] -> Bool
prop_valid strs =
     all lookupIndex (enumStrings tbl)
  && all indexLookup (enumIds tbl)

  where
    tbl :: StringTable Int
    tbl = construct strs

    lookupIndex str = index tbl ident == str
      where Just ident = lookup tbl str

    indexLookup ident = lookup tbl str == Just ident
      where str       = index tbl ident

-- this is important so we can use Map.fromAscList
prop_sorted :: [BS.ByteString] -> Bool
prop_sorted strings =
    isSorted [ index' strs offsets ix
             | ix <- A.range (A.bounds ids) ]
  where
    _tbl :: StringTable Int
    _tbl@(StringTable strs offsets ids _ixs) = construct strings
    isSorted xs = and (zipWith (<) xs (tail xs))

prop_finalise_unfinalise :: [BS.ByteString] -> Bool
prop_finalise_unfinalise strs =
    builder == unfinalise (finalise builder)
  where
    builder :: StringTableBuilder Int
    builder = foldl' (\tbl s -> fst (insert s tbl)) empty strs

prop_serialise_deserialise :: [BS.ByteString] -> Bool
prop_serialise_deserialise strs =
    Just (strtable, BS.empty) == (deserialiseV2
                                . toStrict . BS.toLazyByteString
                                . serialise) strtable
  where
    strtable :: StringTable Int
    strtable = construct strs

prop_serialiseSize :: [BS.ByteString] -> Bool
prop_serialiseSize strs =
    (fromIntegral . LBS.length . BS.toLazyByteString . serialise) strtable
 == serialiseSize strtable
  where
    strtable :: StringTable Int
    strtable = construct strs

enumStrings :: Enum id => StringTable id -> [BS.ByteString]
enumStrings (StringTable bs offsets _ _) = map (index' bs offsets) [0..h-1]
  where (0,h) = A.bounds offsets

enumIds :: Enum id => StringTable id -> [id]
enumIds (StringTable _ offsets _ _) = [toEnum 0 .. toEnum (fromIntegral (h-1))]
  where (0,h) = A.bounds offsets

toStrict :: LBS.ByteString -> BS.ByteString
#if MIN_VERSION_bytestring(0,10,0)
toStrict = LBS.toStrict
#else
toStrict = BS.concat . LBS.toChunks
#endif

#endif

#if !(MIN_VERSION_base(4,5,0))
(<>) :: Monoid m => m -> m -> m
(<>) = mappend
#endif