From 98553c71370768131ad6fb76f1396984f97bbbdf Mon Sep 17 00:00:00 2001
From: Vincent Hanquez <vincent@snarc.org>
Date: Wed, 1 Jun 2016 07:59:59 +0100
Subject: [PATCH] fully handle various hash size in binary formats
 (pack/packindex) and named ref

---
 Data/Git/Named.hs              |  13 ++-
 Data/Git/Parser.hs             |   4 +-
 Data/Git/Ref.hs                |   9 ++
 Data/Git/Storage/FileReader.hs |   7 +-
 Data/Git/Storage/Pack.hs       |  36 +++----
 Data/Git/Storage/PackIndex.hs  | 187 +++++++++++++++++----------------
 tests/Tests.hs                 |   7 +-
 7 files changed, 143 insertions(+), 120 deletions(-)

diff --git a/Data/Git/Named.hs b/Data/Git/Named.hs
index 7fbbfd6..93cce0e 100644
--- a/Data/Git/Named.hs
+++ b/Data/Git/Named.hs
@@ -122,13 +122,13 @@ readPackedRefs gitRepo constr = do
         accu a l
             | "#" `BC.isPrefixOf` l = a
             | otherwise =
-                let (ref, r) = B.splitAt 40 l
+                let (ref, r) = consumeHexRef hashAlg l
                     name     = UTF8.toString $ B.tail r
                  in case toRefTy name of
                         -- accumulate tag, branch and remotes
-                        RefTag refname    -> a { packedTags    = (refname, fromHex ref) : packedTags a }
-                        RefBranch refname -> a { packedBranchs = (refname, fromHex ref) : packedBranchs a }
-                        RefRemote refname -> a { packedRemotes = (refname, fromHex ref) : packedRemotes a }
+                        RefTag refname    -> a { packedTags    = (refname, ref) : packedTags a }
+                        RefBranch refname -> a { packedBranchs = (refname, ref) : packedBranchs a }
+                        RefRemote refname -> a { packedRemotes = (refname, ref) : packedRemotes a }
                         -- anything else that shouldn't be there get dropped on the floor
                         _                 -> a
 
@@ -176,5 +176,8 @@ readRefFile gitRepo specty = toRefContent <$> readBinaryFile filepath
     where filepath = toPath gitRepo specty
           toRefContent content
             | "ref: " `B.isPrefixOf` content = RefLink $ toRefTy $ UTF8.toString $ head $ BC.lines $ B.drop 5 content
-            | B.length content < 42          = RefDirect $ fromHex $ B.take 40 content
+            | B.length content < 42          = RefDirect $ fst $ consumeHexRef hashAlg content
             | otherwise                      = RefContentUnknown content
+
+consumeHexRef :: HashAlgorithm hash => hash -> B.ByteString -> (Ref hash, B.ByteString)
+consumeHexRef alg b = let (b1,b2) = B.splitAt (hashDigestSize alg * 2) b in (fromHex b1, b2)
diff --git a/Data/Git/Parser.hs b/Data/Git/Parser.hs
index 3f4f617..61af23f 100644
--- a/Data/Git/Parser.hs
+++ b/Data/Git/Parser.hs
@@ -58,8 +58,8 @@ word32 = be32 <$> P.take 4
 
 ref, referenceBin, referenceHex :: HashAlgorithm hash => Parser (Ref hash)
 ref = referenceBin
-referenceBin = takeDigestSize (error "referenceBin") 1 fromBinary
-referenceHex = takeDigestSize (error "referenceHex") 2 fromHex
+referenceBin = takeDigestSize hashAlg 1 fromBinary
+referenceHex = takeDigestSize hashAlg 2 fromHex
 
 takeDigestSize :: HashAlgorithm hash => hash -> Int -> (B.ByteString -> Ref hash) -> Parser (Ref hash)
 takeDigestSize alg modifier constr = constr <$> P.take (modifier * hashDigestSize alg)
diff --git a/Data/Git/Ref.hs b/Data/Git/Ref.hs
index f0f884c..7b6301c 100644
--- a/Data/Git/Ref.hs
+++ b/Data/Git/Ref.hs
@@ -31,6 +31,8 @@ module Data.Git.Ref
     -- * Hash ByteString types to a ref
     , hash
     , hashLBS
+    , hashAlg
+    , hashAlgFromRef
     ) where
 
 import qualified Crypto.Hash
@@ -126,3 +128,10 @@ hash = Ref . Crypto.Hash.hash
 -- | hash a lazy bytestring into a reference
 hashLBS :: Crypto.Hash.HashAlgorithm hash => L.ByteString -> Ref hash
 hashLBS = Ref . Crypto.Hash.hashlazy
+
+-- | Any hash algorithm
+hashAlg :: Crypto.Hash.HashAlgorithm hash => hash
+hashAlg = error "polymorphic hash algorithm. only to use with hashDigestSize"
+
+hashAlgFromRef :: Crypto.Hash.HashAlgorithm hash => Ref hash -> hash
+hashAlgFromRef _ = hashAlg
diff --git a/Data/Git/Storage/FileReader.hs b/Data/Git/Storage/FileReader.hs
index f08aa2a..69bf347 100644
--- a/Data/Git/Storage/FileReader.hs
+++ b/Data/Git/Storage/FileReader.hs
@@ -16,10 +16,10 @@ module Data.Git.Storage.FileReader
         , fileReaderGet
         , fileReaderGetLBS
         , fileReaderGetBS
+        , fileReaderGetRef
         , fileReaderGetVLF
         , fileReaderSeek
         , fileReaderParse
-
         , fileReaderInflateToSize
         ) where
 
@@ -35,6 +35,7 @@ import Data.IORef
 
 import Data.Git.Imports
 import Data.Git.OS
+import Data.Git.Ref
 import qualified Data.Git.Parser as P
 
 import Data.Data
@@ -42,6 +43,7 @@ import Data.Word
 
 import Codec.Zlib
 import Codec.Zlib.Lowlevel
+import Crypto.Hash
 import Foreign.ForeignPtr
 import qualified Control.Exception as E
 
@@ -125,6 +127,9 @@ fileReaderGetLBS size fb = L.fromChunks <$> fileReaderGet size fb
 fileReaderGetBS :: Int -> FileReader -> IO ByteString
 fileReaderGetBS size fb = B.concat <$> fileReaderGet size fb
 
+fileReaderGetRef :: HashAlgorithm hash => hash -> FileReader -> IO (Ref hash)
+fileReaderGetRef alg fr = fromBinary <$> fileReaderGetBS (hashDigestSize alg) fr
+
 -- | seek in a handle, and reset the remaining buffer to empty.
 fileReaderSeek :: FileReader -> Word64 -> IO ()
 fileReaderSeek (FileReader { fbHandle = handle, fbRemaining = ref, fbPos = pos }) absPos = do
diff --git a/Data/Git/Storage/Pack.hs b/Data/Git/Storage/Pack.hs
index 1dbd47d..56f9431 100644
--- a/Data/Git/Storage/Pack.hs
+++ b/Data/Git/Storage/Pack.hs
@@ -147,27 +147,27 @@ getNextObjectRaw fr = do
         sobj       <- fileReaderGetPos fr
         (ty, size) <- fileReaderParse fr parseObjectHeader
         extra      <- case ty of
-                TypeDeltaRef -> Just . PtrRef . fromBinary <$> fileReaderGetBS 20 fr
+                TypeDeltaRef -> Just . PtrRef <$> fileReaderGetRef hashAlg fr
                 TypeDeltaOff -> Just . PtrOfs . deltaOffFromList <$> fileReaderGetVLF fr
                 _            -> return Nothing
         objData    <- fileReaderInflateToSize fr size
         eobj       <- fileReaderGetPos fr
 
         return (PackedObjectInfo ty sobj (eobj - sobj) size extra, objData)
-        where
-                parseObjectHeader = do
-                        (m, ty, sz) <- splitFirst <$> P.anyByte
-                        size <- if m then (sz +) <$> getNextSize 4 else return sz
-                        return (ty, size)
-                        where
-                                getNextSize n = do
-                                        (c, sz) <- splitOther n <$> P.anyByte
-                                        if c then (sz +) <$> getNextSize (n+7) else return sz
-
-                                splitFirst :: Word8 -> (Bool, ObjectType, Word64)
-                                splitFirst w = (w `testBit` 7, toEnum $ fromIntegral ((w `shiftR` 4) .&. 0x7), fromIntegral (w .&. 0xf))
-                                splitOther n w = (w `testBit` 7, fromIntegral (w .&. 0x7f) `shiftL` n)
-
-                deltaOffFromList (x:xs) = foldl' acc (fromIntegral (x `clearBit` 7)) xs
-                        where acc a w = ((a+1) `shiftL` 7) + fromIntegral (w `clearBit` 7)
-                deltaOffFromList [] = error "cannot happen"
+  where
+    parseObjectHeader = do
+        (m, ty, sz) <- splitFirst <$> P.anyByte
+        size <- if m then (sz +) <$> getNextSize 4 else return sz
+        return (ty, size)
+      where
+        getNextSize n = do
+            (c, sz) <- splitOther n <$> P.anyByte
+            if c then (sz +) <$> getNextSize (n+7) else return sz
+
+        splitFirst :: Word8 -> (Bool, ObjectType, Word64)
+        splitFirst w = (w `testBit` 7, toEnum $ fromIntegral ((w `shiftR` 4) .&. 0x7), fromIntegral (w .&. 0xf))
+        splitOther n w = (w `testBit` 7, fromIntegral (w .&. 0x7f) `shiftL` n)
+
+    deltaOffFromList (x:xs) = foldl' acc (fromIntegral (x `clearBit` 7)) xs
+      where acc a w = ((a+1) `shiftL` 7) + fromIntegral (w `clearBit` 7)
+    deltaOffFromList [] = error "cannot happen"
diff --git a/Data/Git/Storage/PackIndex.hs b/Data/Git/Storage/PackIndex.hs
index 4ada28e..2b7f1c1 100644
--- a/Data/Git/Storage/PackIndex.hs
+++ b/Data/Git/Storage/PackIndex.hs
@@ -7,23 +7,23 @@
 --
 {-# LANGUAGE OverloadedStrings, BangPatterns #-}
 module Data.Git.Storage.PackIndex
-        ( PackIndexHeader(..)
-        , PackIndex(..)
-
-        -- * handles and enumeration
-        , packIndexOpen
-        , packIndexClose
-        , withPackIndex
-        , packIndexEnumerate
-
-        -- * read from packIndex
-        , packIndexHeaderGetNbWithPrefix
-        , packIndexGetReferenceLocation
-        , packIndexGetReferencesWithPrefix
-        , packIndexReadHeader
-        , packIndexRead
-        , packIndexGetHeader
-        ) where
+    ( PackIndexHeader(..)
+    , PackIndex(..)
+
+    -- * handles and enumeration
+    , packIndexOpen
+    , packIndexClose
+    , withPackIndex
+    , packIndexEnumerate
+
+    -- * read from packIndex
+    , packIndexHeaderGetNbWithPrefix
+    , packIndexGetReferenceLocation
+    , packIndexGetReferencesWithPrefix
+    , packIndexReadHeader
+    , packIndexRead
+    , packIndexGetHeader
+    ) where
 
 import Data.List
 import Data.Bits
@@ -43,24 +43,24 @@ import qualified Data.Git.Parser as P
 
 -- | represent an packIndex header with the version and the fanout table
 data PackIndexHeader = PackIndexHeader !Word32 !(Vector Word32)
-        deriving (Show,Eq)
+    deriving (Show,Eq)
 
 data PackIndex hash = PackIndex
-        { packIndexSha1s        :: Vector (Ref hash)
-        , packIndexCRCs         :: Vector Word32
-        , packIndexPackoffs     :: Vector Word32
-        , packIndexPackChecksum :: Ref hash
-        , packIndexChecksum     :: Ref hash
-        }
+    { packIndexSha1s        :: Vector (Ref hash)
+    , packIndexCRCs         :: Vector Word32
+    , packIndexPackoffs     :: Vector Word32
+    , packIndexPackChecksum :: Ref hash
+    , packIndexChecksum     :: Ref hash
+    }
 
 -- | enumerate every indexes file in the pack directory
 packIndexEnumerate :: HashAlgorithm hash => LocalPath -> IO [Ref hash]
 packIndexEnumerate repoPath = map onlyHash . filter isPackFile <$> listDirectoryFilename (repoPath </> "objects" </> "pack")
   where
-        isPackFile :: String -> Bool
-        isPackFile x = ".idx" `isSuffixOf` x && "pack-" `isPrefixOf` x
-        onlyHash = fromHexString . takebut 4 . drop 5
-        takebut n l = take (length l - n) l
+    isPackFile :: String -> Bool
+    isPackFile x = ".idx" `isSuffixOf` x && "pack-" `isPrefixOf` x
+    onlyHash = fromHexString . takebut 4 . drop 5
+    takebut n l = take (length l - n) l
 
 -- | open an index
 packIndexOpen :: LocalPath -> Ref hash -> IO FileReader
@@ -93,74 +93,76 @@ packIndexHeaderGetNbWithPrefix (PackIndexHeader _ indexes) n
 packIndexHeaderFoldRef :: HashAlgorithm hash
                        => PackIndexHeader
                        -> FileReader
+                       -> hash
                        -> Int
                        -> (a -> Word32 -> Ref hash -> (a, Bool))
                        -> a
                        -> IO a
-packIndexHeaderFoldRef idxHdr@(PackIndexHeader _ indexes) fr refprefix f initAcc
-        | nb == 0   = return initAcc
-        | otherwise = do
-                let spos = (indexes ! refprefix) - nb
-                fileReaderSeek fr (fromIntegral (sha1Offset + spos * 20))
-                loop nb initAcc
-        where
-                loop 0 acc = return acc
-                loop n acc = do
-                        b <- fromBinary <$> fileReaderGetBS 20 fr
-                        let (!nacc, terminate) = f acc (nb-n) b
-                        if terminate
-                                then return nacc
-                                else loop (n-1) nacc
-                nb         = packIndexHeaderGetNbWithPrefix idxHdr refprefix
-                (sha1Offset,_,_) = packIndexOffsets idxHdr
+packIndexHeaderFoldRef idxHdr@(PackIndexHeader _ indexes) fr alg refprefix f initAcc
+    | nb == 0   = return initAcc
+    | otherwise = do
+        let spos     = (indexes ! refprefix) - nb
+            hashSize = hashDigestSize alg
+        fileReaderSeek fr (fromIntegral (sha1Offset + spos * fromIntegral hashSize))
+        loop nb initAcc
+  where
+    loop 0 acc = return acc
+    loop n acc = do
+        b <- fileReaderGetRef alg fr
+        let (!nacc, terminate) = f acc (nb-n) b
+        if terminate
+            then return nacc
+            else loop (n-1) nacc
+    nb         = packIndexHeaderGetNbWithPrefix idxHdr refprefix
+    (sha1Offset,_,_) = packIndexOffsets alg idxHdr
 
 -- | return the reference offset in the packfile if found
 packIndexGetReferenceLocation :: HashAlgorithm hash => PackIndexHeader -> FileReader -> Ref hash -> IO (Maybe Word64)
 packIndexGetReferenceLocation idxHdr@(PackIndexHeader _ indexes) fr ref = do
-        mrpos <- packIndexHeaderFoldRef idxHdr fr refprefix f Nothing
-        case mrpos of
-                Nothing   -> return Nothing
-                Just rpos -> do
-                        let spos = (indexes ! refprefix) - nb
-                        fileReaderSeek fr (fromIntegral (packOffset + 4 * (spos+rpos)))
-                        Just . fromIntegral . be32 <$> fileReaderGetBS 4 fr
-        where
-                f acc rpos rref = if ref == rref then (Just rpos,True) else (acc,False)
-                refprefix  = refPrefix ref
-                nb         = packIndexHeaderGetNbWithPrefix idxHdr refprefix
-                (_,_,packOffset) = packIndexOffsets idxHdr
+    mrpos <- packIndexHeaderFoldRef idxHdr fr (hashAlgFromRef ref) refprefix f Nothing
+    case mrpos of
+        Nothing   -> return Nothing
+        Just rpos -> do
+            let spos = (indexes ! refprefix) - nb
+            fileReaderSeek fr (fromIntegral (packOffset + 4 * (spos+rpos)))
+            Just . fromIntegral . be32 <$> fileReaderGetBS 4 fr
+  where
+    f acc rpos rref = if ref == rref then (Just rpos,True) else (acc,False)
+    refprefix  = refPrefix ref
+    nb         = packIndexHeaderGetNbWithPrefix idxHdr refprefix
+    (_,_,packOffset) = packIndexOffsets (hashAlgFromRef ref) idxHdr
 
 -- | get all references that start by prefix.
 packIndexGetReferencesWithPrefix :: HashAlgorithm hash => PackIndexHeader -> FileReader -> String -> IO [Ref hash]
 packIndexGetReferencesWithPrefix idxHdr fr prefix =
-        packIndexHeaderFoldRef idxHdr fr refprefix f []
-        where
-                f acc _ ref = case cmpPrefix prefix ref of
-                        GT -> (acc    ,False)
-                        EQ -> (ref:acc,False)
-                        LT -> (acc    ,True)
-                refprefix   = read ("0x" ++ take 2 prefix)
+    packIndexHeaderFoldRef idxHdr fr hashAlg refprefix f []
+  where
+    f acc _ ref = case cmpPrefix prefix ref of
+        GT -> (acc    ,False)
+        EQ -> (ref:acc,False)
+        LT -> (acc    ,True)
+    refprefix   = read ("0x" ++ take 2 prefix)
 
 -- | returns absolute offset in the index file of the sha1s, the crcs and the packfiles offset.
-packIndexOffsets :: PackIndexHeader -> (Word32, Word32, Word32)
-packIndexOffsets idx = (packIndexSha1sOffset, packIndexCRCsOffset, packIndexPackOffOffset)
-        where
-                packIndexPackOffOffset = packIndexCRCsOffset + crcsTableSz
-                packIndexCRCsOffset    = packIndexSha1sOffset + sha1TableSz
-                packIndexSha1sOffset   = fromIntegral packIndexHeaderByteSize
-                crcsTableSz        = 4 * sz
-                sha1TableSz        = 20 * sz
-                sz                 = packIndexHeaderGetSize idx
+packIndexOffsets :: HashAlgorithm hash => hash -> PackIndexHeader -> (Word32, Word32, Word32)
+packIndexOffsets alg idx = (packIndexSha1sOffset, packIndexCRCsOffset, packIndexPackOffOffset)
+  where
+    packIndexPackOffOffset = packIndexCRCsOffset + crcsTableSz
+    packIndexCRCsOffset    = packIndexSha1sOffset + sha1TableSz
+    packIndexSha1sOffset   = fromIntegral packIndexHeaderByteSize
+    crcsTableSz        = 4 * sz
+    sha1TableSz        = (fromIntegral $ hashDigestSize alg) * sz
+    sz                 = packIndexHeaderGetSize idx
 
 -- | parse index header
 parsePackIndexHeader :: P.Parser PackIndexHeader
 parsePackIndexHeader = do
-        magic   <- P.word32
-        when (magic /= 0xff744f63) $ error "wrong magic number for packIndex"
-        ver     <- P.word32
-        when (ver /= 2) $ error "unsupported packIndex version"
-        fanouts <- V.replicateM 256 P.word32
-        return $ PackIndexHeader ver fanouts
+    magic   <- P.word32
+    when (magic /= 0xff744f63) $ error "wrong magic number for packIndex"
+    ver     <- P.word32
+    when (ver /= 2) $ error "unsupported packIndex version"
+    fanouts <- V.replicateM 256 P.word32
+    return $ PackIndexHeader ver fanouts
 
 -- | read index header from an index filereader
 packIndexReadHeader :: FileReader -> IO PackIndexHeader
@@ -176,17 +178,18 @@ packIndexRead :: HashAlgorithm hash
               -> Ref hash
               -> IO (PackIndexHeader, (Vector (Ref hash), Vector Word32, Vector Word32, [ByteString], Ref hash, Ref hash))
 packIndexRead repoPath indexRef = do
-        withPackIndex repoPath indexRef $ \fr -> do
-                idx <- fileReaderParse fr parsePackIndexHeader
-                liftM2 (,) (return idx) (fileReaderParse fr (parsePackIndex $ packIndexHeaderGetSize idx))
-        where parsePackIndex sz = do
-                sha1s     <- V.replicateM (fromIntegral sz) P.referenceBin
-                crcs      <- V.replicateM (fromIntegral sz) P.word32
-                packoffs  <- V.replicateM (fromIntegral sz) P.word32
-                let nbLarge = length $ filter (== True) $ map (\packoff -> packoff `testBit` 31) $ V.toList packoffs
-                largeoffs <- replicateM nbLarge (P.take 4)
-                packfileChecksum <- P.referenceBin
-                idxfileChecksum  <- P.referenceBin
-                -- large packfile offsets
-                -- trailer
-                return (sha1s, crcs, packoffs, largeoffs, packfileChecksum, idxfileChecksum)
+    withPackIndex repoPath indexRef $ \fr -> do
+        idx <- fileReaderParse fr parsePackIndexHeader
+        liftM2 (,) (return idx) (fileReaderParse fr (parsePackIndex $ packIndexHeaderGetSize idx))
+  where
+    parsePackIndex sz = do
+        sha1s     <- V.replicateM (fromIntegral sz) P.referenceBin
+        crcs      <- V.replicateM (fromIntegral sz) P.word32
+        packoffs  <- V.replicateM (fromIntegral sz) P.word32
+        let nbLarge = length $ filter (== True) $ map (\packoff -> packoff `testBit` 31) $ V.toList packoffs
+        largeoffs <- replicateM nbLarge (P.take 4)
+        packfileChecksum <- P.referenceBin
+        idxfileChecksum  <- P.referenceBin
+        -- large packfile offsets
+        -- trailer
+        return (sha1s, crcs, packoffs, largeoffs, packfileChecksum, idxfileChecksum)
diff --git a/tests/Tests.hs b/tests/Tests.hs
index 27d0c68..060ee4d 100644
--- a/tests/Tests.hs
+++ b/tests/Tests.hs
@@ -37,8 +37,11 @@ arbitraryEntname size = entName . B.pack . map fromIntegral <$> replicateM size
                       , choose (0x30, 0x7f)
                       ]
 
-instance Arbitrary (Ref SHA1) where
-    arbitrary = fromBinary <$> arbitraryBS 20
+arbitraryRef :: HashAlgorithm hash => hash -> Gen (Ref hash)
+arbitraryRef alg = fromBinary <$> arbitraryBS (hashDigestSize alg)
+
+instance HashAlgorithm hash => Arbitrary (Ref hash) where
+    arbitrary = arbitraryRef (error "alg")
 
 arbitraryMsg = arbitraryBSno0 1
 arbitraryLazy = L.fromChunks . (:[]) <$> arbitraryBS 40
-- 
GitLab