diff --git a/Data/Git/Named.hs b/Data/Git/Named.hs index 7fbbfd63e3d76e3c98a946330f88e55a47377142..93cce0e7c24edd95bdf58677f4343630cc1706af 100644 --- a/Data/Git/Named.hs +++ b/Data/Git/Named.hs @@ -122,13 +122,13 @@ readPackedRefs gitRepo constr = do accu a l | "#" `BC.isPrefixOf` l = a | otherwise = - let (ref, r) = B.splitAt 40 l + let (ref, r) = consumeHexRef hashAlg l name = UTF8.toString $ B.tail r in case toRefTy name of -- accumulate tag, branch and remotes - RefTag refname -> a { packedTags = (refname, fromHex ref) : packedTags a } - RefBranch refname -> a { packedBranchs = (refname, fromHex ref) : packedBranchs a } - RefRemote refname -> a { packedRemotes = (refname, fromHex ref) : packedRemotes a } + RefTag refname -> a { packedTags = (refname, ref) : packedTags a } + RefBranch refname -> a { packedBranchs = (refname, ref) : packedBranchs a } + RefRemote refname -> a { packedRemotes = (refname, ref) : packedRemotes a } -- anything else that shouldn't be there get dropped on the floor _ -> a @@ -176,5 +176,8 @@ readRefFile gitRepo specty = toRefContent <$> readBinaryFile filepath where filepath = toPath gitRepo specty toRefContent content | "ref: " `B.isPrefixOf` content = RefLink $ toRefTy $ UTF8.toString $ head $ BC.lines $ B.drop 5 content - | B.length content < 42 = RefDirect $ fromHex $ B.take 40 content + | B.length content < 42 = RefDirect $ fst $ consumeHexRef hashAlg content | otherwise = RefContentUnknown content + +consumeHexRef :: HashAlgorithm hash => hash -> B.ByteString -> (Ref hash, B.ByteString) +consumeHexRef alg b = let (b1,b2) = B.splitAt (hashDigestSize alg * 2) b in (fromHex b1, b2) diff --git a/Data/Git/Parser.hs b/Data/Git/Parser.hs index 3f4f6176e2af59bb95f8b4e8bdc3b01e034ec3ad..61af23f93562f986d4ce9feeb814c581d81e0227 100644 --- a/Data/Git/Parser.hs +++ b/Data/Git/Parser.hs @@ -58,8 +58,8 @@ word32 = be32 <$> P.take 4 ref, referenceBin, referenceHex :: HashAlgorithm hash => Parser (Ref hash) ref = referenceBin -referenceBin = takeDigestSize (error "referenceBin") 1 fromBinary -referenceHex = takeDigestSize (error "referenceHex") 2 fromHex +referenceBin = takeDigestSize hashAlg 1 fromBinary +referenceHex = takeDigestSize hashAlg 2 fromHex takeDigestSize :: HashAlgorithm hash => hash -> Int -> (B.ByteString -> Ref hash) -> Parser (Ref hash) takeDigestSize alg modifier constr = constr <$> P.take (modifier * hashDigestSize alg) diff --git a/Data/Git/Ref.hs b/Data/Git/Ref.hs index f0f884c70b397efa0ce012bda704ddfaf0592b98..7b6301ceebce42ed3ac238a2270c8b2c5e447ca5 100644 --- a/Data/Git/Ref.hs +++ b/Data/Git/Ref.hs @@ -31,6 +31,8 @@ module Data.Git.Ref -- * Hash ByteString types to a ref , hash , hashLBS + , hashAlg + , hashAlgFromRef ) where import qualified Crypto.Hash @@ -126,3 +128,10 @@ hash = Ref . Crypto.Hash.hash -- | hash a lazy bytestring into a reference hashLBS :: Crypto.Hash.HashAlgorithm hash => L.ByteString -> Ref hash hashLBS = Ref . Crypto.Hash.hashlazy + +-- | Any hash algorithm +hashAlg :: Crypto.Hash.HashAlgorithm hash => hash +hashAlg = error "polymorphic hash algorithm. only to use with hashDigestSize" + +hashAlgFromRef :: Crypto.Hash.HashAlgorithm hash => Ref hash -> hash +hashAlgFromRef _ = hashAlg diff --git a/Data/Git/Storage/FileReader.hs b/Data/Git/Storage/FileReader.hs index f08aa2ae1a8f1ef6a5f96fc71c4612b8e152a864..69bf3471edf9efad371274b7873a07e9ada5d12b 100644 --- a/Data/Git/Storage/FileReader.hs +++ b/Data/Git/Storage/FileReader.hs @@ -16,10 +16,10 @@ module Data.Git.Storage.FileReader , fileReaderGet , fileReaderGetLBS , fileReaderGetBS + , fileReaderGetRef , fileReaderGetVLF , fileReaderSeek , fileReaderParse - , fileReaderInflateToSize ) where @@ -35,6 +35,7 @@ import Data.IORef import Data.Git.Imports import Data.Git.OS +import Data.Git.Ref import qualified Data.Git.Parser as P import Data.Data @@ -42,6 +43,7 @@ import Data.Word import Codec.Zlib import Codec.Zlib.Lowlevel +import Crypto.Hash import Foreign.ForeignPtr import qualified Control.Exception as E @@ -125,6 +127,9 @@ fileReaderGetLBS size fb = L.fromChunks <$> fileReaderGet size fb fileReaderGetBS :: Int -> FileReader -> IO ByteString fileReaderGetBS size fb = B.concat <$> fileReaderGet size fb +fileReaderGetRef :: HashAlgorithm hash => hash -> FileReader -> IO (Ref hash) +fileReaderGetRef alg fr = fromBinary <$> fileReaderGetBS (hashDigestSize alg) fr + -- | seek in a handle, and reset the remaining buffer to empty. fileReaderSeek :: FileReader -> Word64 -> IO () fileReaderSeek (FileReader { fbHandle = handle, fbRemaining = ref, fbPos = pos }) absPos = do diff --git a/Data/Git/Storage/Pack.hs b/Data/Git/Storage/Pack.hs index 1dbd47dc43bb3a97afd773b0dfd52edf1de3c616..56f943117be20fdcb098a1e12c19dd02f1a50768 100644 --- a/Data/Git/Storage/Pack.hs +++ b/Data/Git/Storage/Pack.hs @@ -147,27 +147,27 @@ getNextObjectRaw fr = do sobj <- fileReaderGetPos fr (ty, size) <- fileReaderParse fr parseObjectHeader extra <- case ty of - TypeDeltaRef -> Just . PtrRef . fromBinary <$> fileReaderGetBS 20 fr + TypeDeltaRef -> Just . PtrRef <$> fileReaderGetRef hashAlg fr TypeDeltaOff -> Just . PtrOfs . deltaOffFromList <$> fileReaderGetVLF fr _ -> return Nothing objData <- fileReaderInflateToSize fr size eobj <- fileReaderGetPos fr return (PackedObjectInfo ty sobj (eobj - sobj) size extra, objData) - where - parseObjectHeader = do - (m, ty, sz) <- splitFirst <$> P.anyByte - size <- if m then (sz +) <$> getNextSize 4 else return sz - return (ty, size) - where - getNextSize n = do - (c, sz) <- splitOther n <$> P.anyByte - if c then (sz +) <$> getNextSize (n+7) else return sz - - splitFirst :: Word8 -> (Bool, ObjectType, Word64) - splitFirst w = (w `testBit` 7, toEnum $ fromIntegral ((w `shiftR` 4) .&. 0x7), fromIntegral (w .&. 0xf)) - splitOther n w = (w `testBit` 7, fromIntegral (w .&. 0x7f) `shiftL` n) - - deltaOffFromList (x:xs) = foldl' acc (fromIntegral (x `clearBit` 7)) xs - where acc a w = ((a+1) `shiftL` 7) + fromIntegral (w `clearBit` 7) - deltaOffFromList [] = error "cannot happen" + where + parseObjectHeader = do + (m, ty, sz) <- splitFirst <$> P.anyByte + size <- if m then (sz +) <$> getNextSize 4 else return sz + return (ty, size) + where + getNextSize n = do + (c, sz) <- splitOther n <$> P.anyByte + if c then (sz +) <$> getNextSize (n+7) else return sz + + splitFirst :: Word8 -> (Bool, ObjectType, Word64) + splitFirst w = (w `testBit` 7, toEnum $ fromIntegral ((w `shiftR` 4) .&. 0x7), fromIntegral (w .&. 0xf)) + splitOther n w = (w `testBit` 7, fromIntegral (w .&. 0x7f) `shiftL` n) + + deltaOffFromList (x:xs) = foldl' acc (fromIntegral (x `clearBit` 7)) xs + where acc a w = ((a+1) `shiftL` 7) + fromIntegral (w `clearBit` 7) + deltaOffFromList [] = error "cannot happen" diff --git a/Data/Git/Storage/PackIndex.hs b/Data/Git/Storage/PackIndex.hs index 4ada28e1431c139eeb24e87391a22552e680d58e..2b7f1c1677fade6951bc374c434d02fec9223cb8 100644 --- a/Data/Git/Storage/PackIndex.hs +++ b/Data/Git/Storage/PackIndex.hs @@ -7,23 +7,23 @@ -- {-# LANGUAGE OverloadedStrings, BangPatterns #-} module Data.Git.Storage.PackIndex - ( PackIndexHeader(..) - , PackIndex(..) - - -- * handles and enumeration - , packIndexOpen - , packIndexClose - , withPackIndex - , packIndexEnumerate - - -- * read from packIndex - , packIndexHeaderGetNbWithPrefix - , packIndexGetReferenceLocation - , packIndexGetReferencesWithPrefix - , packIndexReadHeader - , packIndexRead - , packIndexGetHeader - ) where + ( PackIndexHeader(..) + , PackIndex(..) + + -- * handles and enumeration + , packIndexOpen + , packIndexClose + , withPackIndex + , packIndexEnumerate + + -- * read from packIndex + , packIndexHeaderGetNbWithPrefix + , packIndexGetReferenceLocation + , packIndexGetReferencesWithPrefix + , packIndexReadHeader + , packIndexRead + , packIndexGetHeader + ) where import Data.List import Data.Bits @@ -43,24 +43,24 @@ import qualified Data.Git.Parser as P -- | represent an packIndex header with the version and the fanout table data PackIndexHeader = PackIndexHeader !Word32 !(Vector Word32) - deriving (Show,Eq) + deriving (Show,Eq) data PackIndex hash = PackIndex - { packIndexSha1s :: Vector (Ref hash) - , packIndexCRCs :: Vector Word32 - , packIndexPackoffs :: Vector Word32 - , packIndexPackChecksum :: Ref hash - , packIndexChecksum :: Ref hash - } + { packIndexSha1s :: Vector (Ref hash) + , packIndexCRCs :: Vector Word32 + , packIndexPackoffs :: Vector Word32 + , packIndexPackChecksum :: Ref hash + , packIndexChecksum :: Ref hash + } -- | enumerate every indexes file in the pack directory packIndexEnumerate :: HashAlgorithm hash => LocalPath -> IO [Ref hash] packIndexEnumerate repoPath = map onlyHash . filter isPackFile <$> listDirectoryFilename (repoPath </> "objects" </> "pack") where - isPackFile :: String -> Bool - isPackFile x = ".idx" `isSuffixOf` x && "pack-" `isPrefixOf` x - onlyHash = fromHexString . takebut 4 . drop 5 - takebut n l = take (length l - n) l + isPackFile :: String -> Bool + isPackFile x = ".idx" `isSuffixOf` x && "pack-" `isPrefixOf` x + onlyHash = fromHexString . takebut 4 . drop 5 + takebut n l = take (length l - n) l -- | open an index packIndexOpen :: LocalPath -> Ref hash -> IO FileReader @@ -93,74 +93,76 @@ packIndexHeaderGetNbWithPrefix (PackIndexHeader _ indexes) n packIndexHeaderFoldRef :: HashAlgorithm hash => PackIndexHeader -> FileReader + -> hash -> Int -> (a -> Word32 -> Ref hash -> (a, Bool)) -> a -> IO a -packIndexHeaderFoldRef idxHdr@(PackIndexHeader _ indexes) fr refprefix f initAcc - | nb == 0 = return initAcc - | otherwise = do - let spos = (indexes ! refprefix) - nb - fileReaderSeek fr (fromIntegral (sha1Offset + spos * 20)) - loop nb initAcc - where - loop 0 acc = return acc - loop n acc = do - b <- fromBinary <$> fileReaderGetBS 20 fr - let (!nacc, terminate) = f acc (nb-n) b - if terminate - then return nacc - else loop (n-1) nacc - nb = packIndexHeaderGetNbWithPrefix idxHdr refprefix - (sha1Offset,_,_) = packIndexOffsets idxHdr +packIndexHeaderFoldRef idxHdr@(PackIndexHeader _ indexes) fr alg refprefix f initAcc + | nb == 0 = return initAcc + | otherwise = do + let spos = (indexes ! refprefix) - nb + hashSize = hashDigestSize alg + fileReaderSeek fr (fromIntegral (sha1Offset + spos * fromIntegral hashSize)) + loop nb initAcc + where + loop 0 acc = return acc + loop n acc = do + b <- fileReaderGetRef alg fr + let (!nacc, terminate) = f acc (nb-n) b + if terminate + then return nacc + else loop (n-1) nacc + nb = packIndexHeaderGetNbWithPrefix idxHdr refprefix + (sha1Offset,_,_) = packIndexOffsets alg idxHdr -- | return the reference offset in the packfile if found packIndexGetReferenceLocation :: HashAlgorithm hash => PackIndexHeader -> FileReader -> Ref hash -> IO (Maybe Word64) packIndexGetReferenceLocation idxHdr@(PackIndexHeader _ indexes) fr ref = do - mrpos <- packIndexHeaderFoldRef idxHdr fr refprefix f Nothing - case mrpos of - Nothing -> return Nothing - Just rpos -> do - let spos = (indexes ! refprefix) - nb - fileReaderSeek fr (fromIntegral (packOffset + 4 * (spos+rpos))) - Just . fromIntegral . be32 <$> fileReaderGetBS 4 fr - where - f acc rpos rref = if ref == rref then (Just rpos,True) else (acc,False) - refprefix = refPrefix ref - nb = packIndexHeaderGetNbWithPrefix idxHdr refprefix - (_,_,packOffset) = packIndexOffsets idxHdr + mrpos <- packIndexHeaderFoldRef idxHdr fr (hashAlgFromRef ref) refprefix f Nothing + case mrpos of + Nothing -> return Nothing + Just rpos -> do + let spos = (indexes ! refprefix) - nb + fileReaderSeek fr (fromIntegral (packOffset + 4 * (spos+rpos))) + Just . fromIntegral . be32 <$> fileReaderGetBS 4 fr + where + f acc rpos rref = if ref == rref then (Just rpos,True) else (acc,False) + refprefix = refPrefix ref + nb = packIndexHeaderGetNbWithPrefix idxHdr refprefix + (_,_,packOffset) = packIndexOffsets (hashAlgFromRef ref) idxHdr -- | get all references that start by prefix. packIndexGetReferencesWithPrefix :: HashAlgorithm hash => PackIndexHeader -> FileReader -> String -> IO [Ref hash] packIndexGetReferencesWithPrefix idxHdr fr prefix = - packIndexHeaderFoldRef idxHdr fr refprefix f [] - where - f acc _ ref = case cmpPrefix prefix ref of - GT -> (acc ,False) - EQ -> (ref:acc,False) - LT -> (acc ,True) - refprefix = read ("0x" ++ take 2 prefix) + packIndexHeaderFoldRef idxHdr fr hashAlg refprefix f [] + where + f acc _ ref = case cmpPrefix prefix ref of + GT -> (acc ,False) + EQ -> (ref:acc,False) + LT -> (acc ,True) + refprefix = read ("0x" ++ take 2 prefix) -- | returns absolute offset in the index file of the sha1s, the crcs and the packfiles offset. -packIndexOffsets :: PackIndexHeader -> (Word32, Word32, Word32) -packIndexOffsets idx = (packIndexSha1sOffset, packIndexCRCsOffset, packIndexPackOffOffset) - where - packIndexPackOffOffset = packIndexCRCsOffset + crcsTableSz - packIndexCRCsOffset = packIndexSha1sOffset + sha1TableSz - packIndexSha1sOffset = fromIntegral packIndexHeaderByteSize - crcsTableSz = 4 * sz - sha1TableSz = 20 * sz - sz = packIndexHeaderGetSize idx +packIndexOffsets :: HashAlgorithm hash => hash -> PackIndexHeader -> (Word32, Word32, Word32) +packIndexOffsets alg idx = (packIndexSha1sOffset, packIndexCRCsOffset, packIndexPackOffOffset) + where + packIndexPackOffOffset = packIndexCRCsOffset + crcsTableSz + packIndexCRCsOffset = packIndexSha1sOffset + sha1TableSz + packIndexSha1sOffset = fromIntegral packIndexHeaderByteSize + crcsTableSz = 4 * sz + sha1TableSz = (fromIntegral $ hashDigestSize alg) * sz + sz = packIndexHeaderGetSize idx -- | parse index header parsePackIndexHeader :: P.Parser PackIndexHeader parsePackIndexHeader = do - magic <- P.word32 - when (magic /= 0xff744f63) $ error "wrong magic number for packIndex" - ver <- P.word32 - when (ver /= 2) $ error "unsupported packIndex version" - fanouts <- V.replicateM 256 P.word32 - return $ PackIndexHeader ver fanouts + magic <- P.word32 + when (magic /= 0xff744f63) $ error "wrong magic number for packIndex" + ver <- P.word32 + when (ver /= 2) $ error "unsupported packIndex version" + fanouts <- V.replicateM 256 P.word32 + return $ PackIndexHeader ver fanouts -- | read index header from an index filereader packIndexReadHeader :: FileReader -> IO PackIndexHeader @@ -176,17 +178,18 @@ packIndexRead :: HashAlgorithm hash -> Ref hash -> IO (PackIndexHeader, (Vector (Ref hash), Vector Word32, Vector Word32, [ByteString], Ref hash, Ref hash)) packIndexRead repoPath indexRef = do - withPackIndex repoPath indexRef $ \fr -> do - idx <- fileReaderParse fr parsePackIndexHeader - liftM2 (,) (return idx) (fileReaderParse fr (parsePackIndex $ packIndexHeaderGetSize idx)) - where parsePackIndex sz = do - sha1s <- V.replicateM (fromIntegral sz) P.referenceBin - crcs <- V.replicateM (fromIntegral sz) P.word32 - packoffs <- V.replicateM (fromIntegral sz) P.word32 - let nbLarge = length $ filter (== True) $ map (\packoff -> packoff `testBit` 31) $ V.toList packoffs - largeoffs <- replicateM nbLarge (P.take 4) - packfileChecksum <- P.referenceBin - idxfileChecksum <- P.referenceBin - -- large packfile offsets - -- trailer - return (sha1s, crcs, packoffs, largeoffs, packfileChecksum, idxfileChecksum) + withPackIndex repoPath indexRef $ \fr -> do + idx <- fileReaderParse fr parsePackIndexHeader + liftM2 (,) (return idx) (fileReaderParse fr (parsePackIndex $ packIndexHeaderGetSize idx)) + where + parsePackIndex sz = do + sha1s <- V.replicateM (fromIntegral sz) P.referenceBin + crcs <- V.replicateM (fromIntegral sz) P.word32 + packoffs <- V.replicateM (fromIntegral sz) P.word32 + let nbLarge = length $ filter (== True) $ map (\packoff -> packoff `testBit` 31) $ V.toList packoffs + largeoffs <- replicateM nbLarge (P.take 4) + packfileChecksum <- P.referenceBin + idxfileChecksum <- P.referenceBin + -- large packfile offsets + -- trailer + return (sha1s, crcs, packoffs, largeoffs, packfileChecksum, idxfileChecksum) diff --git a/tests/Tests.hs b/tests/Tests.hs index 27d0c682d76fd5f7b0c7038a2fffcf5ee3e4f18a..060ee4dec57aff97b9300ab7423c0de1ee900410 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -37,8 +37,11 @@ arbitraryEntname size = entName . B.pack . map fromIntegral <$> replicateM size , choose (0x30, 0x7f) ] -instance Arbitrary (Ref SHA1) where - arbitrary = fromBinary <$> arbitraryBS 20 +arbitraryRef :: HashAlgorithm hash => hash -> Gen (Ref hash) +arbitraryRef alg = fromBinary <$> arbitraryBS (hashDigestSize alg) + +instance HashAlgorithm hash => Arbitrary (Ref hash) where + arbitrary = arbitraryRef (error "alg") arbitraryMsg = arbitraryBSno0 1 arbitraryLazy = L.fromChunks . (:[]) <$> arbitraryBS 40