diff --git a/Data/Git/Index.hs b/Data/Git/Index.hs index dc182a0e2cb85bab1491df33ef1f4b33cf66bd80..5d3297fd8bf18869e6b70105ce4b3d89ba459555 100644 --- a/Data/Git/Index.hs +++ b/Data/Git/Index.hs @@ -1,100 +1,206 @@ +{-# OPTIONS_GHC -fwarn-missing-signatures -fno-warn-unused-binds #-} +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} module Data.Git.Index - ( + ( IndexEntry( .. ) + , parseIndex + , decodeIndex + , loadIndexFile + , indexEntryOfFile ) where +import Prelude hiding ( FilePath, readFile ) +import Filesystem +import Filesystem.Path hiding (concat) +import Control.Monad( when + , replicateM + ) +import Data.Bits + ( unsafeShiftL + , unsafeShiftR + , testBit + , (.&.) + , (.|.) + ) +import Data.Word( Word8, Word16, Word32 ) import Data.Git.Ref -import Data.Git.Imports -{- -<INDEX_HEADER> - : "DIRC" <INDEX_FILE_VERSION> <INDEX_ENTRY_COUNT> - ; --} -data IndexHeader = IndexHeader Word32 Word32 - deriving (Show,Eq) - -parseBe32 = be32 <$> A.take 4 -parseBe16 = be16 <$> A.take 2 -parseRef = fromBinary <$> A.take 20 - +import qualified Control.Exception as E +import qualified Data.Vector as V +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as BC +import qualified Data.Git.Parser as P +import Data.Git.Parser (Parser) + +data IndexHeader = IndexHeader + { indexFileVersion :: {-# UNPACK #-} !Word32 + , indexEntryCount :: {-# UNPACK #-} !Word32 + } + deriving (Show, Eq) + +-- | Finds an index in a given vector, which must be sorted with respect to the +-- given comparison function, at which the given element could be inserted while +-- preserving the vector's sortedness. +binarySearchBy :: (e -> a -> Ordering) -> V.Vector e -> a -> Maybe e +{-# INLINE binarySearchBy #-} +binarySearchBy cmp vec e = go 0 (length vec) + where + go !l !u | u <= l = Nothing + go !l !u = + let !k = (u + l) `unsafeShiftR` 1 + !e' = V.unsafeIndex vec k in + case cmp e' e of + LT -> go (k+1) u + EQ -> Just e' + GT -> go l k + +indexEntryOfFile :: HashAlgorithm hash => B.ByteString -> V.Vector (IndexEntry hash) -> Maybe (IndexEntry hash) +indexEntryOfFile path vec = binarySearchBy (compare . fileName) vec path + +loadIndexFile :: HashAlgorithm hash => FilePath -> IO (Either String (V.Vector (IndexEntry hash))) +loadIndexFile path = (decodeIndex <$> readFile path) `E.catch` onError + where + onError :: E.SomeException -> IO (Either String a) + onError _ = return $ Left "Cannot find index file" + +decodeIndex :: HashAlgorithm hash => B.ByteString -> Either String (V.Vector (IndexEntry hash)) +decodeIndex = P.eitherParse parseIndex + +parseIndex :: HashAlgorithm hash => Parser (V.Vector (IndexEntry hash)) +parseIndex = do + hdr <- parseIndexHeader + V.replicateM (fromIntegral $ indexEntryCount hdr) parseIndexEntry + +parseIndexHeader :: Parser IndexHeader parseIndexHeader = do - magic <- parseBe32 - when (magic /= 'DIRC') $ error "wrong magic number for index" - ver <- parseBe32 - when (ver /= 2) $ error "unsupported packIndex version" - entries <- parseBe32 - return $ IndexHeader ver entries - -{- -<INDEX_FILE_FORMAT_V2> - : <INDEX_HEADER> - <EXTENDED_INDEX_CONTENTS> - <EXTENDED_CHECKSUM> - ; - -<EXTENDED_CHECKSUM> - : _sha-1_digest_( <EXTENDED_INDEX_CONTENTS> ) - ; - -<INDEX_CHECKSUM> - : _sha-1_digest_( <INDEX_CONTENTS> ) - ; - --} - -parseIndexContents entries = replicateM entries parseIndexEntry - -{- -<EXTENDED_INDEX_CONTENTS> - : <INDEX_CONTENTS> - <INDEX_CONTENTS_EXTENSIONS> - ; - --} + magic <- P.take 4 + when (magic /= "DIRC") $ fail "wrong magic number for index" + ver <- P.word32 + when (ver `notElem` [2, 3]) $ fail "unsupported packIndex version" + entries <- P.word32 + return $ IndexHeader ver entries + +-- Index entries are sorted in ascending order on the name field, +-- interpreted as a string of unsigned bytes (i.e. memcmp() order, no +-- localization, no special casing of directory separator '/'). Entries +-- with the same name are sorted by their stage field. +data IndexEntry hash = IndexEntry + { -- | 32-bit ctime seconds, the last time a file's metadata changed + ctime :: !Word32 + -- | 32-bit ctime nanosecond fractions + , ctimeNano :: !Word32 + -- | 32-bit mtime seconds, the last time a file's data changed + , mtime :: !Word32 + -- | 32-bit mtime nanosecond fractions + , mtimeNano :: !Word32 + -- | 32-bit dev + , dev :: !Word32 + -- | 32-bit ino + , ino :: !Word32 + -- | 32-bit mode, split into (high to low bits) + -- + -- 4-bit object type + -- valid values in binary are 1000 (regular file), 1010 (symbolic link) + -- and 1110 (gitlink) + -- + -- 3-bit unused + -- + -- 9-bit unix permission. Only 0755 and 0644 are valid for regular files. + -- Symbolic links and gitlinks have value 0 in this field. + , mode :: !Word32 + -- | 32-bit uid + , uid :: !Word32 + -- | 32-bit gid + , gid :: !Word32 + -- | 32-bit file size This is the on-disk size from stat(2), truncated to 32-bit. + , fileSize :: !Word32 + -- | 160-bit SHA-1 for the represented object + , fileHash :: !(Ref hash) + + -- | A 16-bit 'flags' field + , flags :: !IndexEntryFlags + -- (Version 3 or later) A 16-bit field, only applicable if the + -- "extended flag" above is 1, split into (high to low bits). + -- + -- 1-bit reserved for future + -- + -- 1-bit skip-worktree flag (used by sparse checkout) + -- + -- 1-bit intent-to-add flag (used by "git add -N") + -- + -- 13-bit unused, must be zero + , extended :: !Word16 + + , fileName :: !B.ByteString + } + deriving (Eq, Show) + +data IndexEntryFlags = IndexEntryFlags + { -- | 1-bit assume-valid flag + iefAssumeValid :: !Bool + -- | 1-bit extended flag (must be zero in version 2) + , iefExtended :: !Bool + -- | 2-bit stage (during merge) + , iefStage :: {-# UNPACK #-} !Word8 + -- | 12-bit name length if the length is less than 0xFFF; + -- otherwise 0xFFF is stored in this field. + , iefNameLength :: {-# UNPACK #-} !Word16 + } + deriving (Eq, Show) + +flagsOfWord :: Word16 -> IndexEntryFlags +flagsOfWord w = IndexEntryFlags + { iefAssumeValid = w `testBit` 15 + , iefExtended = w `testBit` 14 + , iefStage = fromIntegral $ (w `unsafeShiftR` 13) .&. 0x3 + , iefNameLength = w .&. 0xFFF + } + +parseIndexEntry :: HashAlgorithm hash => Parser (IndexEntry hash) parseIndexEntry = do - -- INDEX_ENTRY_STAT_INFO - ctime <- parseTime - mtime <- parseTime - dev <- parseBe32 - inode <- parseBe32 - mode <- parseBe32 - uid <- parseBe32 - gid <- parseBe32 - size <- parseBe32 - -- entry id, flags, name, zero padding - -- how to parse <ENTRY_ID> - flags <- parseBe16 - -- 16 bit, network byte order, binary integer. - -- bits 15-14 Reserved - -- bits 13-12 Entry stage - -- bits 11-0 Name byte length - name <- takeWhileNotNull - zeroPadding - -{- -<ENTRY_ZERO_PADDING> - # The minimum length 0x00 byte sequence necessary to make the - # written of digested byte length of the <INDEX_ENTRY> a - # multiple of 8. - ; --} + vCtime <- P.word32 -- +4 4 + vCtimeNano <- P.word32 -- +4 8 + vMtime <- P.word32 -- +4 12 + vMtimeNano <- P.word32 -- +4 16 + vDev <- P.word32 -- +4 20 + vInode <- P.word32 -- +4 24 + vMode <- P.word32 -- +4 28 + vUid <- P.word32 -- +4 32 + vGid <- P.word32 -- +4 36 + vSize <- P.word32 -- +4 40 + -- TODO ref is variable size now + vFileHash <- P.ref -- +20 60 + vFlags <- flagsOfWord <$> P.word16 -- +2 62 + vExtended <- if iefExtended vFlags -- +2 64 + then P.word16 + else return 0 + vName <- P.take . fromIntegral $ iefNameLength vFlags + let padding = 8 - ((62 + iefNameLength vFlags) `mod` 8) + _ <- replicateM (fromIntegral padding) P.anyByte + return IndexEntry + { ctime = vCtime + , ctimeNano = vCtimeNano + , mtime = vMtime + , mtimeNano = vMtimeNano + , dev = vDev + , ino = vInode + , mode = vMode + , uid = vUid + , gid = vGid + , fileSize = vSize + , fileHash = vFileHash + , flags = vFlags + , extended = vExtended + , fileName = vName + } -parseTime = (,) <$> parseBe32 <*> parseBe32 {- -<ENTRY_ID> - # Object ID of the of the file system entity contents. - ; - -<ENTRY_NAME> - # File system entity name. Path is normalized and relative to - # the working directory. - ; - <INDEX_CONTENTS_EXTENSIONS> : ( <INDEX_EXTENSION> )* ; -} +parseIndexExtension :: P.Parser (BC.ByteString, BC.ByteString) parseIndexExtension = do -- # 4 byte sequence identifying how the <INDEX_EXTENSION_DATA> -- # should be interpreted. If the first byte has a value greater @@ -104,7 +210,7 @@ parseIndexExtension = do -- # contents in the index file. Any non-optional extensions must -- # be understood by the reading application to correctly -- # interpret the index file contents. - name <- A.take 4 - dataSize <- parseBe32 - data_ <- A.take dataSize - return (name,data_) + name <- P.take 4 + dataSize <- P.word32 + data_ <- P.take $ fromIntegral dataSize + return (name, data_) diff --git a/Data/Git/Parser.hs b/Data/Git/Parser.hs index 61af23f93562f986d4ce9feeb814c581d81e0227..93254b578af35bd92bc697fa07efa43b8bcdecbe 100644 --- a/Data/Git/Parser.hs +++ b/Data/Git/Parser.hs @@ -7,6 +7,7 @@ module Data.Git.Parser , maybeParse , maybeParseChunks -- * Specific functions + , word16 , word32 , ref , referenceBin @@ -35,7 +36,7 @@ import qualified Data.ByteArray.Parse as P import Data.ByteArray (ByteArray) import Data.Bits -import Data.Word (Word8, Word32) +import Data.Word (Word8, Word16, Word32) import Data.Char (isDigit) import qualified Data.ByteString as B @@ -56,6 +57,9 @@ vlf = do word32 :: Parser Word32 word32 = be32 <$> P.take 4 +word16 :: Parser Word16 +word16 = be16 <$> P.take 2 + ref, referenceBin, referenceHex :: HashAlgorithm hash => Parser (Ref hash) ref = referenceBin referenceBin = takeDigestSize hashAlg 1 fromBinary diff --git a/git.cabal b/git.cabal index 15659e7815c9646032e3ce4156d034eab75d1453..1d5748efce2d5245be5890758a99fc1b182f04d3 100644 --- a/git.cabal +++ b/git.cabal @@ -57,6 +57,7 @@ Library Data.Git.Repository Data.Git.Diff Data.Git.Diff.Patience + Data.Git.Index Other-modules: Data.Git.Internal Data.Git.Imports Data.Git.OS