More Git tools in pure Haskell based on the 'git' Haskell package
Clone
HTTPS:
darcs clone https://vervis.peers.community/repos/RvP1E
SSH:
darcs clone USERNAME@vervis.peers.community:RvP1E
Tags
TODO
Pack.hs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 | {- This file is part of hit-harder.
-
- Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
-
- ♡ Copying is an act of love. Please copy, reuse and share.
-
- The author(s) have dedicated all copyright and related and neighboring
- rights to this software to the public domain worldwide. This software is
- distributed without any warranty.
-
- You should have received a copy of the CC0 Public Domain Dedication along
- with this software. If not, see
- <http://creativecommons.org/publicdomain/zero/1.0/>.
-}
{-# LANGUAGE OverloadedStrings #-}
module Data.Git.Harder.Pack
( collectObjIds
, writePack
, serializePack
)
where
import Codec.Compression.Zlib (compress)
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Trans.Hash
import Crypto.Hash.Algorithms (SHA1)
import Data.Binary.Put
import Data.Bits
import Data.ByteArray (convert)
import Data.Foldable (foldlM)
import Data.Git.Repository (resolveTreeish)
import Data.Git.Storage (Git, getObjectRaw)
import Data.Git.Storage.Object (ObjectInfo (..))
import Data.Git.Types (ObjectType (..), Commit (..))
import Data.HashSet (HashSet)
import Data.Maybe (fromMaybe)
import Data.Word (Word8, Word32, Word64)
import qualified Data.ByteString.Lazy as BL (ByteString, toChunks)
import qualified Data.HashSet as S
import Data.Git.Harder
putPackHeader :: Word32 -> Put
putPackHeader numOfObjects = do
putByteString "PACK" -- Signature
putWord32be 2 -- Version number
putWord32be numOfObjects -- Number of objects contained in the pack
putPackHeader' :: Int -> Put
putPackHeader' n =
case toIntegralSized n of
Nothing -> error "Invalid number of objects to pack"
Just w -> putPackHeader w
type ObjIdSet = HashSet ObjId
-- | Take a minimal list of commits we must send, and build a set of object IDs
-- of these commits and all the trees and blobs they refer to recursively.
collectObjIds :: Git SHA1-> [(ObjId, (Commit SHA1))] -> IO ObjIdSet
collectObjIds git pairs = do
let (commitIds, commits) = unzip pairs
treeIds = map (ObjId . commitTreeish) commits
resolve tid = do
mtree <- resolveTreeish git $ unObjId tid
return $ fromMaybe (error "invalid commit treeish ref") mtree
visit s oid _ _ = return (S.insert oid s, TAContinue)
collect = traverseTree git visit
trees <- traverse resolve treeIds
let initial = S.fromList commitIds `S.union` S.fromList treeIds
foldlM (flip collect) initial trees
data CompressedObject = CompressedObject
{ zoType :: ObjectType
, zoSize :: Word64
, zoData :: BL.ByteString
}
objectTypeCode :: Num a => ObjectType -> a
objectTypeCode TypeCommit = 1
objectTypeCode TypeTree = 2
objectTypeCode TypeBlob = 3
objectTypeCode TypeTag = 4
objectTypeCode TypeDeltaOff = 6
objectTypeCode TypeDeltaRef = 7
-- | Get the low 7 bits of a number. You get them as the low 7 bits of the
-- 'Word8' returned.
low7bits :: (Integral a, Bits a) => a -> Word8
low7bits n =
let mw = toIntegralSized $ n .&. 0x7f
msg = "toIntegralSized failed to convert small Bits a (0-127) to Word8"
in fromMaybe (error msg) mw
-- | This is an encoder for a specific encoding of arbitrary-length numbers
-- used by Git. The purpose is to support objects of arbitrary size, not
-- limiting their size representation to 32 or 64 bits.
--
-- The encoding work as follows. The number is split into sequences of 7 bits,
-- in little endian order (i.e. least significant bits come first). For each
-- sequence, a byte is constructed. The sequence serve as the low 7 bits of it,
-- and the highest bit determines whether there is another sequence after it.
-- In other words, the last byte has that bit set to 0, and all other bytes
-- have it set to 1.
--
-- The encoding contains at least one byte. If the word value is 127 or less,
-- i.e. can be expressed in 7 bits, the encoding contains a single byte.
-- Otherwise, it's more than one byte, as needed.
_putExtensibleWord :: (Integral a, Bits a) => a -> Put
_putExtensibleWord n =
let first = low7bits n
rest = unsafeShiftR n 7
in putExtensibleWord' first rest
-- | Like 'putExtensibleWord'', but lets you manually pass the first 7 bits
-- separately from the rest of the bits.
--
-- If the rest of the bits are all zeros, te encoding will contain a single
-- byte (the first 7 bits passed, and a zero high bit). Otherwise, it will
-- contain at least 2 bytes: 1 byte for the first 7 bits, and at least 1 byte
-- for the rest of the bits.
putExtensibleWord'
:: (Integral a, Bits a)
=> Word8 -- ^ The low 7 bits of the word
-> a -- ^ The rest of the bits
-> Put
putExtensibleWord' first rest =
let setHigh = (.|. 0x80)
clearHigh = (.&. 0x7f)
continues = setHigh
stops = clearHigh
in if rest == zeroBits
then putWord8 $ stops first
else do
putWord8 $ continues first
let first' = low7bits rest
rest' = unsafeShiftR rest 7
putExtensibleWord' first' rest'
putObjectHeader :: ObjectType -> Word64 -> Put
putObjectHeader otype size =
let typeBits = objectTypeCode otype
msizeLowBits = toIntegralSized $ size .&. 0x0f
sizeLowBits = case msizeLowBits of
Nothing ->
error
"toIntegralSized failed to convert small (0-15) Word64 to \
\Word8 in putObjectHeader"
Just n -> n
first7bits = unsafeShiftL typeBits 4 .|. sizeLowBits
in putExtensibleWord' first7bits (unsafeShiftR size 4)
putCompressedObject :: CompressedObject -> Put
putCompressedObject zo = do
putObjectHeader (zoType zo) (zoSize zo)
putLazyByteString $ zoData zo
compressObject :: ObjectInfo SHA1 -> CompressedObject
compressObject (ObjectInfo (t, s, _mp) odata _ochains) = CompressedObject
{ zoType = t
, zoSize = s
, zoData = compress odata
}
putObject :: ObjectInfo SHA1 -> Put
putObject = putCompressedObject . compressObject
mkPutObject :: Git SHA1 -> ObjId -> IO Put
mkPutObject git oid = do
minfo <- getObjectRaw git (unObjId oid) True
case minfo of
Nothing -> error "failed to load raw object from oid"
Just info -> return $ putObject info
writeHashed :: Put -> HashT SHA1 IO Put
writeHashed put = do
let lbs = runPut put
updateHashMulti $ BL.toChunks lbs
return $ putLazyByteString lbs
writePack :: Git SHA1 -> ObjIdSet -> IO Put
writePack git oidset = do
(put, digest) <- runHashT $ do
header <- writeHashed $ putPackHeader' $ S.size oidset
let writeObj oid = liftIO (mkPutObject git oid) >>= writeHashed
foldlM (\ put oid -> (put >>) <$> writeObj oid) header oidset
return $ put >> putByteString (convert digest)
serializePack :: Git SHA1 -> ObjIdSet -> IO BL.ByteString
serializePack git oidset = runPut <$> writePack git oidset
|