By | fr33domlover |
At | 2016-04-27 |
Title | Initial pack writer implementation, woohoo! |
Description |
Add file src/Network/Git/Transport/HTTP/Fetch/Packfile.hs 0
Edit file src/Network/Git/Transport/HTTP/Fetch/Packfile.hs 0 → 0
+ 1 {- This file is part of hit-network.
+ 2 -
+ 3 - Written in 2016 by fr33domlover <fr33domlover@riseup.net>.
+ 4 -
+ 5 - ♡ Copying is an act of love. Please copy, reuse and share.
+ 6 -
+ 7 - The author(s) have dedicated all copyright and related and neighboring
+ 8 - rights to this software to the public domain worldwide. This software is
+ 9 - distributed without any warranty.
+ 10 -
+ 11 - You should have received a copy of the CC0 Public Domain Dedication along
+ 12 - with this software. If not, see
+ 13 - <http://creativecommons.org/publicdomain/zero/1.0/>.
+ 14 -}
+ 15 + 16 module Network.Git.Transport.HTTP.Fetch.Packfile
+ 17 (
+ 18 )
+ 19 where
+ 20 + 21 putPackHeader :: Int -> Put
+ 22 putPackHeader numOfObjects = do
+ 23 putByteString "PACK" -- Signature
+ 24 putWord32be 2 -- Version number
+ 25 putInt32be numOfObjects -- Number of objects contained in the pack
+ 26 + 27 type ObjIdSet = HashSet ObjId
+ 28 + 29 -- | Take a minimal list of commits we must send, and build a set of object IDs
+ 30 -- of these commits and all the trees and blobs they refer to recursively.
+ 31 collectObjIds :: Git -> [(ObjId, Commit)] -> IO ObjIdSet
+ 32 collectObjIds git pairs = do
+ 33 let (commitIds, commits) = unzip pairs
+ 34 treeIds = map (commitTreeish git) commits
+ 35 resolve tid = do
+ 36 mtree <- resolveTreeish git $ unObjId tid
+ 37 return $ fromMaybe (error "invalid commit treeish ref") mtree
+ 38 visit s oid _ _ _ = S.insert oid s
+ 39 collect = traverseTree git visit
+ 40 trees <- traverse resolve treeIds
+ 41 let initial = S.toList commitsIds `S.union` S.toList treeIds
+ 42 foldlM (flip collect) initial trees
+ 43 + 44 -- How do we put an object? Sources of info:
+ 45 --
+ 46 -- * Git docs
+ 47 -- * Git source
+ 48 -- * hit source
+ 49 -- * libgit2 source
+ 50 --
+ 51 -- === Git docs
+ 52 --
+ 53 -- The undeltified representation is:
+ 54 --
+ 55 -- n-byte type and length (3-bit type, (n-1)*7+4-bit length)
+ 56 -- compressed data
+ 57 --
+ 58 -- === Git source
+ 59 --
+ 60 -- It seems to read a raw binary object for a given oid, and then writes to the
+ 61 -- pipe while updating the SHA1 hash incrementally. However, it does this in
+ 62 -- two steps:
+ 63 --
+ 64 -- (1) Compute and write header
+ 65 -- (2) Compress and write object content
+ 66 --
+ 67 -- I'm going to assume:
+ 68 --
+ 69 -- (1) Check how the header is formatted and do the same
+ 70 -- (2) Zlib-compress the raw object as-is and write to pack
+ 71 --
+ 72 -- This is from git source:
+ 73 --
+ 74 -- * The per-object header is a pretty dense thing, which is
+ 75 -- * - first byte: low four bits are "size", then three bits of "type",
+ 76 -- * and the high bit is "size continues".
+ 77 -- * - each byte afterwards: low seven bits are size continuation,
+ 78 -- * with the high bit being "size continues"
+ 79 + 80 data CompressedObject = CompressedObject
+ 81 { zoType :: ObjectType
+ 82 , zoSize :: Word64
+ 83 , zoData :: BL.ByteString
+ 84 }
+ 85 + 86 objectTypeCode :: Num a => ObjectType -> a
+ 87 objectTypeCode TypeCommit = 1
+ 88 objectTypeCode TypeTree = 2
+ 89 objectTypeCode TypeBlob = 3
+ 90 objectTypeCode TypeTag = 4
+ 91 objectTypeCode TypeDeltaOff = 6
+ 92 objectTypeCode TypeDeltaRef = 7
+ 93 + 94 -- | This is an encoder for a specific encoding of arbitrary-length numbers
+ 95 -- used by Git. The purpose is to support objects of arbitrary size, not
+ 96 -- limiting their size representation to 32 or 64 bits.
+ 97 --
+ 98 -- The encoding work as follows. The number is split into sequences of 7 bits,
+ 99 -- in little endian order (i.e. least significant bits come first). For each
+ 100 -- sequence, a byte is constructed. The sequence serve as the low 7 bits of it,
+ 101 -- and the highest bit determines whether there is another sequence after it.
+ 102 -- In other words, the last byte has that bit set to 0, and all other bytes
+ 103 -- have it set to 1.
+ 104 --
+ 105 -- The encoding contains at least one byte. If the word value is 127 or less,
+ 106 -- i.e. can be expressed in 7 bits, the encoding contains a single byte.
+ 107 -- Otherwise, it's more than one byte, as needed.
+ 108 putExtensibleWord :: Bits a => a -> Put
+ 109 putExtensibleWord
+ 110 + 111 -- | Like 'putExtensibleWord'', but lets you manually pass the first 7 bits
+ 112 -- separately from the rest of the bits.
+ 113 --
+ 114 -- If the rest of the bits are all zeros, te encoding will contain a single
+ 115 -- byte (the first 7 bits passed, and a zero high bit). Otherwise, it will
+ 116 -- contain at least 2 bytes: 1 byte for the first 7 bits, and at least 1 byte
+ 117 -- for the rest of the bits.
+ 118 putExtensibleWord'
+ 119 :: Bits a
+ 120 => Word8 -- ^ The low 7 bits of the word
+ 121 -> a -- ^ The rest of the bits
+ 122 -> Put
+ 123 putExtensibleWord' first rest =
+ 124 let setHigh = (.|. 0x80)
+ 125 clearHigh = (.&. 0x7f)
+ 126 continues = setHigh
+ 127 stops = clearHigh
+ 128 if rest == zeroBits
+ 129 then putWord8 $ stops first
+ 130 else do
+ 131 putWord8 $ continues first
+ 132 let mnextF = fromIntegralSized $ rest .&. 0x7f
+ 133 msg =
+ 134 "toIntegralSized failed to convert small Bits a (0-127) \
+ 135 \to Word8"
+ 136 nextF = fromMaybe (error msg) mnextF
+ 137 nextR = unsafeShiftR rest 7
+ 138 putExtensibleWord' nextF nextR
+ 139 + 140 putObjectHeader :: ObjectType -> Word64 -> Put
+ 141 putObjectHeader otype size =
+ 142 let typeBits = objectTypeCode otype
+ 143 msizeLowBits = toIntegralSized $ size .&. 0x0f
+ 144 sizeLowBits = case msizeLowBits of
+ 145 Nothing ->
+ 146 error
+ 147 "toIntegralSized failed to convert small (0-15) Word64 to \
+ 148 \Word8 in putObjectHeader"
+ 149 Just n -> n
+ 150 first7bits = unsafeShiftL typeBits 4 .|. sizeLowBits
+ 151 in putExtensibleWord' first7bits (unsafeShiftR size 4)
+ 152 + 153 putCompressedObject :: CompressedObject -> Put
+ 154 putCompressedObject zo = do
+ 155 putObjectHeader (zoType zo) (zoSize zo)
+ 156 putLazyByteString $ zoData zo
+ 157 + 158 compressObject :: ObjectInfo -> CompressedObject
+ 159 compressObect (ObjectInfo (t, s, _mp) odata _ochains) = CompressedObject
+ 160 { zoType = t
+ 161 , zoSize = s
+ 162 , zoData = compress odata
+ 163 }
+ 164 + 165 putObject :: ObjectInfo -> Put
+ 166 putObject = putCompressedObject . compressObject
+ 167 + 168 mkPutObject :: Git -> ObjId -> IO Put
+ 169 mkPutObject git oid = do
+ 170 minfo <- getObjectRaw git (unObjId oid) True
+ 171 case minfo of
+ 172 Nothing -> error "failed to load raw object from oid"
+ 173 Just info -> return $ putObject info
+ 174 + 175 writeHashed :: Put -> HashT SHA1 IO Put
+ 176 writeHashed put = do
+ 177 let lbs = runPut put
+ 178 updateHashMulti $ toChunks lbs
+ 179 return $ putLazyByteString lbs
+ 180 + 181 writePack :: Git -> ObjIdSet -> IO Put
+ 182 writePack git oidset = do
+ 183 (put, digest) <- runHashT $ do
+ 184 header <- writeHashed $ putPackHeader $ S.size oidset
+ 185 let writeObj oid = liftIO (mkPutObject git oid) >>= writeHashed
+ 186 foldlM (\ put oid -> (put >>) <$> writeObj oid) header oidset
+ 187 return $ put >> putByteString (convert digest)
… … … … Edit file src/Network/Git/Transport/SSH/Fetch/Packfile.hs 0 → 0
- 298 type ObjIdSet
+ 298 type ObjIdSet = HashSet ObjId
… … … …