From e64cf9e05905a03067b598367f4cebc13c200caa Mon Sep 17 00:00:00 2001 From: Paul Frazee Date: Thu, 25 Jun 2015 13:42:10 -0500 Subject: [PATCH] blob checkouts no longer create another copy if an existing copy exists with the same content --- app/lib/blobs.js | 41 ++++++++++++++++++++++++++++++++--------- package.json | 4 +++- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/app/lib/blobs.js b/app/lib/blobs.js index 676c6b2..5e6d595 100644 --- a/app/lib/blobs.js +++ b/app/lib/blobs.js @@ -1,6 +1,9 @@ var path = require('path') var multicb = require('multicb') var toPath = require('multiblob/util').toPath +var createHash = require('multiblob/util').createHash +var pull = require('pull-stream') +var toPull = require('stream-to-pull-stream') var querystring = require('querystring') var fs = require('fs') @@ -43,16 +46,21 @@ module.exports = function (blobs_dir, checkout_dir) { var filename = parsed.qs.name || parsed.qs.filename || parsed.hash // check if we have the blob, at the same time find an available filename - var done = multicb({ pluck: 1 }) + var done = multicb() fs.stat(toPath(blobs_dir, parsed.hash), done()) - findFreeCheckoutPath(filename, done()) + findCheckoutDst(filename, parsed.hash, done()) done(function (err, res) { - if (!res[0]) + if (!res[0][1]) return cb({ notFound: true }) + // do we need to copy? + var dst = res[1][1] + var nocopy = res[1][2] + if (nocopy) + return cb(null, dst) + // copy the file var src = toPath(blobs_dir, parsed.hash) - var dst = res[1] var read = fs.createReadStream(src) var write = fs.createWriteStream(dst) read.on('error', done) @@ -68,7 +76,8 @@ module.exports = function (blobs_dir, checkout_dir) { } // helper to create a filename in checkout_dir that isnt already in use - function findFreeCheckoutPath (filename, cb) { + // - cb(err, filepath, nocopy) - if nocopy==true, no need to do the copy operation + function findCheckoutDst (filename, hash, cb) { var n = 1 var parsed = path.parse(filename) next() @@ -82,11 +91,25 @@ module.exports = function (blobs_dir, checkout_dir) { } function next () { - var filepath = gen() - fs.stat(filepath, function (err, stat) { + var dst = gen() + // exists? + fs.stat(dst, function (err, stat) { if (!stat) - return cb(null, filepath) - next() + return cb(null, dst, false) + + // yes, check its hash + var hasher = createHash() + pull( + toPull.source(fs.createReadStream(dst)), + hasher, + pull.onEnd(function () { + // if the hash matches, we're set + if (hasher.digest == hash) + return cb(null, dst, true) + // try next + next() + }) + ) }) } } diff --git a/package.json b/package.json index 10a7be7..24f35ae 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,9 @@ "dependencies": { "multiblob": "^1.4.3", "multicb": "^1.1.0", + "pull-stream": "^2.27.0", "scuttlebot": "^4.2.3", - "ssb-config": "^1.0.3" + "ssb-config": "^1.0.3", + "stream-to-pull-stream": "^1.6.1" } }