[PATCH 1/2] bundle: plug resource leak

classic Classic list List threaded Threaded
17 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2] bundle: plug resource leak

Junio C Hamano
The bundle header structure holds two lists of refs and object
names, which should be released when the user is done with it.

Signed-off-by: Junio C Hamano <[hidden email]>
---
 bundle.c    | 12 ++++++++++++
 bundle.h    |  1 +
 transport.c |  1 +
 3 files changed, 14 insertions(+)

diff --git a/bundle.c b/bundle.c
index 506ac49..9c5a6f0 100644
--- a/bundle.c
+++ b/bundle.c
@@ -102,6 +102,18 @@ int is_bundle(const char *path, int quiet)
  return (fd >= 0);
 }
 
+void release_bundle_header(struct bundle_header *header)
+{
+ int i;
+
+ for (i = 0; i < header->prerequisites.nr; i++)
+ free(header->prerequisites.list[i].name);
+ free(header->prerequisites.list);
+ for (i = 0; i < header->references.nr; i++)
+ free(header->references.list[i].name);
+ free(header->references.list);
+}
+
 static int list_refs(struct ref_list *r, int argc, const char **argv)
 {
  int i;
diff --git a/bundle.h b/bundle.h
index 1584e4d..f7ce23b 100644
--- a/bundle.h
+++ b/bundle.h
@@ -23,5 +23,6 @@ int verify_bundle(struct bundle_header *header, int verbose);
 int unbundle(struct bundle_header *header, int bundle_fd, int flags);
 int list_bundle_refs(struct bundle_header *header,
  int argc, const char **argv);
+void release_bundle_header(struct bundle_header *);
 
 #endif
diff --git a/transport.c b/transport.c
index ca3cfa4..08e15c5 100644
--- a/transport.c
+++ b/transport.c
@@ -107,6 +107,7 @@ static int close_bundle(struct transport *transport)
  struct bundle_transport_data *data = transport->data;
  if (data->fd > 0)
  close(data->fd);
+ release_bundle_header(&data->header);
  free(data);
  return 0;
 }
--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2] bundle: keep a copy of bundle file name in the in-core bundle header

Junio C Hamano
This will be necessary when we start reading from a split bundle
where the header and the thin-pack data live in different files.

The in-core bundle header will read from a file that has the header,
and will record the path to that file.  We would find the name of
the file that hosts the thin-pack data from the header, and we would
take that name as relative to the file we read the header from.

Signed-off-by: Junio C Hamano <[hidden email]>
---
 builtin/bundle.c |  5 +++--
 bundle.c         | 21 +++++++++++----------
 bundle.h         |  3 ++-
 transport.c      |  3 ++-
 4 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 4883a43..e63388d 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -36,8 +36,9 @@ int cmd_bundle(int argc, const char **argv, const char *prefix)
  }
 
  memset(&header, 0, sizeof(header));
- if (strcmp(cmd, "create") && (bundle_fd =
- read_bundle_header(bundle_file, &header)) < 0)
+ header.bundle_file = bundle_file;
+ if (strcmp(cmd, "create") &&
+    (bundle_fd = read_bundle_header(&header)) < 0)
  return 1;
 
  if (!strcmp(cmd, "verify")) {
diff --git a/bundle.c b/bundle.c
index 9c5a6f0..efe19e0 100644
--- a/bundle.c
+++ b/bundle.c
@@ -21,8 +21,7 @@ static void add_to_ref_list(const unsigned char *sha1, const char *name,
  list->nr++;
 }
 
-static int parse_bundle_header(int fd, struct bundle_header *header,
-       const char *report_path)
+static int parse_bundle_header(int fd, struct bundle_header *header, int quiet)
 {
  struct strbuf buf = STRBUF_INIT;
  int status = 0;
@@ -30,9 +29,9 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
  /* The bundle header begins with the signature */
  if (strbuf_getwholeline_fd(&buf, fd, '\n') ||
     strcmp(buf.buf, bundle_signature)) {
- if (report_path)
+ if (!quiet)
  error(_("'%s' does not look like a v2 bundle file"),
-      report_path);
+      header->bundle_file);
  status = -1;
  goto abort;
  }
@@ -57,7 +56,7 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
  if (get_sha1_hex(buf.buf, sha1) ||
     (buf.len > 40 && !isspace(buf.buf[40])) ||
     (!is_prereq && buf.len <= 40)) {
- if (report_path)
+ if (!quiet)
  error(_("unrecognized header: %s%s (%d)"),
       (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
  status = -1;
@@ -79,13 +78,13 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
  return fd;
 }
 
-int read_bundle_header(const char *path, struct bundle_header *header)
+int read_bundle_header(struct bundle_header *header)
 {
- int fd = open(path, O_RDONLY);
+ int fd = open(header->bundle_file, O_RDONLY);
 
  if (fd < 0)
- return error(_("could not open '%s'"), path);
- return parse_bundle_header(fd, header, path);
+ return error(_("could not open '%s'"), header->bundle_file);
+ return parse_bundle_header(fd, header, 0);
 }
 
 int is_bundle(const char *path, int quiet)
@@ -96,7 +95,7 @@ int is_bundle(const char *path, int quiet)
  if (fd < 0)
  return 0;
  memset(&header, 0, sizeof(header));
- fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
+ fd = parse_bundle_header(fd, &header, quiet);
  if (fd >= 0)
  close(fd);
  return (fd >= 0);
@@ -112,6 +111,8 @@ void release_bundle_header(struct bundle_header *header)
  for (i = 0; i < header->references.nr; i++)
  free(header->references.list[i].name);
  free(header->references.list);
+
+ free((void *)header->bundle_file);
 }
 
 static int list_refs(struct ref_list *r, int argc, const char **argv)
diff --git a/bundle.h b/bundle.h
index f7ce23b..cf771df 100644
--- a/bundle.h
+++ b/bundle.h
@@ -10,12 +10,13 @@ struct ref_list {
 };
 
 struct bundle_header {
+ const char *bundle_file;
  struct ref_list prerequisites;
  struct ref_list references;
 };
 
 int is_bundle(const char *path, int quiet);
-int read_bundle_header(const char *path, struct bundle_header *header);
+int read_bundle_header(struct bundle_header *header);
 int create_bundle(struct bundle_header *header, const char *path,
  int argc, const char **argv);
 int verify_bundle(struct bundle_header *header, int verbose);
diff --git a/transport.c b/transport.c
index 08e15c5..03ce149 100644
--- a/transport.c
+++ b/transport.c
@@ -81,7 +81,8 @@ static struct ref *get_refs_from_bundle(struct transport *transport, int for_pus
 
  if (data->fd > 0)
  close(data->fd);
- data->fd = read_bundle_header(transport->url, &data->header);
+ data->header.bundle_file = xstrdup(transport->url);
+ data->fd = read_bundle_header(&data->header);
  if (data->fd < 0)
  die ("Could not read bundle '%s'.", transport->url);
  for (i = 0; i < data->header.references.nr; i++) {
--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] bundle: plug resource leak

Jeff King
In reply to this post by Junio C Hamano
On Tue, Mar 01, 2016 at 03:35:34PM -0800, Junio C Hamano wrote:

> The bundle header structure holds two lists of refs and object
> names, which should be released when the user is done with it.
>
> Signed-off-by: Junio C Hamano <[hidden email]>
> ---
>  bundle.c    | 12 ++++++++++++
>  bundle.h    |  1 +
>  transport.c |  1 +
>  3 files changed, 14 insertions(+)
>
> diff --git a/bundle.c b/bundle.c
> index 506ac49..9c5a6f0 100644
> --- a/bundle.c
> +++ b/bundle.c
> @@ -102,6 +102,18 @@ int is_bundle(const char *path, int quiet)
>   return (fd >= 0);
>  }
>  
> +void release_bundle_header(struct bundle_header *header)
> +{
> + int i;
> +
> + for (i = 0; i < header->prerequisites.nr; i++)
> + free(header->prerequisites.list[i].name);
> + free(header->prerequisites.list);
> + for (i = 0; i < header->references.nr; i++)
> + free(header->references.list[i].name);
> + free(header->references.list);
> +}

Looks good. It's probably not worth adding a release_ref_list() to
handle the repeated data structures.

I do find it hard to believe that the bundle code had to invent its own
ref storage data structure, and couldn't just use "struct ref" like all
of the other code. It doesn't look like we ever sort it or do
non-sequential access. The linked-list "struct ref" probably would have
been fine.

Not a problem you are introducing, of course, but if you are touching
this code a lot, it might be worth seeing how painful it is.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] bundle: plug resource leak

Junio C Hamano
Jeff King <[hidden email]> writes:

> I do find it hard to believe that the bundle code had to invent its own
> ref storage data structure, and couldn't just use "struct ref" like all
> of the other code. It doesn't look like we ever sort it or do
> non-sequential access. The linked-list "struct ref" probably would have
> been fine.
>
> Not a problem you are introducing, of course, but if you are touching
> this code a lot, it might be worth seeing how painful it is.

The bundle code being fairly old, I actually wouldn't be surprised
if it predated the wide use of "struct ref" ;-)

It is not performance critical to add entries to the list of
prerequisites or references (i.e. it is OK to have these as linear
lists, not linked lists of "struct ref"), and these lists do not
have to be ultra-efficient in their storage use (i.e. it is OK to
replace these with "struct ref" linked lists), so we could go either
way.  It's not like we would be using a lot of helper functions we
already have for "struct ref" in this code, so I'm inclined to give
a very low priority to the task of rethinking this data structure.


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/2] bundle: keep a copy of bundle file name in the in-core bundle header

Jeff King
In reply to this post by Junio C Hamano
On Tue, Mar 01, 2016 at 03:36:26PM -0800, Junio C Hamano wrote:

> This will be necessary when we start reading from a split bundle
> where the header and the thin-pack data live in different files.
>
> The in-core bundle header will read from a file that has the header,
> and will record the path to that file.  We would find the name of
> the file that hosts the thin-pack data from the header, and we would
> take that name as relative to the file we read the header from.

Neat. I'm hoping this means you're working on split bundles. :)

> diff --git a/builtin/bundle.c b/builtin/bundle.c
> index 4883a43..e63388d 100644
> --- a/builtin/bundle.c
> +++ b/builtin/bundle.c
> @@ -36,8 +36,9 @@ int cmd_bundle(int argc, const char **argv, const char *prefix)
>   }
>  
>   memset(&header, 0, sizeof(header));
> - if (strcmp(cmd, "create") && (bundle_fd =
> - read_bundle_header(bundle_file, &header)) < 0)
> + header.bundle_file = bundle_file;

What are the memory ownership rules for header.bundle_file?

Here you assign from either an argv parameter or a stack buffer, and
here...

> @@ -112,6 +111,8 @@ void release_bundle_header(struct bundle_header *header)
>   for (i = 0; i < header->references.nr; i++)
>   free(header->references.list[i].name);
>   free(header->references.list);
> +
> + free((void *)header->bundle_file);
>  }

You free it.

The call in get_refs_from_bundle does do an xstrdup().

Should we have:

  void init_bundle_header(struct bundle_header *header, const char *file)
  {
        memset(header, 0, sizeof(*header));
        header.bundle_file = xstrdup(file);
  }

to abstract the whole procedure?

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] bundle: plug resource leak

Jeff King
In reply to this post by Junio C Hamano
On Wed, Mar 02, 2016 at 01:00:38AM -0800, Junio C Hamano wrote:

> Jeff King <[hidden email]> writes:
>
> > I do find it hard to believe that the bundle code had to invent its own
> > ref storage data structure, and couldn't just use "struct ref" like all
> > of the other code. It doesn't look like we ever sort it or do
> > non-sequential access. The linked-list "struct ref" probably would have
> > been fine.
> >
> > Not a problem you are introducing, of course, but if you are touching
> > this code a lot, it might be worth seeing how painful it is.
>
> The bundle code being fairly old, I actually wouldn't be surprised
> if it predated the wide use of "struct ref" ;-)
>
> It is not performance critical to add entries to the list of
> prerequisites or references (i.e. it is OK to have these as linear
> lists, not linked lists of "struct ref"), and these lists do not
> have to be ultra-efficient in their storage use (i.e. it is OK to
> replace these with "struct ref" linked lists), so we could go either
> way.  It's not like we would be using a lot of helper functions we
> already have for "struct ref" in this code, so I'm inclined to give
> a very low priority to the task of rethinking this data structure.

Sure, I agree it's low priority by itself. It was more something to
consider if you find that you are touching the bundle code a lot.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/2] bundle: keep a copy of bundle file name in the in-core bundle header

Junio C Hamano
In reply to this post by Jeff King
Jeff King <[hidden email]> writes:

> On Tue, Mar 01, 2016 at 03:36:26PM -0800, Junio C Hamano wrote:
>
>> This will be necessary when we start reading from a split bundle
>> where the header and the thin-pack data live in different files.
>>
>> The in-core bundle header will read from a file that has the header,
>> and will record the path to that file.  We would find the name of
>> the file that hosts the thin-pack data from the header, and we would
>> take that name as relative to the file we read the header from.
>
> Neat. I'm hoping this means you're working on split bundles. :)

Let's just say that during the -rc freeze period, because I can stop
looking at or queuing completely new topics to encourage people who
are responsible for topics in the upcoming release to focus more on
responding to regressions and follow-up fixes necessary, I have a
better chance of having some leftover time to look into things
myself, at least enough to figure out what needs to be done, ;-)

>> - if (strcmp(cmd, "create") && (bundle_fd =
>> - read_bundle_header(bundle_file, &header)) < 0)
>> + header.bundle_file = bundle_file;
>
> What are the memory ownership rules for header.bundle_file?
>
> Here you assign from either an argv parameter or a stack buffer, and
> here...
>
>> @@ -112,6 +111,8 @@ void release_bundle_header(struct bundle_header *header)
>>   for (i = 0; i < header->references.nr; i++)
>>   free(header->references.list[i].name);
>>   free(header->references.list);
>> +
>> + free((void *)header->bundle_file);
>>  }
>
> You free it.
>
> The call in get_refs_from_bundle does do an xstrdup().

Good eyes.

>
> Should we have:
>
>   void init_bundle_header(struct bundle_header *header, const char *file)
>   {
> memset(header, 0, sizeof(*header));
> header.bundle_file = xstrdup(file);
>   }
>
> to abstract the whole procedure?

Maybe, maybe not.  I'll decide after adding the bundle_version field
to the structure (which will be read from an existing bundle, but
which will have to be set for a bundle being created).

Thanks.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v2 0/4] "split bundle" preview

Junio C Hamano
Here is a preview of the "split bundle" stuff that may serve as one
of the enabling technology to offload "git clone" traffic off of the
server core network to CDN.

Changes:

 - The "checksum" bit about the in-bundle packdata, which was
   incorrect, was dropped from the proposed log message of 1/4.

 - "init_bundle_header()" helper has been added to 3/4; the name of
   the new field in bundle_header structure is now "filename", not
   "bundle_file".  It is silly to name a field with "bundle" in it
   when the structure is about a bundle already.

 - 4/4 is new, and implements the unbundling part, i.e. running
   either "git clone" or "git bundle unbundle" on the two files
   after a split bundle is made available locally.

Junio C Hamano (4):
  bundle doc: 'verify' is not about verifying the bundle
  bundle: plug resource leak
  bundle: keep a copy of bundle file name in the in-core bundle header
  bundle v3: the beginning

 Documentation/git-bundle.txt |   9 ++-
 builtin/bundle.c             |   6 +-
 bundle.c                     | 142 +++++++++++++++++++++++++++++++++++++------
 bundle.h                     |   8 ++-
 t/t5704-bundle.sh            |  64 +++++++++++++++++++
 transport.c                  |   4 +-
 6 files changed, 204 insertions(+), 29 deletions(-)

--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v2 1/4] bundle doc: 'verify' is not about verifying the bundle

Junio C Hamano
Even though the command does read the bundle header and checks to
see if it looks reasonable, the thin-pack data stream that follows
the header in the bundle file is not checked.

The documentation gives an incorrect impression that the data
contained in the bundle is validated, but the command is to validate
that the receiving repository is ready to accept the bundle, not to
check the validity of a bundle file itself.

Rephrase the paragraph to clarify this.

Signed-off-by: Junio C Hamano <[hidden email]>
---
 Documentation/git-bundle.txt | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-bundle.txt b/Documentation/git-bundle.txt
index 3a8120c..c0113a8 100644
--- a/Documentation/git-bundle.txt
+++ b/Documentation/git-bundle.txt
@@ -38,11 +38,10 @@ create <file>::
  'git-rev-list-args' arguments to define the bundle contents.
 
 verify <file>::
- Used to check that a bundle file is valid and will apply
- cleanly to the current repository.  This includes checks on the
- bundle format itself as well as checking that the prerequisite
- commits exist and are fully linked in the current repository.
- 'git bundle' prints a list of missing commits, if any, and exits
+ Verifies that the given 'file' has a valid-looking bundle
+ header, and that your repository has all prerequisite
+ objects necessary to unpack the file as a bundle.  The
+ command prints a list of missing commits, if any, and exits
  with a non-zero status.
 
 list-heads <file>::
--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v2 2/4] bundle: plug resource leak

Junio C Hamano
In reply to this post by Junio C Hamano
The bundle header structure holds two lists of refs and object
names, which should be released when the user is done with it.

Signed-off-by: Junio C Hamano <[hidden email]>
---
 bundle.c    | 12 ++++++++++++
 bundle.h    |  1 +
 transport.c |  1 +
 3 files changed, 14 insertions(+)

diff --git a/bundle.c b/bundle.c
index 506ac49..9c5a6f0 100644
--- a/bundle.c
+++ b/bundle.c
@@ -102,6 +102,18 @@ int is_bundle(const char *path, int quiet)
  return (fd >= 0);
 }
 
+void release_bundle_header(struct bundle_header *header)
+{
+ int i;
+
+ for (i = 0; i < header->prerequisites.nr; i++)
+ free(header->prerequisites.list[i].name);
+ free(header->prerequisites.list);
+ for (i = 0; i < header->references.nr; i++)
+ free(header->references.list[i].name);
+ free(header->references.list);
+}
+
 static int list_refs(struct ref_list *r, int argc, const char **argv)
 {
  int i;
diff --git a/bundle.h b/bundle.h
index 1584e4d..f7ce23b 100644
--- a/bundle.h
+++ b/bundle.h
@@ -23,5 +23,6 @@ int verify_bundle(struct bundle_header *header, int verbose);
 int unbundle(struct bundle_header *header, int bundle_fd, int flags);
 int list_bundle_refs(struct bundle_header *header,
  int argc, const char **argv);
+void release_bundle_header(struct bundle_header *);
 
 #endif
diff --git a/transport.c b/transport.c
index ca3cfa4..08e15c5 100644
--- a/transport.c
+++ b/transport.c
@@ -107,6 +107,7 @@ static int close_bundle(struct transport *transport)
  struct bundle_transport_data *data = transport->data;
  if (data->fd > 0)
  close(data->fd);
+ release_bundle_header(&data->header);
  free(data);
  return 0;
 }
--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v2 3/4] bundle: keep a copy of bundle file name in the in-core bundle header

Junio C Hamano
In reply to this post by Junio C Hamano
This will be necessary when we start reading from a split bundle
where the header and the thin-pack data live in different files.

The in-core bundle header will read from a file that has the header,
and will record the path to that file.  We would find the name of
the file that hosts the thin-pack data from the header, and we would
take that name as relative to the file we read the header from.

Helped-by: Jeff King <[hidden email]>
Signed-off-by: Junio C Hamano <[hidden email]>
---
 builtin/bundle.c |  6 +++---
 bundle.c         | 27 +++++++++++++++++----------
 bundle.h         |  4 +++-
 transport.c      |  3 ++-
 4 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/builtin/bundle.c b/builtin/bundle.c
index 4883a43..c9ede65 100644
--- a/builtin/bundle.c
+++ b/builtin/bundle.c
@@ -35,9 +35,9 @@ int cmd_bundle(int argc, const char **argv, const char *prefix)
  bundle_file = buffer;
  }
 
- memset(&header, 0, sizeof(header));
- if (strcmp(cmd, "create") && (bundle_fd =
- read_bundle_header(bundle_file, &header)) < 0)
+ init_bundle_header(&header, bundle_file);
+ if (strcmp(cmd, "create") &&
+    (bundle_fd = read_bundle_header(&header)) < 0)
  return 1;
 
  if (!strcmp(cmd, "verify")) {
diff --git a/bundle.c b/bundle.c
index 9c5a6f0..32bdb01 100644
--- a/bundle.c
+++ b/bundle.c
@@ -21,8 +21,13 @@ static void add_to_ref_list(const unsigned char *sha1, const char *name,
  list->nr++;
 }
 
-static int parse_bundle_header(int fd, struct bundle_header *header,
-       const char *report_path)
+void init_bundle_header(struct bundle_header *header, const char *name)
+{
+ memset(header, '\0', sizeof(*header));
+ header->filename = xstrdup(name);
+}
+
+static int parse_bundle_header(int fd, struct bundle_header *header, int quiet)
 {
  struct strbuf buf = STRBUF_INIT;
  int status = 0;
@@ -30,9 +35,9 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
  /* The bundle header begins with the signature */
  if (strbuf_getwholeline_fd(&buf, fd, '\n') ||
     strcmp(buf.buf, bundle_signature)) {
- if (report_path)
+ if (!quiet)
  error(_("'%s' does not look like a v2 bundle file"),
-      report_path);
+      header->filename);
  status = -1;
  goto abort;
  }
@@ -57,7 +62,7 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
  if (get_sha1_hex(buf.buf, sha1) ||
     (buf.len > 40 && !isspace(buf.buf[40])) ||
     (!is_prereq && buf.len <= 40)) {
- if (report_path)
+ if (!quiet)
  error(_("unrecognized header: %s%s (%d)"),
       (is_prereq ? "-" : ""), buf.buf, (int)buf.len);
  status = -1;
@@ -79,13 +84,13 @@ static int parse_bundle_header(int fd, struct bundle_header *header,
  return fd;
 }
 
-int read_bundle_header(const char *path, struct bundle_header *header)
+int read_bundle_header(struct bundle_header *header)
 {
- int fd = open(path, O_RDONLY);
+ int fd = open(header->filename, O_RDONLY);
 
  if (fd < 0)
- return error(_("could not open '%s'"), path);
- return parse_bundle_header(fd, header, path);
+ return error(_("could not open '%s'"), header->filename);
+ return parse_bundle_header(fd, header, 0);
 }
 
 int is_bundle(const char *path, int quiet)
@@ -96,7 +101,7 @@ int is_bundle(const char *path, int quiet)
  if (fd < 0)
  return 0;
  memset(&header, 0, sizeof(header));
- fd = parse_bundle_header(fd, &header, quiet ? NULL : path);
+ fd = parse_bundle_header(fd, &header, quiet);
  if (fd >= 0)
  close(fd);
  return (fd >= 0);
@@ -112,6 +117,8 @@ void release_bundle_header(struct bundle_header *header)
  for (i = 0; i < header->references.nr; i++)
  free(header->references.list[i].name);
  free(header->references.list);
+
+ free((void *)header->filename);
 }
 
 static int list_refs(struct ref_list *r, int argc, const char **argv)
diff --git a/bundle.h b/bundle.h
index f7ce23b..e059ccf 100644
--- a/bundle.h
+++ b/bundle.h
@@ -10,12 +10,14 @@ struct ref_list {
 };
 
 struct bundle_header {
+ const char *filename;
  struct ref_list prerequisites;
  struct ref_list references;
 };
 
 int is_bundle(const char *path, int quiet);
-int read_bundle_header(const char *path, struct bundle_header *header);
+void init_bundle_header(struct bundle_header *, const char *filename);
+int read_bundle_header(struct bundle_header *header);
 int create_bundle(struct bundle_header *header, const char *path,
  int argc, const char **argv);
 int verify_bundle(struct bundle_header *header, int verbose);
diff --git a/transport.c b/transport.c
index 08e15c5..7bd3206 100644
--- a/transport.c
+++ b/transport.c
@@ -81,7 +81,8 @@ static struct ref *get_refs_from_bundle(struct transport *transport, int for_pus
 
  if (data->fd > 0)
  close(data->fd);
- data->fd = read_bundle_header(transport->url, &data->header);
+ init_bundle_header(&data->header, transport->url);
+ data->fd = read_bundle_header(&data->header);
  if (data->fd < 0)
  die ("Could not read bundle '%s'.", transport->url);
  for (i = 0; i < data->header.references.nr; i++) {
--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v2 4/4] bundle v3: the beginning

Junio C Hamano
In reply to this post by Junio C Hamano
The bundle v3 format introduces an ability to have the bundle header
(which describes what references in the bundled history can be
fetched, and what objects the receiving repository must have in
order to unbundle it successfully) in one file, and the bundled pack
stream data in a separate file.

A v3 bundle file begins with a line with "# v3 git bundle", followed
by zero or more "extended header" lines, and an empty line, finally
followed by the list of prerequisites and references in the same
format as v2 bundle.  If it uses the "split bundle" feature, there
is a "data: $URL" extended header line, and nothing follows the list
of prerequisites and references.  Also, "sha1: " extended header
line may exist to help validating that the pack stream data matches
the bundle header.

A typical expected use of a split bundle is to help initial clone
that involves a huge data transfer, and would go like this:

 - Any repository people would clone and fetch from would regularly
   be repacked, and it is expected that there would be a packfile
   without prerequisites that holds all (or at least most) of the
   history of it (call it pack-$name.pack).

 - After arranging that packfile to be downloadable over popular
   transfer methods used for serving static files (such as HTTP or
   HTTPS) that are easily resumable as $URL/pack-$name.pack, a v3
   bundle file (call it $name.bndl) can be prepared with an extended
   header "data: $URL/pack-$name.pack" to point at the download
   location for the packfile, and be served at "$URL/$name.bndl".

 - An updated Git client, when trying to "git clone" from such a
   repository, may be redirected to $URL/$name.bndl", which would be
   a tiny text file (when split bundle feature is used).

 - The client would then inspect the downloaded $name.bndl, learn
   that the corresponding packfile exists at $URL/pack-$name.pack,
   and downloads it as pack-$name.pack, until the download succeeds.
   This can easily be done with "wget --continue" equivalent over an
   unreliable link.  The checksum recorded on the "sha1: " header
   line is expected to be used by this downloader (not written yet).

 - After fully downloading $name.bndl and pack-$name.pack and
   storing them next to each other, the client would clone from the
   $name.bndl; this would populate the newly created repository with
   reasonably recent history.

 - Then the client can issue "git fetch" against the original
   repository to obtain the most recent part of the history created
   since the bundle was made.

Signed-off-by: Junio C Hamano <[hidden email]>
---
 bundle.c          | 103 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 bundle.h          |   3 ++
 t/t5704-bundle.sh |  64 +++++++++++++++++++++++++++++++++
 3 files changed, 161 insertions(+), 9 deletions(-)

diff --git a/bundle.c b/bundle.c
index 32bdb01..480630d 100644
--- a/bundle.c
+++ b/bundle.c
@@ -10,7 +10,8 @@
 #include "refs.h"
 #include "argv-array.h"
 
-static const char bundle_signature[] = "# v2 git bundle\n";
+static const char bundle_signature_v2[] = "# v2 git bundle\n";
+static const char bundle_signature_v3[] = "# v3 git bundle\n";
 
 static void add_to_ref_list(const unsigned char *sha1, const char *name,
  struct ref_list *list)
@@ -33,16 +34,55 @@ static int parse_bundle_header(int fd, struct bundle_header *header, int quiet)
  int status = 0;
 
  /* The bundle header begins with the signature */
- if (strbuf_getwholeline_fd(&buf, fd, '\n') ||
-    strcmp(buf.buf, bundle_signature)) {
+ if (strbuf_getwholeline_fd(&buf, fd, '\n')) {
+ bad_bundle:
  if (!quiet)
- error(_("'%s' does not look like a v2 bundle file"),
+ error(_("'%s' does not look like a supported bundle file"),
       header->filename);
  status = -1;
  goto abort;
  }
 
- /* The bundle header ends with an empty line */
+ if (!strcmp(buf.buf, bundle_signature_v2))
+ header->bundle_version = 2;
+ else if (!strcmp(buf.buf, bundle_signature_v3))
+ header->bundle_version = 3;
+ else
+ goto bad_bundle;
+
+ if (header->bundle_version == 3) {
+ /*
+ * bundle version v3 has extended headers before the
+ * list of prerequisites and references.  The extended
+ * headers end with an empty line.
+ */
+ while (!strbuf_getwholeline_fd(&buf, fd, '\n')) {
+ const char *cp;
+ if (buf.len && buf.buf[buf.len - 1] == '\n')
+ buf.buf[--buf.len] = '\0';
+ if (!buf.len)
+ break;
+ if (skip_prefix(buf.buf, "data: ", &cp)) {
+ header->datafile = xstrdup(cp);
+ continue;
+ }
+ if (skip_prefix(buf.buf, "sha1: ", &cp)) {
+ unsigned char sha1[GIT_SHA1_RAWSZ];
+ if (get_sha1_hex(cp, sha1) ||
+    cp[GIT_SHA1_HEXSZ])
+ goto bad_bundle;
+ hashcpy(header->csum, sha1);
+ continue;
+ }
+
+ goto bad_bundle;
+ }
+ }
+
+ /*
+ * The bundle header lists prerequisites and
+ * references, and the list ends with an empty line.
+ */
  while (!strbuf_getwholeline_fd(&buf, fd, '\n') &&
        buf.len && buf.buf[0] != '\n') {
  unsigned char sha1[20];
@@ -77,7 +117,8 @@ static int parse_bundle_header(int fd, struct bundle_header *header, int quiet)
 
  abort:
  if (status) {
- close(fd);
+ if (0 <= fd)
+ close(fd);
  fd = -1;
  }
  strbuf_release(&buf);
@@ -426,6 +467,7 @@ int create_bundle(struct bundle_header *header, const char *path,
  int bundle_to_stdout;
  int ref_count = 0;
  struct rev_info revs;
+ const char *bundle_signature = bundle_signature_v2;
 
  bundle_to_stdout = !strcmp(path, "-");
  if (bundle_to_stdout)
@@ -480,22 +522,65 @@ int create_bundle(struct bundle_header *header, const char *path,
  return 0;
 }
 
+/*
+ * v3 "split bundle" allows a separate packfile to be named
+ * as "data: $URL/$name_of_the_packfile".  This file is expected
+ * to be downloaded next to the bundle header file when the
+ * bundle is used.  Hence we find the path to the directory
+ * that contains the bundle header file, and append the basename
+ * part of the bundle_data_file to it, to form the name of the
+ * file that holds the pack data stream.
+ */
+static int open_bundle_data(struct bundle_header *header)
+{
+ const char *cp;
+ struct strbuf filename = STRBUF_INIT;
+ int fd;
+
+ assert(header->datafile);
+
+ cp = find_last_dir_sep(header->filename);
+ if (cp)
+ strbuf_add(&filename, header->filename,
+   (cp - header->filename) + 1);
+ cp = find_last_dir_sep(header->datafile);
+ if (!cp)
+ cp = header->datafile;
+ strbuf_addstr(&filename, cp);
+
+ fd = open(filename.buf, O_RDONLY);
+ strbuf_release(&filename);
+ return fd;
+}
+
 int unbundle(struct bundle_header *header, int bundle_fd, int flags)
 {
  const char *argv_index_pack[] = {"index-pack",
  "--fix-thin", "--stdin", NULL, NULL};
  struct child_process ip = CHILD_PROCESS_INIT;
+ int status = 0, data_fd = -1;
 
  if (flags & BUNDLE_VERBOSE)
  argv_index_pack[3] = "-v";
 
  if (verify_bundle(header, 0))
  return -1;
+
+ if (header->datafile) {
+ data_fd = open_bundle_data(header);
+ if (data_fd < 0)
+ return error(_("bundle data not found"));
+ ip.in = data_fd;
+ } else {
+ ip.in = bundle_fd;
+ }
+
  ip.argv = argv_index_pack;
- ip.in = bundle_fd;
  ip.no_stdout = 1;
  ip.git_cmd = 1;
  if (run_command(&ip))
- return error(_("index-pack died"));
- return 0;
+ status = error(_("index-pack died"));
+ if (0 <= data_fd)
+ close(data_fd);
+ return status;
 }
diff --git a/bundle.h b/bundle.h
index e059ccf..db55dc7 100644
--- a/bundle.h
+++ b/bundle.h
@@ -10,7 +10,10 @@ struct ref_list {
 };
 
 struct bundle_header {
+ int bundle_version;
  const char *filename;
+ const char *datafile;
+ unsigned char csum[GIT_SHA1_RAWSZ];
  struct ref_list prerequisites;
  struct ref_list references;
 };
diff --git a/t/t5704-bundle.sh b/t/t5704-bundle.sh
index 348d9b3..e68523c 100755
--- a/t/t5704-bundle.sh
+++ b/t/t5704-bundle.sh
@@ -71,4 +71,68 @@ test_expect_success 'prerequisites with an empty commit message' '
  git bundle verify bundle
 '
 
+# bundle v3 (experimental)
+test_expect_success 'clone from v3' '
+
+ # as "bundle create" does not exist yet for v3
+ # prepare it by hand here
+ head=$(git rev-parse HEAD) &&
+ name=$(echo $head | git pack-objects --revs v3) &&
+ test_when_finished "rm v3-$name.pack v3-$name.idx" &&
+ cat >v3.bndl <<-EOF &&
+ # v3 git bundle
+ data: v3-$name.pack
+
+ $head HEAD
+ $head refs/heads/master
+ EOF
+
+ git bundle verify v3.bndl &&
+ git bundle list-heads v3.bndl >actual &&
+ cat >expect <<-EOF &&
+ $head HEAD
+ $head refs/heads/master
+ EOF
+ test_cmp expect actual &&
+
+ git clone v3.bndl v3dst &&
+ git -C v3dst for-each-ref --format="%(objectname) %(refname)" >actual &&
+ cat >expect <<-EOF &&
+ $head refs/heads/master
+ $head refs/remotes/origin/HEAD
+ $head refs/remotes/origin/master
+ EOF
+ test_cmp expect actual &&
+ git -C v3dst fsck &&
+
+ # an "inline" v3 is still possible.
+ cat >v3i.bndl <<-EOF &&
+ # v3 git bundle
+
+ $head HEAD
+ $head refs/heads/master
+
+ EOF
+ cat v3-$name.pack >>v3i.bndl &&
+ test_when_finished "rm v3i.bndl" &&
+
+ git bundle verify v3i.bndl &&
+ git bundle list-heads v3i.bndl >actual &&
+ cat >expect <<-EOF &&
+ $head HEAD
+ $head refs/heads/master
+ EOF
+ test_cmp expect actual &&
+
+ git clone v3i.bndl v3idst &&
+ git -C v3idst for-each-ref --format="%(objectname) %(refname)" >actual &&
+ cat >expect <<-EOF &&
+ $head refs/heads/master
+ $head refs/remotes/origin/HEAD
+ $head refs/remotes/origin/master
+ EOF
+ test_cmp expect actual &&
+ git -C v3idst fsck
+'
+
 test_done
--
2.8.0-rc0-114-g0b3e5e5

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2 3/4] bundle: keep a copy of bundle file name in the in-core bundle header

Jeff King
In reply to this post by Junio C Hamano
On Wed, Mar 02, 2016 at 12:32:40PM -0800, Junio C Hamano wrote:

> + free((void *)header->filename);

This cast is necessary, because...

> diff --git a/bundle.h b/bundle.h
> index f7ce23b..e059ccf 100644
> --- a/bundle.h
> +++ b/bundle.h
> @@ -10,12 +10,14 @@ struct ref_list {
>  };
>  
>  struct bundle_header {
> + const char *filename;
>   struct ref_list prerequisites;
>   struct ref_list references;
>  };

...this is const, even though we know it is allocated on the heap.

I am OK if we want to keep it "conceptually const" so that anybody
looking at the struct knows they should not touch it. But I am also OK
with just making it "char *".

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2 4/4] bundle v3: the beginning

Duy Nguyen
In reply to this post by Junio C Hamano
On Thu, Mar 3, 2016 at 3:32 AM, Junio C Hamano <[hidden email]> wrote:
>  - After arranging that packfile to be downloadable over popular
>    transfer methods used for serving static files (such as HTTP or
>    HTTPS) that are easily resumable as $URL/pack-$name.pack, a v3
>    bundle file (call it $name.bndl) can be prepared with an extended
>    header "data: $URL/pack-$name.pack" to point at the download
>    location for the packfile, and be served at "$URL/$name.bndl".

Extra setup to offload things to CDN is great and all. But would it be
ok if we introduced a minimal resumable download service via
git-daemon to enable this feature with very little setup? Like
git-shell, you can only download certain packfiles for this use case
and nothing else with this service.
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2 4/4] bundle v3: the beginning

Junio C Hamano
Duy Nguyen <[hidden email]> writes:

> would it be
> ok if we introduced a minimal resumable download service via
> git-daemon to enable this feature with very little setup? Like
> git-shell, you can only download certain packfiles for this use case
> and nothing else with this service.

I think it is a matter of priorities.

A minimalistic site that offers only git-daemon traffic without a
working HTTP server would certainly benefit from such a thing, but
serving static files efficiently over the web is commodity service
these days.  Wouldn't it be sufficient to just recommend having a
normal HTTP server serving static files, which should be "very
little setup" in today's world?

Such a "minimal resumable download service" over the git-daemon
transport still has to reinvent what is already done well by the
HTTP servers and clients (e.g. support of ETag equivalent to make
sure that the client can notice that the underlying data has changed
for a given resource, headers to communicate the total length,
making a range request and responding to it, etc. etc.).

In addition,, by going the custom protocol route, you wouldn't
benefit from caching HTTP proxies available to the clients.

So I am not sure if the benefit outweighs the cost.

I wouldn't stop you if you really want to do it, but again, it is a
matter of priorities.  I personally feel that it would be a waste of
engineering talent, and it certainly would be a waste of review
bandwidth, if you gave priority to this over other more widely
useful parts of the system.  The procedure to repack should be
updated to produce such a base pack with the separate bundle header
on the server side, the protocol needs to be updated to allow
redirection for "clone" traffic, the logic to decide when to
redirect must be designed (e.g. "single branch" clone should not
choose a pack/bundle that represents the full repository, but a pack
for the branch that was asked), etc.  There are still tons of things
that need to be done, and it would be a distraction to invent a
custom download service nobody other than git-daemon talks before
all of the above is done.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2 4/4] bundle v3: the beginning

Duy Nguyen
On Thu, Mar 3, 2016 at 9:57 AM, Junio C Hamano <[hidden email]> wrote:

> Duy Nguyen <[hidden email]> writes:
>
>> would it be
>> ok if we introduced a minimal resumable download service via
>> git-daemon to enable this feature with very little setup? Like
>> git-shell, you can only download certain packfiles for this use case
>> and nothing else with this service.
>
> I think it is a matter of priorities.
>
> A minimalistic site that offers only git-daemon traffic without a
> working HTTP server would certainly benefit from such a thing, but
> serving static files efficiently over the web is commodity service
> these days.  Wouldn't it be sufficient to just recommend having a
> normal HTTP server serving static files, which should be "very
> little setup" in today's world?
>
> Such a "minimal resumable download service" over the git-daemon
> transport still has to reinvent what is already done well by the
> HTTP servers and clients (e.g. support of ETag equivalent to make
> sure that the client can notice that the underlying data has changed
> for a given resource, headers to communicate the total length,
> making a range request and responding to it, etc. etc.).
>
> In addition,, by going the custom protocol route, you wouldn't
> benefit from caching HTTP proxies available to the clients.
>
> So I am not sure if the benefit outweighs the cost.

What I had in mind was individuals who just want to publish their work
over git://. Right now it's just a matter of running git-daemon and
configuring it a bit. If it was me, I wouldn't expect all the bells
and whistles that come with http. But I agree that this is low
priority, "scratch your own itch" kind of thing. Let's have resumable
clone with standard download protocols first, then we'll see.
--
Duy
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2 4/4] bundle v3: the beginning

Christian Couder-2
In reply to this post by Junio C Hamano
I am responding to this 2+ month old email because I am investigating
adding an alternate object store at the same level as loose and packed
objects. This alternate object store could be used for large files. I
am working on this for GitLab. (Yeah, I am working, as a freelance,
for both Booking.com and GitLab these days.)

On Wed, Mar 2, 2016 at 9:32 PM, Junio C Hamano <[hidden email]> wrote:

> The bundle v3 format introduces an ability to have the bundle header
> (which describes what references in the bundled history can be
> fetched, and what objects the receiving repository must have in
> order to unbundle it successfully) in one file, and the bundled pack
> stream data in a separate file.
>
> A v3 bundle file begins with a line with "# v3 git bundle", followed
> by zero or more "extended header" lines, and an empty line, finally
> followed by the list of prerequisites and references in the same
> format as v2 bundle.  If it uses the "split bundle" feature, there
> is a "data: $URL" extended header line, and nothing follows the list
> of prerequisites and references.  Also, "sha1: " extended header
> line may exist to help validating that the pack stream data matches
> the bundle header.
>
> A typical expected use of a split bundle is to help initial clone
> that involves a huge data transfer, and would go like this:
>
>  - Any repository people would clone and fetch from would regularly
>    be repacked, and it is expected that there would be a packfile
>    without prerequisites that holds all (or at least most) of the
>    history of it (call it pack-$name.pack).
>
>  - After arranging that packfile to be downloadable over popular
>    transfer methods used for serving static files (such as HTTP or
>    HTTPS) that are easily resumable as $URL/pack-$name.pack, a v3
>    bundle file (call it $name.bndl) can be prepared with an extended
>    header "data: $URL/pack-$name.pack" to point at the download
>    location for the packfile, and be served at "$URL/$name.bndl".
>
>  - An updated Git client, when trying to "git clone" from such a
>    repository, may be redirected to $URL/$name.bndl", which would be
>    a tiny text file (when split bundle feature is used).
>
>  - The client would then inspect the downloaded $name.bndl, learn
>    that the corresponding packfile exists at $URL/pack-$name.pack,
>    and downloads it as pack-$name.pack, until the download succeeds.
>    This can easily be done with "wget --continue" equivalent over an
>    unreliable link.  The checksum recorded on the "sha1: " header
>    line is expected to be used by this downloader (not written yet).

I wonder if this mechanism could also be used or extended to clone and
fetch an alternate object database.

In [1], [2] and [3], and this was also discussed during the
Contributor Summit last month, Peff says that he started working on
alternate object database support a long time ago, and that the hard
part is a protocol extension to tell remotes that you can access some
objects in a different way.

If a Git client would download a "$name.bndl" v3 bundle file that
would have a "data: $URL/alt-odb-$name.odb" extended header, the Git
client would just need to download "$URL/alt-odb-$name.odb" and use
the alternate object database support on this file.

This way it would know all it has to know to access the objects in the
alternate database. The alternate object database may not contain the
real objects, if they are too big for example, but just files that
describe how to get the real objects.

>  - After fully downloading $name.bndl and pack-$name.pack and
>    storing them next to each other, the client would clone from the
>    $name.bndl; this would populate the newly created repository with
>    reasonably recent history.
>
>  - Then the client can issue "git fetch" against the original
>    repository to obtain the most recent part of the history created
>    since the bundle was made.

[1] http://thread.gmane.org/gmane.comp.version-control.git/206886/focus=207040
[2] http://thread.gmane.org/gmane.comp.version-control.git/247171
[3] http://thread.gmane.org/gmane.comp.version-control.git/202902/focus=203020
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html