[PATCH] blame: can specify shas of commits to ignore on command line

classic Classic list List threaded Threaded
8 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

[PATCH] blame: can specify shas of commits to ignore on command line

Dylan Reid
Add the ability for git-blame to ignore changes that occur in
certain commits.  The new "-I <sha>" argument can be used to specify
a commit that should be ignored.  This is useful if you have a
few commits that you know didn't cause a problem, for example you
switched from "u8" to "uint8_t" and it messed up your history.

   Allow multiple commits to be specified and store an array in the
scoreboard structure so it is accessible.  Add should_ignore_commit
function to check if a commit should be ignored. Call
"should_ignore_commit" from blame_overlap and if the commit should
be ignored then pass all the blame on to the parent of the ignored
commit.  This is done by calling "pass_whole_blame" which has been
relocated to a above blame_overlap, but is unchanged.

Signed-off-by: Dylan Reid <[hidden email]>
---
 Documentation/blame-options.txt |    6 ++
 builtin/blame.c                 |  124 +++++++++++++++++++++++++++++----------
 t/t8003-blame.sh                |   42 +++++++++++++
 3 files changed, 141 insertions(+), 31 deletions(-)

diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
index d820569..eb9c825 100644
--- a/Documentation/blame-options.txt
+++ b/Documentation/blame-options.txt
@@ -52,6 +52,12 @@ of lines before or after the line given by <start>.
 --porcelain::
  Show in a format designed for machine consumption.
 
+--ignore-commit <sha>::
+ Ignore the specified commit when assigning blame.  This is
+ useful if there are commits in the history that make non
+ functional changes to the lines you are interested in
+ finding blame for.
+
 --incremental::
  Show the result incrementally in a format designed for
  machine consumption.
diff --git a/builtin/blame.c b/builtin/blame.c
index fc15863..e4bd095 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -176,6 +176,14 @@ struct blame_entry {
 };
 
 /*
+ * List of any commits to ignore
+ */
+struct ignore_commits {
+ unsigned char (*ignore_shas)[20];
+ unsigned num_ignore_shas;
+};
+
+/*
  * The current state of the blame assignment.
  */
 struct scoreboard {
@@ -198,6 +206,9 @@ struct scoreboard {
  /* look-up a line in the final buffer */
  int num_lines;
  int *lineno;
+
+ /* List of the shas of commits that should be ignored */
+ struct ignore_commits *ignored_commits;
 };
 
 static inline int same_suspect(struct origin *a, struct origin *b)
@@ -653,6 +664,44 @@ static void decref_split(struct blame_entry *split)
 }
 
 /*
+ * The blobs of origin and porigin exactly match, so everything
+ * origin is suspected for can be blamed on the parent.
+ */
+static void pass_whole_blame(struct scoreboard *sb,
+     struct origin *origin, struct origin *porigin)
+{
+   struct blame_entry *e;
+
+   if (!porigin->file.ptr && origin->file.ptr) {
+      /* Steal its file */
+      porigin->file = origin->file;
+      origin->file.ptr = NULL;
+   }
+   for (e = sb->ent; e; e = e->next) {
+      if (!same_suspect(e->suspect, origin))
+ continue;
+      origin_incref(porigin);
+      origin_decref(e->suspect);
+      e->suspect = porigin;
+   }
+}
+
+/*
+ * Helper to determine if the given commit should be ignored
+ */
+static unsigned should_ignore_commit(const struct scoreboard *sb,
+     struct commit *commit)
+{
+ unsigned i;
+ unsigned num_shas = sb->ignored_commits->num_ignore_shas;
+ for(i = 0; i < num_shas; i++)
+ if(!hashcmp(commit->object.sha1,
+    sb->ignored_commits->ignore_shas[i]))
+ return 1;
+ return 0;
+}
+
+/*
  * Helper for blame_chunk().  blame_entry e is known to overlap with
  * the patch hunk; split it and pass blame to the parent.
  */
@@ -660,12 +709,15 @@ static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
   int tlno, int plno, int same,
   struct origin *parent)
 {
- struct blame_entry split[3];
-
- split_overlap(split, e, tlno, plno, same, parent);
- if (split[1].suspect)
- split_blame(sb, split, e);
- decref_split(split);
+ if(should_ignore_commit(sb, e->suspect->commit))
+ pass_whole_blame(sb, e->suspect, parent);
+ else {
+ struct blame_entry split[3];
+ split_overlap(split, e, tlno, plno, same, parent);
+ if (split[1].suspect)
+ split_blame(sb, split, e);
+ decref_split(split);
+ }
 }
 
 /*
@@ -1105,29 +1157,6 @@ static int find_copy_in_parent(struct scoreboard *sb,
 }
 
 /*
- * The blobs of origin and porigin exactly match, so everything
- * origin is suspected for can be blamed on the parent.
- */
-static void pass_whole_blame(struct scoreboard *sb,
-     struct origin *origin, struct origin *porigin)
-{
- struct blame_entry *e;
-
- if (!porigin->file.ptr && origin->file.ptr) {
- /* Steal its file */
- porigin->file = origin->file;
- origin->file.ptr = NULL;
- }
- for (e = sb->ent; e; e = e->next) {
- if (!same_suspect(e->suspect, origin))
- continue;
- origin_incref(porigin);
- origin_decref(e->suspect);
- e->suspect = porigin;
- }
-}
-
-/*
  * We pass blame from the current commit to its parents.  We keep saying
  * "parent" (and "porigin"), but what we mean is to find scapegoat to
  * exonerate ourselves.
@@ -1540,8 +1569,14 @@ static void assign_blame(struct scoreboard *sb, int opt)
 
  /* Take responsibility for the remaining entries */
  for (ent = sb->ent; ent; ent = ent->next)
- if (same_suspect(ent->suspect, suspect))
- found_guilty_entry(ent);
+ if (same_suspect(ent->suspect, suspect)) {
+ if (should_ignore_commit(sb, commit) &&
+    ent->suspect->previous)
+ pass_whole_blame(sb, ent->suspect,
+ ent->suspect->previous);
+ else
+ found_guilty_entry(ent);
+   }
  origin_decref(suspect);
 
  if (DEBUG) /* sanity */
@@ -2204,6 +2239,24 @@ static int blame_bottomtop_callback(const struct option *option, const char *arg
  return 0;
 }
 
+static int blame_ignorecommit_callback(const struct option *option,
+       const char *arg, int unset)
+{
+ struct ignore_commits *commits = option->value;
+ if (!arg)
+ return -1;
+ commits->ignore_shas =
+ realloc(commits->ignore_shas,
+ ( (commits->num_ignore_shas + 1) *
+  20 ));
+ if(!get_sha1(arg, commits->ignore_shas[commits->num_ignore_shas]))
+ commits->num_ignore_shas++;
+ else
+ return -1;
+
+ return 0;
+}
+
 int cmd_blame(int argc, const char **argv, const char *prefix)
 {
  struct rev_info revs;
@@ -2215,6 +2268,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
  const char *final_commit_name = NULL;
  enum object_type type;
 
+ static struct ignore_commits ignored_commits;
  static const char *bottomtop = NULL;
  static int output_option = 0, opt = 0;
  static int show_stats = 0;
@@ -2239,6 +2293,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
  { OPTION_CALLBACK, 'C', NULL, &opt, "score", "Find line copies within and across files", PARSE_OPT_OPTARG, blame_copy_callback },
  { OPTION_CALLBACK, 'M', NULL, &opt, "score", "Find line movements within and across files", PARSE_OPT_OPTARG, blame_move_callback },
  OPT_CALLBACK('L', NULL, &bottomtop, "n,m", "Process only line range n,m, counting from 1", blame_bottomtop_callback),
+ OPT_CALLBACK(0, "ignore-commit", &ignored_commits, "sha", "don't blame the specified commit for anything", blame_ignorecommit_callback),
  OPT_END()
  };
 
@@ -2252,6 +2307,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
  save_commit_buffer = 0;
  dashdash_pos = 0;
 
+ memset(&ignored_commits, 0, sizeof(ignored_commits));
+
  parse_options_start(&ctx, argc, argv, prefix, PARSE_OPT_KEEP_DASHDASH |
     PARSE_OPT_KEEP_ARGV0);
  for (;;) {
@@ -2369,6 +2426,8 @@ parse_done:
  setup_revisions(argc, argv, &revs, NULL);
  memset(&sb, 0, sizeof(sb));
 
+ sb.ignored_commits = &ignored_commits;
+
  sb.revs = &revs;
  if (!reverse)
  final_commit_name = prepare_final(&sb);
@@ -2468,6 +2527,9 @@ parse_done:
  ent = e;
  }
 
+ if (ignored_commits.ignore_shas)
+ free(ignored_commits.ignore_shas);
+
  if (show_stats) {
  printf("num read blob: %d\n", num_read_blob);
  printf("num get patch: %d\n", num_get_patch);
diff --git a/t/t8003-blame.sh b/t/t8003-blame.sh
index 230143c..f90d37a 100755
--- a/t/t8003-blame.sh
+++ b/t/t8003-blame.sh
@@ -185,4 +185,46 @@ test_expect_success 'indent of line numbers, ten lines' '
  test $(grep -c "  " actual) = 9
 '
 
+test_expect_success 'blame ignore setup with split' '
+ echo A A A >one
+ echo b b b b b >> one
+ echo c c c c c >> one
+ git add one
+ GIT_AUTHOR_NAME=AddLine git commit -m "add As"
+ INIT_ADD_SHA=`git log --pretty=format:"%H" HEAD^..`
+ echo " " A A A >one
+ echo b b b b b >> one
+ echo c c c c c >> one
+ git add one
+ GIT_AUTHOR_NAME=Indent git commit -m "reindent"
+ IGNORE_SHA=`git log --pretty=format:"%H" HEAD^..`
+'
+
+test_expect_success 'blame ignore commit' '
+ git blame --ignore-commit $IGNORE_SHA one > ignored &&
+ test $(grep -c "AddLine" ignored) = 3
+'
+
+test_expect_success 'blame ignore first commit' '
+ git blame --ignore-commit $IGNORE_SHA --ignore-commit $INIT_ADD_SHA \
+ $INIT_ADD_SHA.. -- one > ignore_1st &&
+ test $(grep -c "AddLine" ignore_1st) = 3
+'
+
+test_expect_success 'blame ignore setup avoid blame_overlap' '
+ echo A A A >one
+ git add one
+ GIT_AUTHOR_NAME=AddLine git commit -m "add As"
+ INIT_ADD_SHA=`git log --pretty=format:"%H" HEAD^..`
+ echo " " A A A >one
+ git add one
+ GIT_AUTHOR_NAME=Indent git commit -m "reindent"
+ IGNORE_SHA=`git log --pretty=format:"%H" HEAD^..`
+'
+
+test_expect_success 'blame ignore commit no blame_overlap call' '
+ git blame --ignore-commit $IGNORE_SHA one > ignored &&
+ test $(grep -c "Indent" ignored) = 0
+'
+
 test_done
--
1.7.1.6.gc7a9

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

René Scharfe
Am 04.05.2010 04:21, schrieb Dylan Reid:
> Add the ability for git-blame to ignore changes that occur in
> certain commits.  The new "-I <sha>" argument can be used to specify
> a commit that should be ignored.  This is useful if you have a
> few commits that you know didn't cause a problem, for example you
> switched from "u8" to "uint8_t" and it messed up your history.

Only the long option --ignore-commit works with your patch, -I doesn't.

>    Allow multiple commits to be specified and store an array in the
> scoreboard structure so it is accessible.  Add should_ignore_commit
> function to check if a commit should be ignored. Call
> "should_ignore_commit" from blame_overlap and if the commit should
> be ignored then pass all the blame on to the parent of the ignored
> commit.  This is done by calling "pass_whole_blame" which has been
> relocated to a above blame_overlap, but is unchanged.
>
> Signed-off-by: Dylan Reid <[hidden email]>
> ---
>  Documentation/blame-options.txt |    6 ++
>  builtin/blame.c                 |  124 +++++++++++++++++++++++++++++----------
>  t/t8003-blame.sh                |   42 +++++++++++++
>  3 files changed, 141 insertions(+), 31 deletions(-)

Oh, nice!  And with tests and documentation! :)

> diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
> index d820569..eb9c825 100644
> --- a/Documentation/blame-options.txt
> +++ b/Documentation/blame-options.txt
> @@ -52,6 +52,12 @@ of lines before or after the line given by <start>.
>  --porcelain::
>   Show in a format designed for machine consumption.
>  
> +--ignore-commit <sha>::
> + Ignore the specified commit when assigning blame.  This is
> + useful if there are commits in the history that make non
> + functional changes to the lines you are interested in
> + finding blame for.
> +
>  --incremental::
>   Show the result incrementally in a format designed for
>   machine consumption.
> diff --git a/builtin/blame.c b/builtin/blame.c
> index fc15863..e4bd095 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -176,6 +176,14 @@ struct blame_entry {
>  };
>  
>  /*
> + * List of any commits to ignore
> + */
> +struct ignore_commits {
> + unsigned char (*ignore_shas)[20];
> + unsigned num_ignore_shas;
> +};
> +
> +/*
>   * The current state of the blame assignment.
>   */
>  struct scoreboard {
> @@ -198,6 +206,9 @@ struct scoreboard {
>   /* look-up a line in the final buffer */
>   int num_lines;
>   int *lineno;
> +
> + /* List of the shas of commits that should be ignored */
> + struct ignore_commits *ignored_commits;
>  };
>  
>  static inline int same_suspect(struct origin *a, struct origin *b)
> @@ -653,6 +664,44 @@ static void decref_split(struct blame_entry *split)
>  }
>  
>  /*
> + * The blobs of origin and porigin exactly match, so everything
> + * origin is suspected for can be blamed on the parent.
> + */
> +static void pass_whole_blame(struct scoreboard *sb,
> +     struct origin *origin, struct origin *porigin)
> +{
> +   struct blame_entry *e;
> +
> +   if (!porigin->file.ptr && origin->file.ptr) {
> +      /* Steal its file */
> +      porigin->file = origin->file;
> +      origin->file.ptr = NULL;
> +   }
> +   for (e = sb->ent; e; e = e->next) {
> +      if (!same_suspect(e->suspect, origin))
> + continue;
> +      origin_incref(porigin);
> +      origin_decref(e->suspect);
> +      e->suspect = porigin;
> +   }
> +}

This function was moved from below, but it seems to be indented with
three spaces instead of tabs now.  Adding a declaration without moving
the function would avoid that and result in a smaller patch.

> +
> +/*
> + * Helper to determine if the given commit should be ignored
> + */
> +static unsigned should_ignore_commit(const struct scoreboard *sb,
> +     struct commit *commit)
> +{
> + unsigned i;
> + unsigned num_shas = sb->ignored_commits->num_ignore_shas;
> + for(i = 0; i < num_shas; i++)
> + if(!hashcmp(commit->object.sha1,
> +    sb->ignored_commits->ignore_shas[i]))
> + return 1;
> + return 0;
> +}
> +
> +/*
>   * Helper for blame_chunk().  blame_entry e is known to overlap with
>   * the patch hunk; split it and pass blame to the parent.
>   */
> @@ -660,12 +709,15 @@ static void blame_overlap(struct scoreboard *sb, struct blame_entry *e,
>    int tlno, int plno, int same,
>    struct origin *parent)
>  {
> - struct blame_entry split[3];
> -
> - split_overlap(split, e, tlno, plno, same, parent);
> - if (split[1].suspect)
> - split_blame(sb, split, e);
> - decref_split(split);
> + if(should_ignore_commit(sb, e->suspect->commit))
> + pass_whole_blame(sb, e->suspect, parent);
> + else {
> + struct blame_entry split[3];
> + split_overlap(split, e, tlno, plno, same, parent);
> + if (split[1].suspect)
> + split_blame(sb, split, e);
> + decref_split(split);
> + }
>  }
>  
>  /*
> @@ -1105,29 +1157,6 @@ static int find_copy_in_parent(struct scoreboard *sb,
>  }
>  
>  /*
> - * The blobs of origin and porigin exactly match, so everything
> - * origin is suspected for can be blamed on the parent.
> - */
> -static void pass_whole_blame(struct scoreboard *sb,
> -     struct origin *origin, struct origin *porigin)
> -{
> - struct blame_entry *e;
> -
> - if (!porigin->file.ptr && origin->file.ptr) {
> - /* Steal its file */
> - porigin->file = origin->file;
> - origin->file.ptr = NULL;
> - }
> - for (e = sb->ent; e; e = e->next) {
> - if (!same_suspect(e->suspect, origin))
> - continue;
> - origin_incref(porigin);
> - origin_decref(e->suspect);
> - e->suspect = porigin;
> - }
> -}
> -
> -/*
>   * We pass blame from the current commit to its parents.  We keep saying
>   * "parent" (and "porigin"), but what we mean is to find scapegoat to
>   * exonerate ourselves.
> @@ -1540,8 +1569,14 @@ static void assign_blame(struct scoreboard *sb, int opt)
>  
>   /* Take responsibility for the remaining entries */
>   for (ent = sb->ent; ent; ent = ent->next)
> - if (same_suspect(ent->suspect, suspect))
> - found_guilty_entry(ent);
> + if (same_suspect(ent->suspect, suspect)) {
> + if (should_ignore_commit(sb, commit) &&
> +    ent->suspect->previous)
> + pass_whole_blame(sb, ent->suspect,
> + ent->suspect->previous);
> + else
> + found_guilty_entry(ent);
> +   }

An ignored commit can still be blamed if there is no other commit to
pass it on.  So e.g. the initial commit for the file could end up being
blamed for lines that were added by later commits which are being
ignored.  That may look confusing.

Would it make sense to pass the blame to some kind of null commit, i.e.
a special marker that says "I could tell you who is to blame for this
line but you said you don't want to know"?

René
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

Dylan Reid
Thanks for the speedy reply.

> Only the long option --ignore-commit works with your patch, -I doesn't.
>
Correct.  I took that out because I didn't want to use "-I' which
could eventually be used by a more commonly used feature.  Do you
agree or do you think it is worth using it.  It's a UI decision I
didn't want to make.

>
> This function was moved from below, but it seems to be indented with
> three spaces instead of tabs now.  Adding a declaration without moving
> the function would avoid that and result in a smaller patch.
>

Good catch, I'll do that and re-send.

>
> An ignored commit can still be blamed if there is no other commit to
> pass it on.  So e.g. the initial commit for the file could end up being
> blamed for lines that were added by later commits which are being
> ignored.  That may look confusing.
>
> Would it make sense to pass the blame to some kind of null commit, i.e.
> a special marker that says "I could tell you who is to blame for this
> line but you said you don't want to know"?
>

A null commit could work.  I think the behavior should be to not
ignore the commit. Meaning if you specify a commit that introduced a
line of code that line of code will still be blamed on the ignored
commit.  Does That sound logical or is it too confusing?


Thanks,

Dylan
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

Junio C Hamano
Dylan Reid <[hidden email]> writes:

> A null commit could work.  I think the behavior should be to not
> ignore the commit. Meaning if you specify a commit that introduced a
> line of code that line of code will still be blamed on the ignored
> commit.  Does That sound logical or is it too confusing?

I am already confused.  If the command must return C when you say "ignore
C" and C introduced a line you are interested in, then what is the point
of specifying commits to be ignored?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

Dylan Reid
> I am already confused.  If the command must return C when you say "ignore
> C" and C introduced a line you are interested in, then what is the point
> of specifying commits to be ignored?
>

I was thinking that it would ignore changed lines, not added lines.  Make sense?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

Junio C Hamano
Dylan Reid <[hidden email]> writes:

>> I am already confused.  If the command must return C when you say "ignore
>> C" and C introduced a line you are interested in, then what is the point
>> of specifying commits to be ignored?
>>
>
> I was thinking that it would ignore changed lines, not added lines.  Make sense?

Not really.  I don't think you can distinguish "changed lines" and "added
lines" reliably.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

Dylan Reid
I've noticed that trying to come up with a solution tonight.  I'll
look at it some more tomorrow.  If you can't tell the difference
between added and changed then there would be no easy way to mark a
line as ignored either.

If the changes get split fine enough could you tell that blame_ent
doesn't exist in a parent commit?

Thanks,

Dylan

On Wed, May 5, 2010 at 1:19 AM, Junio C Hamano <[hidden email]> wrote:

> Dylan Reid <[hidden email]> writes:
>
>>> I am already confused.  If the command must return C when you say "ignore
>>> C" and C introduced a line you are interested in, then what is the point
>>> of specifying commits to be ignored?
>>>
>>
>> I was thinking that it would ignore changed lines, not added lines.  Make sense?
>
> Not really.  I don't think you can distinguish "changed lines" and "added
> lines" reliably.
>
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Re: [PATCH] blame: can specify shas of commits to ignore on command line

cnighswonger
I'm not subscribed to the list, so please reply-to-all or cc me on responses.

Is this feature dead? There seems to be some question about the value of such a feature in the discussion along this thread, however, such a feature would have the wonderful benefit of allowing a change such as that being discussed here: http://koha.1045719.n5.nabble.com/Koha-3-8-release-schedule-amp-perltidy-process-td5547369.html to be made and yet preserve the usefulness of git-blame.

To recap the issue in the thread at koha-devel:

A massive perl-tidy is being considered to be applied project wide in a single commit. The real show-stopper is that this breaks git-blame. If git-blame could be told to ignore that single commit id, life would be good (I think).

At any rate it provides a use-case which might justify such a feature.

Kind Regards,
Chris
Loading...