Improve on 'approxidate'

classic Classic list List threaded Threaded
6 messages Options
Reply | Threaded
Open this post in threaded view
|

Improve on 'approxidate'

Linus Torvalds-3

This is not a new failure mode - approxidate has always been kind of
random in the input it accepts, but some of the randomness is more
irritating than others.

For example:

        Jun 6, 5AM -> Mon Jun 22 05:00:00 2009
        5AM Jun 6 -> Sat Jun  6 05:00:00 2009

Whaa? The reason for the above is that approxidate squirrels away the '6'
from "Jun 6" to see if it's going to be a relative number, and then
forgets about it when it sees a new number (the '5' in '5AM'). So the odd
"June 22" date is because today is August 22nd, and if it doesn't have
another day of the month, it will just pick today's mday - having ignored
the '6' entirely due to getting all excited about seeing a new number (5).

There are other oddnesses. This does not fix them all, but I think it
makes for fewer _really_ perplexing cases. At least now we have

        Jun 6, 5AM -> Sat Jun  6 05:00:00 2009
        5AM, Jun 6 -> Sat Jun  6 05:00:00 2009

which makes me happier. I can still point to cases that don't work as
well, but those are separate issues.

Signed-off-by: Linus Torvalds <[hidden email]>
---
 date.c |   93 +++++++++++++++++++++++++++++++++++++++++++--------------------
 1 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/date.c b/date.c
index 409a17d..51c6461 100644
--- a/date.c
+++ b/date.c
@@ -525,11 +525,8 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
  }
  }
 
- if (num > 0 && num < 32) {
- tm->tm_mday = num;
- } else if (num > 0 && num < 13) {
+ if (num > 0 && num < 13 && tm->tm_mon < 0)
  tm->tm_mon = num-1;
- }
 
  return n;
 }
@@ -657,42 +654,59 @@ void datestamp(char *buf, int bufsize)
  date_string(now, offset, buf, bufsize);
 }
 
-static void update_tm(struct tm *tm, unsigned long sec)
+/*
+ * Relative time update (eg "2 days ago").  If we haven't set the time
+ * yet, we need to set it from current time.
+ */
+static unsigned long update_tm(struct tm *tm, struct tm *now, unsigned long sec)
 {
- time_t n = mktime(tm) - sec;
+ time_t n;
+
+ if (tm->tm_mday < 0)
+ tm->tm_mday = now->tm_mday;
+ if (tm->tm_mon < 0)
+ tm->tm_mon = now->tm_mon;
+ if (tm->tm_year < 0) {
+ tm->tm_year = now->tm_year;
+ if (tm->tm_mon > now->tm_mon)
+ tm->tm_year--;
+ }
+
+ n = mktime(tm) - sec;
  localtime_r(&n, tm);
+ return n;
 }
 
-static void date_yesterday(struct tm *tm, int *num)
+static void date_yesterday(struct tm *tm, struct tm *now, int *num)
 {
- update_tm(tm, 24*60*60);
+ update_tm(tm, now, 24*60*60);
 }
 
-static void date_time(struct tm *tm, int hour)
+static void date_time(struct tm *tm, struct tm *now, int hour)
 {
  if (tm->tm_hour < hour)
- date_yesterday(tm, NULL);
+ date_yesterday(tm, now, NULL);
  tm->tm_hour = hour;
  tm->tm_min = 0;
  tm->tm_sec = 0;
 }
 
-static void date_midnight(struct tm *tm, int *num)
+static void date_midnight(struct tm *tm, struct tm *now, int *num)
 {
- date_time(tm, 0);
+ date_time(tm, now, 0);
 }
 
-static void date_noon(struct tm *tm, int *num)
+static void date_noon(struct tm *tm, struct tm *now, int *num)
 {
- date_time(tm, 12);
+ date_time(tm, now, 12);
 }
 
-static void date_tea(struct tm *tm, int *num)
+static void date_tea(struct tm *tm, struct tm *now, int *num)
 {
- date_time(tm, 17);
+ date_time(tm, now, 17);
 }
 
-static void date_pm(struct tm *tm, int *num)
+static void date_pm(struct tm *tm, struct tm *now, int *num)
 {
  int hour, n = *num;
  *num = 0;
@@ -706,7 +720,7 @@ static void date_pm(struct tm *tm, int *num)
  tm->tm_hour = (hour % 12) + 12;
 }
 
-static void date_am(struct tm *tm, int *num)
+static void date_am(struct tm *tm, struct tm *now, int *num)
 {
  int hour, n = *num;
  *num = 0;
@@ -720,7 +734,7 @@ static void date_am(struct tm *tm, int *num)
  tm->tm_hour = (hour % 12);
 }
 
-static void date_never(struct tm *tm, int *num)
+static void date_never(struct tm *tm, struct tm *now, int *num)
 {
  time_t n = 0;
  localtime_r(&n, tm);
@@ -728,7 +742,7 @@ static void date_never(struct tm *tm, int *num)
 
 static const struct special {
  const char *name;
- void (*fn)(struct tm *, int *);
+ void (*fn)(struct tm *, struct tm *, int *);
 } special[] = {
  { "yesterday", date_yesterday },
  { "noon", date_noon },
@@ -757,7 +771,7 @@ static const struct typelen {
  { NULL }
 };
 
-static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
+static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm *now, int *num)
 {
  const struct typelen *tl;
  const struct special *s;
@@ -778,7 +792,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
  for (s = special; s->name; s++) {
  int len = strlen(s->name);
  if (match_string(date, s->name) == len) {
- s->fn(tm, num);
+ s->fn(tm, now, num);
  return end;
  }
  }
@@ -800,7 +814,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
  while (tl->type) {
  int len = strlen(tl->type);
  if (match_string(date, tl->type) >= len-1) {
- update_tm(tm, tl->length * *num);
+ update_tm(tm, now, tl->length * *num);
  *num = 0;
  return end;
  }
@@ -818,7 +832,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
  n++;
  diff += 7*n;
 
- update_tm(tm, diff * 24 * 60 * 60);
+ update_tm(tm, now, diff * 24 * 60 * 60);
  return end;
  }
  }
@@ -866,6 +880,22 @@ static const char *approxidate_digit(const char *date, struct tm *tm, int *num)
  return end;
 }
 
+/*
+ * Do we have a pending number at the end, or when
+ * we see a new one? Let's assume it's a month day,
+ * as in "Dec 6, 1992"
+ */
+static void pending_number(struct tm *tm, int *num)
+{
+ int number = *num;
+
+ if (number) {
+ *num = 0;
+ if (tm->tm_mday < 0 && number < 32)
+ tm->tm_mday = number;
+ }
+}
+
 unsigned long approxidate(const char *date)
 {
  int number = 0;
@@ -881,21 +911,24 @@ unsigned long approxidate(const char *date)
  time_sec = tv.tv_sec;
  localtime_r(&time_sec, &tm);
  now = tm;
+
+ tm.tm_year = -1;
+ tm.tm_mon = -1;
+ tm.tm_mday = -1;
+
  for (;;) {
  unsigned char c = *date;
  if (!c)
  break;
  date++;
  if (isdigit(c)) {
+ pending_number(&tm, &number);
  date = approxidate_digit(date-1, &tm, &number);
  continue;
  }
  if (isalpha(c))
- date = approxidate_alpha(date-1, &tm, &number);
+ date = approxidate_alpha(date-1, &tm, &now, &number);
  }
- if (number > 0 && number < 32)
- tm.tm_mday = number;
- if (tm.tm_mon > now.tm_mon && tm.tm_year == now.tm_year)
- tm.tm_year--;
- return mktime(&tm);
+ pending_number(&tm, &number);
+ return update_tm(&tm, &now, 0);
 }
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Further 'approxidate' improvements

Linus Torvalds-3

The previous patch to improve approxidate got us to the point that a lot
of the remaining annoyances were due to the 'strict' date handling running
first, and deciding that it got a good enough date that the approximate
date routines were never even invoked.

For example, using a date string like

        6AM, June 7, 2009

the strict date logic would be perfectly happy with the "June 7, 2009"
part, and ignore the 6AM part that it didn't understand - resulting in the
information getting dropped on the floor:

        6AM, June 7, 2009 -> Sat Jun 6 00:00:00 2009

and the date being calculated as if it was midnight, and the '6AM' having
confused the date routines into thinking about '6 June' rather than 'June
7' at 6AM (ie notice how the _day_ was wrong due to this, not just the
time).

So this makes the strict date routines a bit stricter, and requires that
not just the date, but also the time, has actually been parsed. With that
fix, and trivial extension of the approxidate routines, git now properly
parses the date as

        6AM, June 7, 2009 -> Sun Jun  7 06:00:00 2009

without dropping the fuzzy time ("6AM" or "noon" or any of the other
non-strict time formats) on the floor.

Signed-off-by: Linus Torvalds <[hidden email]>
---
On Sat, 22 Aug 2009, Linus Torvalds wrote:
>
> There are other oddnesses. This does not fix them all, but I think it
> makes for fewer _really_ perplexing cases. At least now we have
>
> Jun 6, 5AM -> Sat Jun  6 05:00:00 2009
> 5AM, Jun 6 -> Sat Jun  6 05:00:00 2009
>
> which makes me happier. I can still point to cases that don't work as
> well, but those are separate issues.

This gets rid of the remaining "obviously bogus" issues with parsing of
fuzzy dates. I'm sure there are other issues still remaining, but now I
can't come up with any trivial cases any more without having clear
garbage in the string.

So trying to date-parse nonsensical crud still gives odd results:

        I ate six hot-dogs in June -> Sat Jun  6 18:09:26 2009

because it parses "six" and "June" and then puts it together as a date,
and then adds the current time (and year) and is happy.

But parsing random things amusingly is a _feature_. Misparsing something
that makes sense as a date is a bug.

 date.c |   32 +++++++++++++++++++++++++++-----
 1 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/date.c b/date.c
index 51c6461..1de1845 100644
--- a/date.c
+++ b/date.c
@@ -24,6 +24,8 @@ time_t tm_to_time_t(const struct tm *tm)
  return -1;
  if (month < 2 || (year + 2) % 4)
  day--;
+ if (tm->tm_hour < 0 || tm->tm_min < 0 || tm->tm_sec < 0)
+ return -1;
  return (year * 365 + (year + 1) / 4 + mdays[month] + day) * 24*60*60UL +
  tm->tm_hour * 60*60 + tm->tm_min * 60 + tm->tm_sec;
 }
@@ -425,13 +427,19 @@ static int match_multi_number(unsigned long num, char c, const char *date, char
  return end - date;
 }
 
-/* Have we filled in any part of the time/date yet? */
+/*
+ * Have we filled in any part of the time/date yet?
+ * We just do a binary 'and' to see if the sign bit
+ * is set in all the values.
+ */
 static inline int nodate(struct tm *tm)
 {
- return tm->tm_year < 0 &&
- tm->tm_mon < 0 &&
- tm->tm_mday < 0 &&
- !(tm->tm_hour | tm->tm_min | tm->tm_sec);
+ return (tm->tm_year &
+ tm->tm_mon &
+ tm->tm_mday &
+ tm->tm_hour &
+ tm->tm_min &
+ tm->tm_sec) < 0;
 }
 
 /*
@@ -580,6 +588,9 @@ int parse_date(const char *date, char *result, int maxlen)
  tm.tm_mon = -1;
  tm.tm_mday = -1;
  tm.tm_isdst = -1;
+ tm.tm_hour = -1;
+ tm.tm_min = -1;
+ tm.tm_sec = -1;
  offset = -1;
  tm_gmt = 0;
 
@@ -893,6 +904,17 @@ static void pending_number(struct tm *tm, int *num)
  *num = 0;
  if (tm->tm_mday < 0 && number < 32)
  tm->tm_mday = number;
+ else if (tm->tm_mon < 0 && number < 13)
+ tm->tm_mon = number-1;
+ else if (tm->tm_year < 0) {
+ if (number > 1969 && number < 2100)
+ tm->tm_year = number - 1900;
+ else if (number > 69 && number < 100)
+ tm->tm_year = number;
+ else if (number < 38)
+ tm->tm_year = 100 + number;
+ /* We screw up for number = 00 ? */
+ }
  }
 }
 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: Further 'approxidate' improvements

Nicolas Pitre
On Sat, 22 Aug 2009, Linus Torvalds wrote:

> So trying to date-parse nonsensical crud still gives odd results:
>
> I ate six hot-dogs in June -> Sat Jun  6 18:09:26 2009
>
> because it parses "six" and "June" and then puts it together as a date,
> and then adds the current time (and year) and is happy.
>
> But parsing random things amusingly is a _feature_. Misparsing something
> that makes sense as a date is a bug.

Maybe it would be a good idea to write a test just for this, so that known
cases that make sense aren't accidentally broken by later modifications
that add more such cases.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: Improve on 'approxidate'

Jeff King
In reply to this post by Linus Torvalds-3
On Sat, Aug 22, 2009 at 03:10:07PM -0700, Linus Torvalds wrote:

>  unsigned long approxidate(const char *date)
>  {
>   int number = 0;
> @@ -881,21 +911,24 @@ unsigned long approxidate(const char *date)
>   time_sec = tv.tv_sec;
>   localtime_r(&time_sec, &tm);
>   now = tm;
> +
> + tm.tm_year = -1;
> + tm.tm_mon = -1;
> + tm.tm_mday = -1;

This breaks relative dates like "3.months.ago", because
approxidate_alpha needs to see the "current" date in tm (and now it sees
-1, subtracts from it, and assumes we are just crossing a year boundary
because of the negative).  3.years.ago is also broken, but I don't think
3.days.ago is.

Probably we just need to pass "now" to approxidate_alpha, and it needs
to call update_tm under the case for "months" and "years" (and I haven't
quite figured out why those are not part of the "tl" list).
Unfortunately, I'm out of time to look at it more right now, but I'll
take a look tonight or tomorrow if you don't beat me to it.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: Improve on 'approxidate'

Jeff King
On Sun, Aug 30, 2009 at 06:35:58PM -0400, Jeff King wrote:

> > + tm.tm_year = -1;
> > + tm.tm_mon = -1;
> > + tm.tm_mday = -1;
>
> This breaks relative dates like "3.months.ago", because
> approxidate_alpha needs to see the "current" date in tm (and now it sees
> -1, subtracts from it, and assumes we are just crossing a year boundary
> because of the negative).  3.years.ago is also broken, but I don't think
> 3.days.ago is.
>
> Probably we just need to pass "now" to approxidate_alpha, and it needs
> to call update_tm under the case for "months" and "years" (and I haven't
> quite figured out why those are not part of the "tl" list).
> Unfortunately, I'm out of time to look at it more right now, but I'll
> take a look tonight or tomorrow if you don't beat me to it.

OK, I looked at it.

The fix is pretty straightforward. We _do_ already pass "now" to
approxidate_alpha, and it looks like you already fixed the "typelen"
array case (which handles seconds, minutes, hours, days, and weeks) by
calling update_tm. But all of those units are convertible to seconds,
and months and years are not, which explains why they are handled
separately.

So I think we can just "cheat" and call update_tm to fill in the fields
from "now" as we would for the other units, and then tweak the "struct
tm" as we did before. I.e.,:

diff --git a/date.c b/date.c
index 8e57e5e..e9ee4aa 100644
--- a/date.c
+++ b/date.c
@@ -857,7 +857,9 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm
  }
 
  if (match_string(date, "months") >= 5) {
- int n = tm->tm_mon - *num;
+ int n;
+ update_tm(tm, now, 0); /* fill in date fields if needed */
+ n = tm->tm_mon - *num;
  *num = 0;
  while (n < 0) {
  n += 12;
@@ -868,6 +870,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm
  }
 
  if (match_string(date, "years") >= 4) {
+ update_tm(tm, now, 0); /* fill in date fields if needed */
  tm->tm_year -= *num;
  *num = 0;
  return end;

I'll wrap this fix up in a commit message with tests and add it to the
"test approxidate" series I'm brewing.

-Peff
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: Improve on 'approxidate'

Linus Torvalds-3
In reply to this post by Jeff King


On Sun, 30 Aug 2009, Jeff King wrote:
>
> This breaks relative dates like "3.months.ago", because
> approxidate_alpha needs to see the "current" date in tm (and now it sees
> -1, subtracts from it, and assumes we are just crossing a year boundary
> because of the negative).  3.years.ago is also broken, but I don't think
> 3.days.ago is.

Gaah. Thanks for noticing and the fixes. I had tested the relative modes,
but only the "fixed offset" ones (days, hours, minutes, seconds), not the
months and years cases.

                        Linus
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html