[PATCH v4 0/1] http: Add Accept-Language header if possible

classic Classic list List threaded Threaded
46 messages Options
123
Reply | Threaded
Open this post in threaded view
|

[PATCH v4 0/1] http: Add Accept-Language header if possible

Yi EungJun
Changes since v3:

* Fix styles and syntax. (Thanks to Jeff King and Eric Sunshine)
* Cache Accept-Language header. (Thanks to Jeff King)
* Remove floating point numbers. (Thanks to Junio C Hamano)
* Make the for-loop to get the value of the header simpler.
* Add more comments.

Yi EungJun (1):
  http: Add Accept-Language header if possible

 http.c                     | 134 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  31 +++++++++++
 3 files changed, 167 insertions(+)

--
2.0.1.473.g731ddce.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v4 1/1] http: Add Accept-Language header if possible

Yi EungJun
From: Yi EungJun <[hidden email]>

Add an Accept-Language header which indicates the user's preferred
languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

Examples:
  LANGUAGE= -> ""
  LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
  LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
  LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"

This gives git servers a chance to display remote error messages in
the user's preferred language.

Signed-off-by: Yi EungJun <[hidden email]>
---
 http.c                     | 134 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  31 +++++++++++
 3 files changed, 167 insertions(+)

diff --git a/http.c b/http.c
index 3a28b21..ed4e8e1 100644
--- a/http.c
+++ b/http.c
@@ -67,6 +67,8 @@ static struct curl_slist *no_pragma_header;
 
 static struct active_request_slot *active_queue_head;
 
+static struct strbuf *cached_accept_language = NULL;
+
 size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
 {
  size_t size = eltsize * nmemb;
@@ -512,6 +514,9 @@ void http_cleanup(void)
  cert_auth.password = NULL;
  }
  ssl_cert_password_required = 0;
+
+ if (cached_accept_language)
+ strbuf_release(cached_accept_language);
 }
 
 struct active_request_slot *get_active_slot(void)
@@ -983,6 +988,129 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
  strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Guess the user's preferred languages from the value in LANGUAGE environment
+ * variable and LC_MESSAGES locale category.
+ *
+ * The result can be a colon-separated list like "ko:ja:en".
+ */
+static const char *get_preferred_languages(void)
+{
+ const char *retval;
+
+ retval = getenv("LANGUAGE");
+ if (retval && *retval)
+ return retval;
+
+ retval = setlocale(LC_MESSAGES, NULL);
+ if (retval && *retval &&
+ strcmp(retval, "C") &&
+ strcmp(retval, "POSIX"))
+ return retval;
+
+ return NULL;
+}
+
+/*
+ * Get an Accept-Language header which indicates user's preferred languages.
+ *
+ * Examples:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
+ *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
+ *   LANGUAGE= LANG=C -> ""
+ */
+static struct strbuf *get_accept_language(void)
+{
+ const char *lang_begin, *pos;
+ int q, max_q;
+ int num_langs;
+ int decimal_places;
+ int is_codeset_or_modifier = 0;
+ static struct strbuf buf = STRBUF_INIT;
+ struct strbuf q_format_buf = STRBUF_INIT;
+ char *q_format;
+
+ if (cached_accept_language)
+ return cached_accept_language;
+
+ lang_begin = get_preferred_languages();
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (!(lang_begin && *lang_begin)) {
+ cached_accept_language = &buf;
+ return cached_accept_language;
+ }
+
+ /* Count number of preferred lang_begin to decide precision of q-factor */
+ for (num_langs = 1, pos = lang_begin; *pos; pos++)
+ if (*pos == ':')
+ num_langs++;
+
+ /* Decide the precision for q-factor on number of preferred lang_begin. */
+ num_langs += 1; /* for '*' */
+ decimal_places = 1 + (num_langs > 10) + (num_langs > 100);
+ strbuf_addf(&q_format_buf, "; q=0.%%0%dd", decimal_places);
+ q_format = strbuf_detach(&q_format_buf, NULL);
+ for (max_q = 1; decimal_places-- > 0;) max_q *= 10;
+ q = max_q;
+
+ strbuf_addstr(&buf, "Accept-Language: ");
+
+ /*
+ * Convert a list of colon-separated locale values [1][2] to a list of
+ * comma-separated language tags [3] which can be used as a value of
+ * Accept-Language header.
+ *
+ * [1]: http://pubs.opengroup.org/onlinepubs/007908799/xbd/envvar.html
+ * [2]: http://www.gnu.org/software/libc/manual/html_node/Using-gettextized-software.html
+ * [3]: http://tools.ietf.org/html/rfc7231#section-5.3.5
+ */
+ for (pos = lang_begin; ; pos++) {
+ if (*pos == ':' || !*pos) {
+ /* Ignore if this character is the first one. */
+ if (pos == lang_begin)
+ continue;
+
+ is_codeset_or_modifier = 0;
+
+ /* Put a q-factor only if it is less than 1.0. */
+ if (q < max_q)
+ strbuf_addf(&buf, q_format, q);
+
+ if (q > 1)
+ q--;
+
+ /* NULL pos means this is the last language. */
+ if (*pos)
+ strbuf_addstr(&buf, ", ");
+ else
+ break;
+
+ } else if (is_codeset_or_modifier)
+ continue;
+ else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
+ is_codeset_or_modifier = 1;
+ else
+ strbuf_addch(&buf, *pos == '_' ? '-' : *pos);
+ }
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (q >= max_q) {
+ cached_accept_language = &buf;
+ return cached_accept_language;
+ }
+
+ /* Add '*' with minimum q-factor greater than 0.0. */
+ strbuf_addstr(&buf, ", *");
+ strbuf_addf(&buf, q_format, 1);
+
+ cached_accept_language = &buf;
+ return cached_accept_language;
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF 0
 #define HTTP_REQUEST_FILE 1
@@ -995,6 +1123,7 @@ static int http_request(const char *url,
  struct slot_results results;
  struct curl_slist *headers = NULL;
  struct strbuf buf = STRBUF_INIT;
+ struct strbuf* accept_language;
  int ret;
 
  slot = get_active_slot();
@@ -1020,6 +1149,11 @@ static int http_request(const char *url,
  fwrite_buffer);
  }
 
+ accept_language = get_accept_language();
+
+ if (accept_language && accept_language->len > 0)
+ headers = curl_slist_append(headers, accept_language->buf);
+
  strbuf_addstr(&buf, "Pragma:");
  if (options && options->no_cache)
  strbuf_addstr(&buf, " no-cache");
diff --git a/remote-curl.c b/remote-curl.c
index 4493b38..07f2a5d 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -946,6 +946,8 @@ int main(int argc, const char **argv)
  struct strbuf buf = STRBUF_INIT;
  int nongit;
 
+ git_setup_gettext();
+
  git_extract_argv0_path(argv[0]);
  setup_git_directory_gently(&nongit);
  if (argc < 2) {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..d2dac44 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,36 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
  grep "this is the error message" stderr
 '
 
+check_language () {
+ echo "Accept-Language: $1\r" >expect &&
+ test_must_fail env \
+ GIT_CURL_VERBOSE=1 \
+ LANGUAGE=$2 \
+ LC_ALL=$3 \
+ LC_MESSAGES=$4 \
+ LANG=$5 \
+ git clone "$HTTPD_URL/accept/language" 2>stderr &&
+ grep -i ^Accept-Language: stderr >actual &&
+ test_cmp expect actual
+}
+
+test_expect_success 'git client sends Accept-Language based on LANGUAGE, LC_ALL, LC_MESSAGES and LANG' '
+ check_language "ko-KR, *; q=0.1" ko_KR.UTF-8 de_DE.UTF-8 ja_JP.UTF-8 en_US.UTF-8 &&
+ check_language "de-DE, *; q=0.1" ""          de_DE.UTF-8 ja_JP.UTF-8 en_US.UTF-8 &&
+ check_language "ja-JP, *; q=0.1" ""          ""          ja_JP.UTF-8 en_US.UTF-8 &&
+ check_language "en-US, *; q=0.1" ""          ""          ""          en_US.UTF-8
+'
+
+test_expect_success 'git client sends Accept-Language with many preferred languages' '
+ check_language "ko-KR, en-US; q=0.99, fr-CA; q=0.98, de; q=0.97, sr; q=0.96, \
+ja; q=0.95, zh; q=0.94, sv; q=0.93, pt; q=0.92, nb; q=0.91, *; q=0.01" \
+ ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
+'
+
+test_expect_success 'git client does not send Accept-Language' '
+ test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>stderr &&
+ ! grep "^Accept-Language:" stderr
+'
+
 stop_httpd
 test_done
--
2.0.1.473.g731ddce.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v4 1/1] http: Add Accept-Language header if possible

Junio C Hamano
Yi EungJun <[hidden email]> writes:

> From: Yi EungJun <[hidden email]>
>
> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Signed-off-by: Yi EungJun <[hidden email]>
> ---
>  http.c                     | 134 +++++++++++++++++++++++++++++++++++++++++++++
>  remote-curl.c              |   2 +
>  t/t5550-http-fetch-dumb.sh |  31 +++++++++++
>  3 files changed, 167 insertions(+)
>
> diff --git a/http.c b/http.c
> index 3a28b21..ed4e8e1 100644
> --- a/http.c
> +++ b/http.c
> @@ -67,6 +67,8 @@ static struct curl_slist *no_pragma_header;
>  
>  static struct active_request_slot *active_queue_head;
>  
> +static struct strbuf *cached_accept_language = NULL;

Please drop " = NULL" that is unnecessary for BSS.

> @@ -512,6 +514,9 @@ void http_cleanup(void)
>   cert_auth.password = NULL;
>   }
>   ssl_cert_password_required = 0;
> +
> + if (cached_accept_language)
> + strbuf_release(cached_accept_language);
>  }



> @@ -983,6 +988,129 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>   strbuf_addstr(charset, "ISO-8859-1");
>  }
>  
> +/*
> + * Guess the user's preferred languages from the value in LANGUAGE environment
> + * variable and LC_MESSAGES locale category.
> + *
> + * The result can be a colon-separated list like "ko:ja:en".
> + */
> +static const char *get_preferred_languages(void)
> +{
> + const char *retval;
> +
> + retval = getenv("LANGUAGE");
> + if (retval && *retval)
> + return retval;
> +
> + retval = setlocale(LC_MESSAGES, NULL);
> + if (retval && *retval &&
> + strcmp(retval, "C") &&
> + strcmp(retval, "POSIX"))
> + return retval;
> +
> + return NULL;
> +}
> +
> +/*
> + * Get an Accept-Language header which indicates user's preferred languages.
> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static struct strbuf *get_accept_language(void)
> +{
> + const char *lang_begin, *pos;
> + int q, max_q;
> + int num_langs;
> + int decimal_places;
> + int is_codeset_or_modifier = 0;
> + static struct strbuf buf = STRBUF_INIT;
> + struct strbuf q_format_buf = STRBUF_INIT;
> + char *q_format;
> +
> + if (cached_accept_language)
> + return cached_accept_language;
> +
> + lang_begin = get_preferred_languages();
> +
> + /* Don't add Accept-Language header if no language is preferred. */
> + if (!(lang_begin && *lang_begin)) {

It is not wrong per-se, but given how hard get_preferred_languages()
tries not to return a pointer to an empty string, this seems a bit
overly defensive to me.

> + cached_accept_language = &buf;
> + return cached_accept_language;

It is somewhat unconventional to have a static pointer outside to
point at a singleton and then have a singleton actually as a static
structure.  I would have done without "buf" in this function and
instead started this function like so:

        if (cached_accept_language)
        return cached_accept_language;

        cached_accept_language = xmalloc(sizeof(struct strbuf));
        strbuf_init(cached_accept_language, 0);
        lang_begin =  get_preferred_languages();
        if (!lang_begin)
                return cached_accept_language;

> + }
> +
> + /* Count number of preferred lang_begin to decide precision of q-factor */
> + for (num_langs = 1, pos = lang_begin; *pos; pos++)
> + if (*pos == ':')
> + num_langs++;
> +
> + /* Decide the precision for q-factor on number of preferred lang_begin. */
> + num_langs += 1; /* for '*' */


> + decimal_places = 1 + (num_langs > 10) + (num_langs > 100);

What if you got 60000 languages ;-)?  I do not think we want to bend
backwards and make the code list all 60000 of them, assigning a
unique and decreasing q value to each of them, forming an overlong
Accept-Language header, but at the same time, I do not think we want
to show nonsense output because we compute the precision incorrectly
here.

> + strbuf_addf(&q_format_buf, "; q=0.%%0%dd", decimal_places);
> + q_format = strbuf_detach(&q_format_buf, NULL);

q_format_buf is an overkill use of strbuf, isn't it?  Just

        char q_format_buf[32];
        sprintf(q_format_buf, ";q=0.%%0%d", decimal_places);

or something should be more than sufficient, no?

> + for (max_q = 1; decimal_places-- > 0;) max_q *= 10;

As you have to do one loop like this that amounts to computing log10
of num_langs, why not compute decimal_places the same way while at
it?  It may also make sense to cap the number of languages to avoid
spitting out overly long Accept-Language header with practicaly
useless list of many languages.  That is, something along the lines
of ... (note that I may very well have off-by-one or off-by-ten
errors here you may need to tweak to get right):

        if (MAX_LANGS < num_langs)
        num_langs = MAX_LANGS;
        for (max_q = 1, decimal_places = 1;
             max_q < num_langs;
             decimal_places++, max_q *= 10)
             ;

If you are to use the MAX_LANGS cap, the main loop would also need
to pay attention to it by breaking out of the loop early before you
reach the end of the string, of course.

> + q = max_q;
> +
> + strbuf_addstr(&buf, "Accept-Language: ");
> +
> + /*
> + * Convert a list of colon-separated locale values [1][2] to a list of
> + * comma-separated language tags [3] which can be used as a value of
> + * Accept-Language header.
> + *
> + * [1]: http://pubs.opengroup.org/onlinepubs/007908799/xbd/envvar.html
> + * [2]: http://www.gnu.org/software/libc/manual/html_node/Using-gettextized-software.html
> + * [3]: http://tools.ietf.org/html/rfc7231#section-5.3.5
> + */
> + for (pos = lang_begin; ; pos++) {
> + if (*pos == ':' || !*pos) {
> + /* Ignore if this character is the first one. */
> + if (pos == lang_begin)
> + continue;

By doing this "ignore empty" here, but not doing the same when you
count num_langs, are you potentially miscounting num_langs?

> + is_codeset_or_modifier = 0;
> +
> + /* Put a q-factor only if it is less than 1.0. */
> + if (q < max_q)

... is it the same thing as "do not do this for the first round, but
do so for all the other round"?

> + strbuf_addf(&buf, q_format, q);
> +
> + if (q > 1)

Hmm, I am puzzled.  C this ever be an issue (unless you have
off-by-one error or you add "cap num_langs to MAX_LANGS", that is)?

> + q--;

> + /* NULL pos means this is the last language. */
> + if (*pos)
> + strbuf_addstr(&buf, ", ");
> + else
> + break;
> +
> + } else if (is_codeset_or_modifier)
> + continue;
> + else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
> + is_codeset_or_modifier = 1;
> + else
> + strbuf_addch(&buf, *pos == '_' ? '-' : *pos);
> + }
> +
> + /* Don't add Accept-Language header if no language is preferred. */
> + if (q >= max_q) {

Can q go over max_q, or is it "q may be max_q"?  In other words, is
this essentially saying "if we did not find any language in the
preferred languages list"?

> + cached_accept_language = &buf;
> + return cached_accept_language;
> + }
> +
> + /* Add '*' with minimum q-factor greater than 0.0. */
> + strbuf_addstr(&buf, ", *");
> + strbuf_addf(&buf, q_format, 1);
> +
> + cached_accept_language = &buf;
> + return cached_accept_language;
> +}
> +
>  /* http_request() targets */
>  #define HTTP_REQUEST_STRBUF 0
>  #define HTTP_REQUEST_FILE 1
> @@ -995,6 +1123,7 @@ static int http_request(const char *url,
>   struct slot_results results;
>   struct curl_slist *headers = NULL;
>   struct strbuf buf = STRBUF_INIT;
> + struct strbuf* accept_language;

As we write in C, not C++, our asterisks stick to the variable, not
the type.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v4 1/1] http: Add Accept-Language header if possible

Yi EungJun
Thanks very much for your detailed review and sorry for late reply.

2014-07-22 4:01 GMT+09:00 Junio C Hamano <[hidden email]>:

> Yi EungJun <[hidden email]> writes:
>
>> From: Yi EungJun <[hidden email]>
>>
>> Add an Accept-Language header which indicates the user's preferred
>> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>>
>> Examples:
>>   LANGUAGE= -> ""
>>   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
>>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
>>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
>>
>> This gives git servers a chance to display remote error messages in
>> the user's preferred language.
>>
>> Signed-off-by: Yi EungJun <[hidden email]>
>> ---
>>  http.c                     | 134 +++++++++++++++++++++++++++++++++++++++++++++
>>  remote-curl.c              |   2 +
>>  t/t5550-http-fetch-dumb.sh |  31 +++++++++++
>>  3 files changed, 167 insertions(+)
>>
>> diff --git a/http.c b/http.c
>> index 3a28b21..ed4e8e1 100644
>> --- a/http.c
>> +++ b/http.c
>> @@ -67,6 +67,8 @@ static struct curl_slist *no_pragma_header;
>>
>>  static struct active_request_slot *active_queue_head;
>>
>> +static struct strbuf *cached_accept_language = NULL;
>
> Please drop " = NULL" that is unnecessary for BSS.

Thanks, I'll fix it.

>
>> @@ -512,6 +514,9 @@ void http_cleanup(void)
>>               cert_auth.password = NULL;
>>       }
>>       ssl_cert_password_required = 0;
>> +
>> +     if (cached_accept_language)
>> +             strbuf_release(cached_accept_language);
>>  }
>
>
>
>> @@ -983,6 +988,129 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>>               strbuf_addstr(charset, "ISO-8859-1");
>>  }
>>
>> +/*
>> + * Guess the user's preferred languages from the value in LANGUAGE environment
>> + * variable and LC_MESSAGES locale category.
>> + *
>> + * The result can be a colon-separated list like "ko:ja:en".
>> + */
>> +static const char *get_preferred_languages(void)
>> +{
>> +     const char *retval;
>> +
>> +     retval = getenv("LANGUAGE");
>> +     if (retval && *retval)
>> +             return retval;
>> +
>> +     retval = setlocale(LC_MESSAGES, NULL);
>> +     if (retval && *retval &&
>> +             strcmp(retval, "C") &&
>> +             strcmp(retval, "POSIX"))
>> +             return retval;
>> +
>> +     return NULL;
>> +}
>> +
>> +/*
>> + * Get an Accept-Language header which indicates user's preferred languages.
>> + *
>> + * Examples:
>> + *   LANGUAGE= -> ""
>> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
>> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
>> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
>> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
>> + *   LANGUAGE= LANG=C -> ""
>> + */
>> +static struct strbuf *get_accept_language(void)
>> +{
>> +     const char *lang_begin, *pos;
>> +     int q, max_q;
>> +     int num_langs;
>> +     int decimal_places;
>> +     int is_codeset_or_modifier = 0;
>> +     static struct strbuf buf = STRBUF_INIT;
>> +     struct strbuf q_format_buf = STRBUF_INIT;
>> +     char *q_format;
>> +
>> +     if (cached_accept_language)
>> +             return cached_accept_language;
>> +
>> +     lang_begin = get_preferred_languages();
>> +
>> +     /* Don't add Accept-Language header if no language is preferred. */
>> +     if (!(lang_begin && *lang_begin)) {
>
> It is not wrong per-se, but given how hard get_preferred_languages()
> tries not to return a pointer to an empty string, this seems a bit
> overly defensive to me.

Thanks, I'll fix it.

>
>> +             cached_accept_language = &buf;
>> +             return cached_accept_language;
>
> It is somewhat unconventional to have a static pointer outside to
> point at a singleton and then have a singleton actually as a static
> structure.  I would have done without "buf" in this function and
> instead started this function like so:
>
>         if (cached_accept_language)
>                 return cached_accept_language;
>
>         cached_accept_language = xmalloc(sizeof(struct strbuf));
>         strbuf_init(cached_accept_language, 0);
>         lang_begin =  get_preferred_languages();
>         if (!lang_begin)
>                 return cached_accept_language;
>

Thanks, I'll fix it as you suggest.

>> +     }
>> +
>> +     /* Count number of preferred lang_begin to decide precision of q-factor */
>> +     for (num_langs = 1, pos = lang_begin; *pos; pos++)
>> +             if (*pos == ':')
>> +                     num_langs++;
>> +
>> +     /* Decide the precision for q-factor on number of preferred lang_begin. */
>> +     num_langs += 1; /* for '*' */
>
>
>> +     decimal_places = 1 + (num_langs > 10) + (num_langs > 100);
>
> What if you got 60000 languages ;-)?  I do not think we want to bend
> backwards and make the code list all 60000 of them, assigning a
> unique and decreasing q value to each of them, forming an overlong
> Accept-Language header, but at the same time, I do not think we want
> to show nonsense output because we compute the precision incorrectly
> here.

In that case, less-preferred 59,001 languages will have same q-value
of 0.001. Unfortunately, there is no way to represent user's
preferences of over 1,000 languages since the HTTP specification
restricts the range of q-value from 0.001 to 1.000 [1]. I think this
code reflects user's preferences better than Google chrome whose
minimum q-value is 0.2 and Mozilla firefox whose minimum q-value is
0.01 [2].

>
>> +     strbuf_addf(&q_format_buf, "; q=0.%%0%dd", decimal_places);
>> +     q_format = strbuf_detach(&q_format_buf, NULL);
>
> q_format_buf is an overkill use of strbuf, isn't it?  Just
>
>         char q_format_buf[32];
>         sprintf(q_format_buf, ";q=0.%%0%d", decimal_places);
>
> or something should be more than sufficient, no?

Thanks, I'll fix it as you suggest.

>
>> +     for (max_q = 1; decimal_places-- > 0;) max_q *= 10;
>
> As you have to do one loop like this that amounts to computing log10
> of num_langs, why not compute decimal_places the same way while at
> it?  It may also make sense to cap the number of languages to avoid
> spitting out overly long Accept-Language header with practicaly
> useless list of many languages.  That is, something along the lines
> of ... (note that I may very well have off-by-one or off-by-ten
> errors here you may need to tweak to get right):
>
>         if (MAX_LANGS < num_langs)
>                 num_langs = MAX_LANGS;
>         for (max_q = 1, decimal_places = 1;
>              max_q < num_langs;
>              decimal_places++, max_q *= 10)
>              ;
>
> If you are to use the MAX_LANGS cap, the main loop would also need
> to pay attention to it by breaking out of the loop early before you
> reach the end of the string, of course.

Thanks for good point. As you said, we should limit the length of the
value of Accept-Language header because some HTTP servers respond 4xx
Client Error if any header's value is very long (4KB or more) [3].

But MAX_LANGS may not be enough because the header can be too long
even if the number of languages does not exceed MAX_LANGS if language
tag is too long. Many of language tags are 2-3 characters but some are
11 characters; Even it is possible that a user has a very long custom
language tag.

I think this problem can be solved by one of these solutions:

A. Set MAX_LANGS conservatively (100 or less).
B. Limit the length of Accept-Language header directly (4KB or less).
C. Negotiate with server; Resend a request with shorter
Accept-Language header if the server responds 4xx error.

>
>> +     q = max_q;
>> +
>> +     strbuf_addstr(&buf, "Accept-Language: ");
>> +
>> +     /*
>> +      * Convert a list of colon-separated locale values [1][2] to a list of
>> +      * comma-separated language tags [3] which can be used as a value of
>> +      * Accept-Language header.
>> +      *
>> +      * [1]: http://pubs.opengroup.org/onlinepubs/007908799/xbd/envvar.html
>> +      * [2]: http://www.gnu.org/software/libc/manual/html_node/Using-gettextized-software.html
>> +      * [3]: http://tools.ietf.org/html/rfc7231#section-5.3.5
>> +      */
>> +     for (pos = lang_begin; ; pos++) {
>> +             if (*pos == ':' || !*pos) {
>> +                     /* Ignore if this character is the first one. */
>> +                     if (pos == lang_begin)
>> +                             continue;
>
> By doing this "ignore empty" here, but not doing the same when you
> count num_langs, are you potentially miscounting num_langs?

Yes, num_langs can be larger than actual number of languages. For
example, if LANGUAGE="en:" num_langs is 2. I wanted to make the logic
to compute num_langs as simple as possible and thought num_langs does
not need to be very accurate because it is used only to compute the
precision of q-factor (max_q and decimal_places).

Do we need to compute num_langs accurately?

>
>> +                     is_codeset_or_modifier = 0;
>> +
>> +                     /* Put a q-factor only if it is less than 1.0. */
>> +                     if (q < max_q)
>
> ... is it the same thing as "do not do this for the first round, but
> do so for all the other round"?

Yes, q-factor is q/max_q and q-factor of 1.0 is not necessary because:
> if no "q" parameter is present, the default weight is 1.
> -- http://tools.ietf.org/html/rfc7231#section-5.3.1

>
>> +                             strbuf_addf(&buf, q_format, q);
>> +
>> +                     if (q > 1)
>
> Hmm, I am puzzled.  C this ever be an issue (unless you have
> off-by-one error or you add "cap num_langs to MAX_LANGS", that is)?

Yes, it may be an issue if the number of languages is larger than the
max_q. max_q must not be larger than 1000 but the number of languages
may be.

But this if-statement will be not necessary if we limit the number of
languages to 1,000 or less (by MAX_LANGS you suggest).

>
>> +                             q--;
>
>> +                     /* NULL pos means this is the last language. */
>> +                     if (*pos)
>> +                             strbuf_addstr(&buf, ", ");
>> +                     else
>> +                             break;
>> +
>> +             } else if (is_codeset_or_modifier)
>> +                     continue;
>> +             else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
>> +                     is_codeset_or_modifier = 1;
>> +             else
>> +                     strbuf_addch(&buf, *pos == '_' ? '-' : *pos);
>> +     }
>> +
>> +     /* Don't add Accept-Language header if no language is preferred. */
>> +     if (q >= max_q) {
>
> Can q go over max_q, or is it "q may be max_q"?  In other words, is
> this essentially saying "if we did not find any language in the
> preferred languages list"?

Yes. q may be max_q if we did not find any language in the preferred
languages list. But there is no chance that q goes over max_q.

But it will be not necessary if num_langs is computed accurately.

>
>> +             cached_accept_language = &buf;
>> +             return cached_accept_language;
>> +     }
>> +
>> +     /* Add '*' with minimum q-factor greater than 0.0. */
>> +     strbuf_addstr(&buf, ", *");
>> +     strbuf_addf(&buf, q_format, 1);
>> +
>> +     cached_accept_language = &buf;
>> +     return cached_accept_language;
>> +}
>> +
>>  /* http_request() targets */
>>  #define HTTP_REQUEST_STRBUF  0
>>  #define HTTP_REQUEST_FILE    1
>> @@ -995,6 +1123,7 @@ static int http_request(const char *url,
>>       struct slot_results results;
>>       struct curl_slist *headers = NULL;
>>       struct strbuf buf = STRBUF_INIT;
>> +     struct strbuf* accept_language;
>
> As we write in C, not C++, our asterisks stick to the variable, not
> the type.

Thanks, I'll fix it.

[1]: http://tools.ietf.org/html/rfc7231#section-5.3.1
[2]: https://hg.mozilla.org/integration/mozilla-inbound/rev/1418f9ce6f8b
[3]: http://stackoverflow.com/questions/686217/maximum-on-http-header-values/8623061#8623061
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v5 0/1] http: Add Accept-Language header if possible

Yi EungJun
In reply to this post by Yi EungJun
Changes since v4

* Fix styles as Junio C Hamano suggested.
* Limit number of languages and length of Accept-Language header.

Yi EungJun (1):
  http: Add Accept-Language header if possible

 http.c                     | 154 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  31 +++++++++
 3 files changed, 187 insertions(+)

--
2.2.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v5 1/1] http: Add Accept-Language header if possible

Yi EungJun
From: Yi EungJun <[hidden email]>

Add an Accept-Language header which indicates the user's preferred
languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

Examples:
  LANGUAGE= -> ""
  LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
  LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
  LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"

This gives git servers a chance to display remote error messages in
the user's preferred language.

Limit the number of languages to 1,000 because q-value must not be
smaller than 0.001, and limit the length of Accept-Language header to
4,000 bytes for some HTTP servers which cannot accept such long header.

Signed-off-by: Yi EungJun <[hidden email]>
---
 http.c                     | 154 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  31 +++++++++
 3 files changed, 187 insertions(+)

diff --git a/http.c b/http.c
index 040f362..69624af 100644
--- a/http.c
+++ b/http.c
@@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
 
 static struct active_request_slot *active_queue_head;
 
+static struct strbuf *cached_accept_language;
+
 size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
 {
  size_t size = eltsize * nmemb;
@@ -515,6 +517,9 @@ void http_cleanup(void)
  cert_auth.password = NULL;
  }
  ssl_cert_password_required = 0;
+
+ if (cached_accept_language)
+ strbuf_release(cached_accept_language);
 }
 
 struct active_request_slot *get_active_slot(void)
@@ -986,6 +991,149 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
  strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Guess the user's preferred languages from the value in LANGUAGE environment
+ * variable and LC_MESSAGES locale category.
+ *
+ * The result can be a colon-separated list like "ko:ja:en".
+ */
+static const char *get_preferred_languages(void)
+{
+ const char *retval;
+
+ retval = getenv("LANGUAGE");
+ if (retval && *retval)
+ return retval;
+
+ retval = setlocale(LC_MESSAGES, NULL);
+ if (retval && *retval &&
+ strcmp(retval, "C") &&
+ strcmp(retval, "POSIX"))
+ return retval;
+
+ return NULL;
+}
+
+/*
+ * Get an Accept-Language header which indicates user's preferred languages.
+ *
+ * Examples:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
+ *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
+ *   LANGUAGE= LANG=C -> ""
+ */
+static struct strbuf *get_accept_language(void)
+{
+ const char *lang_begin, *pos;
+ int q, max_q;
+ int num_langs;
+ int decimal_places;
+ int is_codeset_or_modifier = 0;
+ char q_format[32];
+ /*
+ * MAX_LANGS must not be larger than 1,000. If it is larger than that,
+ * q-value will be smaller than 0.001, the minimum q-value the HTTP
+ * specification allows [1].
+ *
+ * [1]: http://tools.ietf.org/html/rfc7231#section-5.3.1
+ */
+ const int MAX_LANGS = 1000;
+ const int MAX_SIZE_OF_HEADER = 4000;
+ int last_size = 0;
+
+ if (cached_accept_language)
+ return cached_accept_language;
+
+ cached_accept_language = xmalloc(sizeof(struct strbuf));
+ strbuf_init(cached_accept_language, 0);
+ lang_begin = get_preferred_languages();
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (!lang_begin)
+ return cached_accept_language;
+
+ /* Count number of preferred lang_begin to decide precision of q-factor. */
+ for (num_langs = 1, pos = lang_begin; *pos; pos++)
+ if (*pos == ':')
+ num_langs++;
+
+ /* Decide the precision for q-factor on number of preferred lang_begin. */
+ num_langs += 1; /* for '*' */
+
+ if (MAX_LANGS < num_langs)
+ num_langs = MAX_LANGS;
+
+ for (max_q = 1, decimal_places = 0;
+ max_q < num_langs;
+ decimal_places++, max_q *= 10);
+
+ sprintf(q_format, ";q=0.%%0%dd", decimal_places);
+
+ q = max_q;
+
+ strbuf_addstr(cached_accept_language, "Accept-Language: ");
+
+ /*
+ * Convert a list of colon-separated locale values [1][2] to a list of
+ * comma-separated language tags [3] which can be used as a value of
+ * Accept-Language header.
+ *
+ * [1]: http://pubs.opengroup.org/onlinepubs/007908799/xbd/envvar.html
+ * [2]: http://www.gnu.org/software/libc/manual/html_node/Using-gettextized-software.html
+ * [3]: http://tools.ietf.org/html/rfc7231#section-5.3.5
+ */
+ for (pos = lang_begin; ; pos++) {
+ if (*pos == ':' || !*pos) {
+ /* Ignore if this character is the first one. */
+ if (pos == lang_begin)
+ continue;
+
+ is_codeset_or_modifier = 0;
+
+ /* Put a q-factor only if it is less than 1.0. */
+ if (q < max_q)
+ strbuf_addf(cached_accept_language, q_format, q);
+
+ if (q > 1)
+ q--;
+
+ last_size = cached_accept_language->len;
+
+ /* NULL pos means this is the last language. */
+ if (*pos)
+ strbuf_addstr(cached_accept_language, ", ");
+ else
+ break;
+
+ } else if (is_codeset_or_modifier)
+ continue;
+ else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
+ is_codeset_or_modifier = 1;
+ else
+ strbuf_addch(cached_accept_language, *pos == '_' ? '-' : *pos);
+
+ if (cached_accept_language->len > MAX_SIZE_OF_HEADER) {
+ strbuf_remove(cached_accept_language, last_size,
+ cached_accept_language->len - last_size);
+ break;
+ }
+ }
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (q >= max_q) {
+ return cached_accept_language;
+ }
+
+ /* Add '*' with minimum q-factor greater than 0.0. */
+ strbuf_addstr(cached_accept_language, ", *");
+ strbuf_addf(cached_accept_language, q_format, 1);
+
+ return cached_accept_language;
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF 0
 #define HTTP_REQUEST_FILE 1
@@ -998,6 +1146,7 @@ static int http_request(const char *url,
  struct slot_results results;
  struct curl_slist *headers = NULL;
  struct strbuf buf = STRBUF_INIT;
+ struct strbuf *accept_language;
  int ret;
 
  slot = get_active_slot();
@@ -1023,6 +1172,11 @@ static int http_request(const char *url,
  fwrite_buffer);
  }
 
+ accept_language = get_accept_language();
+
+ if (accept_language && accept_language->len > 0)
+ headers = curl_slist_append(headers, accept_language->buf);
+
  strbuf_addstr(&buf, "Pragma:");
  if (options && options->no_cache)
  strbuf_addstr(&buf, " no-cache");
diff --git a/remote-curl.c b/remote-curl.c
index dd63bc2..04989e5 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -962,6 +962,8 @@ int main(int argc, const char **argv)
  struct strbuf buf = STRBUF_INIT;
  int nongit;
 
+ git_setup_gettext();
+
  git_extract_argv0_path(argv[0]);
  setup_git_directory_gently(&nongit);
  if (argc < 2) {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..197c361 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,36 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
  grep "this is the error message" stderr
 '
 
+check_language () {
+ echo "Accept-Language: $1\r" >expect &&
+ test_must_fail env \
+ GIT_CURL_VERBOSE=1 \
+ LANGUAGE=$2 \
+ LC_ALL=$3 \
+ LC_MESSAGES=$4 \
+ LANG=$5 \
+ git clone "$HTTPD_URL/accept/language" 2>stderr &&
+ grep -i ^Accept-Language: stderr >actual &&
+ test_cmp expect actual
+}
+
+test_expect_success 'git client sends Accept-Language based on LANGUAGE, LC_ALL, LC_MESSAGES and LANG' '
+ check_language "ko-KR, *;q=0.1" ko_KR.UTF-8 de_DE.UTF-8 ja_JP.UTF-8 en_US.UTF-8 &&
+ check_language "de-DE, *;q=0.1" ""          de_DE.UTF-8 ja_JP.UTF-8 en_US.UTF-8 &&
+ check_language "ja-JP, *;q=0.1" ""          ""          ja_JP.UTF-8 en_US.UTF-8 &&
+ check_language "en-US, *;q=0.1" ""          ""          ""          en_US.UTF-8
+'
+
+test_expect_success 'git client sends Accept-Language with many preferred languages' '
+ check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
+ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.01" \
+ ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
+'
+
+test_expect_success 'git client does not send Accept-Language' '
+ test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>stderr &&
+ ! grep "^Accept-Language:" stderr
+'
+
 stop_httpd
 test_done
--
2.2.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v5 1/1] http: Add Accept-Language header if possible

Junio C Hamano
Yi EungJun <[hidden email]> writes:

> From: Yi EungJun <[hidden email]>
>
> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Limit the number of languages to 1,000 because q-value must not be
> smaller than 0.001, and limit the length of Accept-Language header to
> 4,000 bytes for some HTTP servers which cannot accept such long header.
>
> Signed-off-by: Yi EungJun <[hidden email]>
> ---
>  http.c                     | 154 +++++++++++++++++++++++++++++++++++++++++++++
>  remote-curl.c              |   2 +
>  t/t5550-http-fetch-dumb.sh |  31 +++++++++
>  3 files changed, 187 insertions(+)
>
> diff --git a/http.c b/http.c
> index 040f362..69624af 100644
> --- a/http.c
> +++ b/http.c
> @@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
>  
>  static struct active_request_slot *active_queue_head;
>  
> +static struct strbuf *cached_accept_language;
> +
>  size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
>  {
>   size_t size = eltsize * nmemb;
> @@ -515,6 +517,9 @@ void http_cleanup(void)
>   cert_auth.password = NULL;
>   }
>   ssl_cert_password_required = 0;
> +
> + if (cached_accept_language)
> + strbuf_release(cached_accept_language);

Is this correct?

You still keep cached_accept_language pointer itself, so the next
call to get_accept_language() would say "Ah, cached_accept_language
is there, so its contents (which is empty because we released it
here) must be valid and to be reused".  Perhaps you want to free it,
too, i.e.

        if (cached_accept_language) {
                strbuf_release(cached_accept_language);
                free(cached_accept_language);
                cached_accept_language = NULL;
        }

or something?

> +static struct strbuf *get_accept_language(void)
> +{
> + ...
> + if (cached_accept_language)
> + return cached_accept_language;
> +
> + cached_accept_language = xmalloc(sizeof(struct strbuf));
> + ...
> + for (max_q = 1, decimal_places = 0;
> + max_q < num_langs;
> + decimal_places++, max_q *= 10);

Have that "empty statement" on its own separate line, i.e.

        for (a, counter = 0;
             b;
             c, counter++)
             ; /* just counting */

Alternatively, you can make it more obvious that the purpose of loop
is to count, i.e.

        for (a, counter = 0;
             b;
             c)
             counter++;

> +test_expect_success 'git client does not send Accept-Language' '
> + test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>stderr &&
> + ! grep "^Accept-Language:" stderr
> +'

Hmph, this test smells a bit brittle.  What is the reason you expect
"git clone" to fail?  Is it because there is no repository at the
named URL at "$HTTPD_URL/accept/language"?  Is that the only plausible
reason for a failure?

It might be better to use the URL to a repository that is expected
to be served by the server started in this test and expect success.
If it bothers you that "clone" creates a new copy that is otherwise
unused by this test, you can use something like "ls-remote" instead,
I would think.
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v5 1/1] http: Add Accept-Language header if possible

Eric Sunshine
In reply to this post by Yi EungJun
On Tue, Dec 2, 2014 at 7:12 AM, Yi EungJun <[hidden email]> wrote:

> From: Yi EungJun <[hidden email]>
>
> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Limit the number of languages to 1,000 because q-value must not be
> smaller than 0.001, and limit the length of Accept-Language header to
> 4,000 bytes for some HTTP servers which cannot accept such long header.
>
> Signed-off-by: Yi EungJun <[hidden email]>
> ---
> diff --git a/http.c b/http.c
> index 040f362..69624af 100644
> --- a/http.c
> +++ b/http.c
> @@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
>
>  static struct active_request_slot *active_queue_head;
>
> +static struct strbuf *cached_accept_language;

Is there a reason this needs to be a pointer to a strbuf rather than
just a strbuf? That is wouldn't this work?

    static struct strbuf cached_accept_language = STRBUF_INIT;

>  size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
>  {
>         size_t size = eltsize * nmemb;
> @@ -515,6 +517,9 @@ void http_cleanup(void)
>                 cert_auth.password = NULL;
>         }
>         ssl_cert_password_required = 0;
> +
> +       if (cached_accept_language)
> +               strbuf_release(cached_accept_language);

Junio already mentioned that this is leaking the memory of the strbuf
struct itself which was xmalloc()'d by get_accept_language().

>  }
>
>  struct active_request_slot *get_active_slot(void)
> @@ -986,6 +991,149 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>                 strbuf_addstr(charset, "ISO-8859-1");
>  }
>
> +/*
> + * Guess the user's preferred languages from the value in LANGUAGE environment
> + * variable and LC_MESSAGES locale category.
> + *
> + * The result can be a colon-separated list like "ko:ja:en".
> + */
> +static const char *get_preferred_languages(void)
> +{
> +       const char *retval;
> +
> +       retval = getenv("LANGUAGE");
> +       if (retval && *retval)
> +               return retval;
> +
> +       retval = setlocale(LC_MESSAGES, NULL);
> +       if (retval && *retval &&
> +               strcmp(retval, "C") &&
> +               strcmp(retval, "POSIX"))
> +               return retval;
> +
> +       return NULL;

Mental note: This function will never return an empty string "", even
if LANGUAGE or LC_MESSAGES is empty.

> +}
> +
> +/*
> + * Get an Accept-Language header which indicates user's preferred languages.
> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static struct strbuf *get_accept_language(void)

I find this API a bit strange. Use of strbuf to construct the returned
string is an implementation detail of this function. From the caller's
point of view, it should just be receiving a constant string: one
which it needs neither to modify nor free. Also, if the caller were to
modify the returned strbuf for some reason, then that modification
would impact all future calls to get_accept_language() since the
strbuf is 'static' and not recomputed. Instead, I would expect the
declaration to be:

    static const char *get_accept_language(void)

> +{
> +       const char *lang_begin, *pos;
> +       int q, max_q;
> +       int num_langs;
> +       int decimal_places;
> +       int is_codeset_or_modifier = 0;
> +       char q_format[32];
> +       const int MAX_LANGS = 1000;
> +       const int MAX_SIZE_OF_HEADER = 4000;
> +       int last_size = 0;
> +
> +       if (cached_accept_language)
> +               return cached_accept_language;
> +
> +       cached_accept_language = xmalloc(sizeof(struct strbuf));
> +       strbuf_init(cached_accept_language, 0);
> +       lang_begin = get_preferred_languages();

Mental note: lang_begin will never be the empty string "".

> +       /* Don't add Accept-Language header if no language is preferred. */
> +       if (!lang_begin)
> +               return cached_accept_language;
> +
> +       /* Count number of preferred lang_begin to decide precision of q-factor. */
> +       for (num_langs = 1, pos = lang_begin; *pos; pos++)
> +               if (*pos == ':')
> +                       num_langs++;
> +
> +       /* Decide the precision for q-factor on number of preferred lang_begin. */
> +       num_langs += 1; /* for '*' */
> +
> +       if (MAX_LANGS < num_langs)
> +               num_langs = MAX_LANGS;
> +
> +       for (max_q = 1, decimal_places = 0;
> +               max_q < num_langs;
> +               decimal_places++, max_q *= 10);
> +
> +       sprintf(q_format, ";q=0.%%0%dd", decimal_places);
> +
> +       q = max_q;
> +
> +       strbuf_addstr(cached_accept_language, "Accept-Language: ");
> +
> +       for (pos = lang_begin; ; pos++) {
> +               if (*pos == ':' || !*pos) {
> +                       /* Ignore if this character is the first one. */
> +                       if (pos == lang_begin)
> +                               continue;

If lang_begin were ever to point at an empty string "", then this
logic would access memory beyond the end-of-string. Since
get_preferred_languages() won't return an empty string, this case is
safe, but it's not obvious to the casual reader and some person
modifying the code in the future might not notice this restriction. I
personally would feel more comfortable if the no-empty-string
assumption was documented formally with an assert(*lang_begin) or
die() before entering the loop; or if the code was rewritten to be
less fragile.

> +                       is_codeset_or_modifier = 0;
> +
> +                       /* Put a q-factor only if it is less than 1.0. */
> +                       if (q < max_q)
> +                               strbuf_addf(cached_accept_language, q_format, q);
> +
> +                       if (q > 1)
> +                               q--;
> +
> +                       last_size = cached_accept_language->len;
> +
> +                       /* NULL pos means this is the last language. */

Nit: This feels somewhat backward. It sounds like the comment is
explaining the 'then' part of the 'if' rather than the 'else' part.
Perhaps rephrase like this:

    /* non-NULL pos means more languages */

or, better yet, just drop the comment since the code is self-explanatory.

> +                       if (*pos)
> +                               strbuf_addstr(cached_accept_language, ", ");
> +                       else
> +                               break;
> +
> +               } else if (is_codeset_or_modifier)
> +                       continue;
> +               else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
> +                       is_codeset_or_modifier = 1;
> +               else
> +                       strbuf_addch(cached_accept_language, *pos == '_' ? '-' : *pos);
> +
> +               if (cached_accept_language->len > MAX_SIZE_OF_HEADER) {

Mental note: Here you respect MAX_SIZE_OF_HEADER.

> +                       strbuf_remove(cached_accept_language, last_size,
> +                                       cached_accept_language->len - last_size);
> +                       break;
> +               }
> +       }
> +
> +       /* Don't add Accept-Language header if no language is preferred. */

Is this comment correct? The "Accept-Language:" header was already
added to cached_accept_language much earlier in the function.

> +       if (q >= max_q) {
> +               return cached_accept_language;
> +       }

Style: Unnecessary braces.

> +       /* Add '*' with minimum q-factor greater than 0.0. */
> +       strbuf_addstr(cached_accept_language, ", *");
> +       strbuf_addf(cached_accept_language, q_format, 1);

Here you don't respect MAX_SIZE_OF_HEADER.

> +
> +       return cached_accept_language;
> +}
> +
>  /* http_request() targets */
>  #define HTTP_REQUEST_STRBUF    0
>  #define HTTP_REQUEST_FILE      1
> @@ -998,6 +1146,7 @@ static int http_request(const char *url,
>         struct slot_results results;
>         struct curl_slist *headers = NULL;
>         struct strbuf buf = STRBUF_INIT;
> +       struct strbuf *accept_language;
>         int ret;
>
>         slot = get_active_slot();
> @@ -1023,6 +1172,11 @@ static int http_request(const char *url,
>                                          fwrite_buffer);
>         }
>
> +       accept_language = get_accept_language();
> +
> +       if (accept_language && accept_language->len > 0)
> +               headers = curl_slist_append(headers, accept_language->buf);
> +
>         strbuf_addstr(&buf, "Pragma:");
>         if (options && options->no_cache)
>                 strbuf_addstr(&buf, " no-cache");
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v5 1/1] http: Add Accept-Language header if possible

Junio C Hamano
Eric Sunshine <[hidden email]> writes:

>> @@ -515,6 +517,9 @@ void http_cleanup(void)
>>                 cert_auth.password = NULL;
>>         }
>>         ssl_cert_password_required = 0;
>> +
>> +       if (cached_accept_language)
>> +               strbuf_release(cached_accept_language);
>
> Junio already mentioned that this is leaking the memory of the strbuf
> struct itself which was xmalloc()'d by get_accept_language().

I actually didn't ;-)  A singleton cached_accept_language strbuf
itself being kept around, with its reuse by get_accept_language(),
is fine and is not a leak.  But clearing the strbuf alone will
introduce correctness problem---the second HTTP connection will see
an empty strbuf, get_accept_language() will say "we've already
computed and the header we must issue is an empty string", which is
not correct.

In the fix-up "SQUASH???" commit I queued on top of this patch on
'pu', I had to run "sort -u" on the output to the standard error
stream, as there seemed to be two HTTP connections and the actual
output had two headers, even though the test expected only one in
the output.  I suspect that it is a fallout from this bug that the
original code passed that test that expects only one.

>> +static struct strbuf *get_accept_language(void)
>
> I find this API a bit strange. Use of strbuf to construct the returned
> string is an implementation detail of this function. From the caller's
> point of view, it should just be receiving a constant string: one
> which it needs neither to modify nor free. Also, if the caller were to
> modify the returned strbuf for some reason, then that modification
> would impact all future calls to get_accept_language() since the
> strbuf is 'static' and not recomputed. Instead, I would expect the
> declaration to be:
>
>     static const char *get_accept_language(void)

Makes sense to me.

>> +                       /* Put a q-factor only if it is less than 1.0. */
>> +                       if (q < max_q)
>> +                               strbuf_addf(cached_accept_language, q_format, q);
>> +
>> +                       if (q > 1)
>> +                               q--;

I didn't mention this but if q ever goes below 1, wouldn't it mean
that there is no point continuing this loop?
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v5 1/1] http: Add Accept-Language header if possible

Michael Blume
On Wed, Dec 3, 2014 at 1:37 PM, Junio C Hamano <[hidden email]> wrote:

> Eric Sunshine <[hidden email]> writes:
>
>>> @@ -515,6 +517,9 @@ void http_cleanup(void)
>>>                 cert_auth.password = NULL;
>>>         }
>>>         ssl_cert_password_required = 0;
>>> +
>>> +       if (cached_accept_language)
>>> +               strbuf_release(cached_accept_language);
>>
>> Junio already mentioned that this is leaking the memory of the strbuf
>> struct itself which was xmalloc()'d by get_accept_language().
>
> I actually didn't ;-)  A singleton cached_accept_language strbuf
> itself being kept around, with its reuse by get_accept_language(),
> is fine and is not a leak.  But clearing the strbuf alone will
> introduce correctness problem---the second HTTP connection will see
> an empty strbuf, get_accept_language() will say "we've already
> computed and the header we must issue is an empty string", which is
> not correct.
>
> In the fix-up "SQUASH???" commit I queued on top of this patch on
> 'pu', I had to run "sort -u" on the output to the standard error
> stream, as there seemed to be two HTTP connections and the actual
> output had two headers, even though the test expected only one in
> the output.  I suspect that it is a fallout from this bug that the
> original code passed that test that expects only one.
>
>>> +static struct strbuf *get_accept_language(void)
>>
>> I find this API a bit strange. Use of strbuf to construct the returned
>> string is an implementation detail of this function. From the caller's
>> point of view, it should just be receiving a constant string: one
>> which it needs neither to modify nor free. Also, if the caller were to
>> modify the returned strbuf for some reason, then that modification
>> would impact all future calls to get_accept_language() since the
>> strbuf is 'static' and not recomputed. Instead, I would expect the
>> declaration to be:
>>
>>     static const char *get_accept_language(void)
>
> Makes sense to me.
>
>>> +                       /* Put a q-factor only if it is less than 1.0. */
>>> +                       if (q < max_q)
>>> +                               strbuf_addf(cached_accept_language, q_format, q);
>>> +
>>> +                       if (q > 1)
>>> +                               q--;
>
> I didn't mention this but if q ever goes below 1, wouldn't it mean
> that there is no point continuing this loop?
> --
> To unsubscribe from this list: send the line "unsubscribe git" in
> the body of a message to [hidden email]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
This seems to be failing under Mac OS for me

not ok 25 - git client sends Accept-Language based on LANGUAGE,
LC_ALL, LC_MESSAGES and LANG
#
# check_language "ko-KR, *;q=0.1" ko_KR.UTF-8 de_DE.UTF-8 ja_JP.UTF-8
en_US.UTF-8 &&
# check_language "de-DE, *;q=0.1" ""          de_DE.UTF-8 ja_JP.UTF-8
en_US.UTF-8 &&
# check_language "ja-JP, *;q=0.1" ""          ""          ja_JP.UTF-8
en_US.UTF-8 &&
# check_language "en-US, *;q=0.1" ""          ""          ""
en_US.UTF-8
#
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v5 1/1] http: Add Accept-Language header if possible

Michael Blume
On Wed, Dec 3, 2014 at 2:00 PM, Michael Blume <[hidden email]> wrote:

> On Wed, Dec 3, 2014 at 1:37 PM, Junio C Hamano <[hidden email]> wrote:
>> Eric Sunshine <[hidden email]> writes:
>>
>>>> @@ -515,6 +517,9 @@ void http_cleanup(void)
>>>>                 cert_auth.password = NULL;
>>>>         }
>>>>         ssl_cert_password_required = 0;
>>>> +
>>>> +       if (cached_accept_language)
>>>> +               strbuf_release(cached_accept_language);
>>>
>>> Junio already mentioned that this is leaking the memory of the strbuf
>>> struct itself which was xmalloc()'d by get_accept_language().
>>
>> I actually didn't ;-)  A singleton cached_accept_language strbuf
>> itself being kept around, with its reuse by get_accept_language(),
>> is fine and is not a leak.  But clearing the strbuf alone will
>> introduce correctness problem---the second HTTP connection will see
>> an empty strbuf, get_accept_language() will say "we've already
>> computed and the header we must issue is an empty string", which is
>> not correct.
>>
>> In the fix-up "SQUASH???" commit I queued on top of this patch on
>> 'pu', I had to run "sort -u" on the output to the standard error
>> stream, as there seemed to be two HTTP connections and the actual
>> output had two headers, even though the test expected only one in
>> the output.  I suspect that it is a fallout from this bug that the
>> original code passed that test that expects only one.
>>
>>>> +static struct strbuf *get_accept_language(void)
>>>
>>> I find this API a bit strange. Use of strbuf to construct the returned
>>> string is an implementation detail of this function. From the caller's
>>> point of view, it should just be receiving a constant string: one
>>> which it needs neither to modify nor free. Also, if the caller were to
>>> modify the returned strbuf for some reason, then that modification
>>> would impact all future calls to get_accept_language() since the
>>> strbuf is 'static' and not recomputed. Instead, I would expect the
>>> declaration to be:
>>>
>>>     static const char *get_accept_language(void)
>>
>> Makes sense to me.
>>
>>>> +                       /* Put a q-factor only if it is less than 1.0. */
>>>> +                       if (q < max_q)
>>>> +                               strbuf_addf(cached_accept_language, q_format, q);
>>>> +
>>>> +                       if (q > 1)
>>>> +                               q--;
>>
>> I didn't mention this but if q ever goes below 1, wouldn't it mean
>> that there is no point continuing this loop?
>> --
>> To unsubscribe from this list: send the line "unsubscribe git" in
>> the body of a message to [hidden email]
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> This seems to be failing under Mac OS for me
>
> not ok 25 - git client sends Accept-Language based on LANGUAGE,
> LC_ALL, LC_MESSAGES and LANG
> #
> # check_language "ko-KR, *;q=0.1" ko_KR.UTF-8 de_DE.UTF-8 ja_JP.UTF-8
> en_US.UTF-8 &&
> # check_language "de-DE, *;q=0.1" ""          de_DE.UTF-8 ja_JP.UTF-8
> en_US.UTF-8 &&
> # check_language "ja-JP, *;q=0.1" ""          ""          ja_JP.UTF-8
> en_US.UTF-8 &&
> # check_language "en-US, *;q=0.1" ""          ""          ""
> en_US.UTF-8
> #


verbose results

Initialized empty Git repository in
/Users/Shared/Jenkins/Home/jobs/git/workspace/t/trash
directory.t5550-http-fetch-dumb/.git/
expecting success:
git config push.default matching &&
echo content1 >file &&
git add file &&
git commit -m one
echo content2 >file &&
git add file &&
git commit -m two

[master (root-commit) f5983e9] one
 Author: A U Thor <[hidden email]>
 1 file changed, 1 insertion(+)
 create mode 100644 file
[master 2ff8a06] two
 Author: A U Thor <[hidden email]>
 1 file changed, 1 insertion(+), 1 deletion(-)
ok 1 - setup repository

expecting success:
cp -R .git "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
(cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
git config core.bare true &&
mkdir -p hooks &&
echo "exec git update-server-info" >hooks/post-update &&
chmod +x hooks/post-update &&
hooks/post-update
) &&
git remote add public "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
git push public master:master

Everything up-to-date
ok 2 - create http-accessible bare repository with loose objects

expecting success:
git clone $HTTPD_URL/dumb/repo.git clone-tmpl &&
cp -R clone-tmpl clone &&
test_cmp file clone/file

Cloning into 'clone-tmpl'...
ok 3 - clone http repository

expecting success:
mkdir -p "$HTTPD_DOCUMENT_ROOT_PATH/auth/dumb/" &&
cp -Rf "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" \
      "$HTTPD_DOCUMENT_ROOT_PATH/auth/dumb/repo.git"

ok 4 - create password-protected repository

expecting success:
write_script "$TRASH_DIRECTORY/askpass" <<-\EOF &&
echo >>"$TRASH_DIRECTORY/askpass-query" "askpass: $*" &&
case "$*" in
*Username*)
what=user
;;
*Password*)
what=pass
;;
esac &&
cat "$TRASH_DIRECTORY/askpass-$what"
EOF
GIT_ASKPASS="$TRASH_DIRECTORY/askpass" &&
export GIT_ASKPASS &&
export TRASH_DIRECTORY

ok 5 - setup askpass helper

expecting success:
set_askpass wrong &&
test_must_fail git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-fail &&
expect_askpass both wrong

Cloning into 'clone-auth-fail'...
fatal: Authentication failed for 'http://127.0.0.1:5550/auth/dumb/repo.git/'
ok 6 - cloning password-protected repository can fail

expecting success:
set_askpass wrong &&
git clone "$HTTPD_URL_USER_PASS/auth/dumb/repo.git" clone-auth-none &&
expect_askpass none

Cloning into 'clone-auth-none'...
ok 7 - http auth can use user/pass in URL

expecting success:
set_askpass wrong pass@host &&
git clone "$HTTPD_URL_USER/auth/dumb/repo.git" clone-auth-pass &&
expect_askpass pass user@host

Cloning into 'clone-auth-pass'...
ok 8 - http auth can use just user in URL

expecting success:
set_askpass user@host pass@host &&
git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-both &&
expect_askpass both user@host

Cloning into 'clone-auth-both'...
ok 9 - http auth can request both user and pass

expecting success:
test_config_global credential.helper "!f() {
cat >/dev/null
echo username=user@host
echo password=pass@host
}; f" &&
set_askpass wrong &&
git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-helper &&
expect_askpass none

Cloning into 'clone-auth-helper'...
ok 10 - http auth respects credential helper config

expecting success:
test_config_global "credential.$HTTPD_URL.username" user@host &&
set_askpass wrong pass@host &&
git clone "$HTTPD_URL/auth/dumb/repo.git" clone-auth-user &&
expect_askpass pass user@host

Cloning into 'clone-auth-user'...
ok 11 - http auth can get username from config

expecting success:
test_config_global "credential.$HTTPD_URL.username" wrong &&
set_askpass wrong pass@host &&
git clone "$HTTPD_URL_USER/auth/dumb/repo.git" clone-auth-user2 &&
expect_askpass pass user@host

Cloning into 'clone-auth-user2'...
ok 12 - configured username does not override URL

expecting success:
echo content >>file &&
git commit -a -m two &&
git push public &&
(cd clone && git pull) &&
test_cmp file clone/file

[master d4af499] two
 Author: A U Thor <[hidden email]>
 1 file changed, 1 insertion(+)
To /Users/Shared/Jenkins/Home/jobs/git/workspace/t/trash
directory.t5550-http-fetch-dumb/httpd/www/repo.git
   2ff8a06..d4af499  master -> master
From http://127.0.0.1:5550/dumb/repo
   2ff8a06..d4af499  master     -> origin/master
Updating 2ff8a06..d4af499
Fast-forward
 file | 1 +
 1 file changed, 1 insertion(+)
ok 13 - fetch changes via http

expecting success:
cp -R clone-tmpl clone2 &&

HEAD=$(git rev-parse --verify HEAD) &&
(cd clone2 &&
git http-fetch -a -w heads/master-new $HEAD $(git config remote.origin.url) &&
git checkout master-new &&
test $HEAD = $(git rev-parse --verify HEAD)) &&
test_cmp file clone2/file

Switched to branch 'master-new'
ok 14 - fetch changes via manual http-fetch

expecting success:
git push public master:other &&
(cd clone &&
git remote set-head origin -d &&
git remote set-head origin -a &&
git symbolic-ref refs/remotes/origin/HEAD > output &&
echo refs/remotes/origin/master > expect &&
test_cmp expect output
)

To /Users/Shared/Jenkins/Home/jobs/git/workspace/t/trash
directory.t5550-http-fetch-dumb/httpd/www/repo.git
 * [new branch]      master -> other
origin/HEAD set to master
ok 15 - http remote detects correct HEAD

expecting success:
cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo.git
"$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git &&
(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git &&
git --bare repack -a -d
) &&
git clone $HTTPD_URL/dumb/repo_pack.git

Cloning into 'repo_pack'...
ok 16 - fetch packed objects

expecting success:
cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git
"$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
p=`ls objects/pack/pack-*.pack` &&
chmod u+w $p &&
printf %0256d 0 | dd of=$p bs=256 count=1 seek=1 conv=notrunc
) &&
mkdir repo_bad1.git &&
(cd repo_bad1.git &&
git --bare init &&
test_must_fail git --bare fetch $HTTPD_URL/dumb/repo_bad1.git &&
test 0 = `ls objects/pack/pack-*.pack | wc -l`
)

1+0 records in
1+0 records out
256 bytes transferred in 0.000014 secs (18512790 bytes/sec)
Initialized empty Git repository in
/Users/Shared/Jenkins/Home/jobs/git/workspace/t/trash
directory.t5550-http-fetch-dumb/repo_bad1.git/
fatal: pack has bad object at offset 168: inflate returned -5
error: Unable to find d4af499a00b28bc0ab78fa94cc6a449fae19b08d under
http://127.0.0.1:5550/dumb/repo_bad1.git
Cannot obtain needed object d4af499a00b28bc0ab78fa94cc6a449fae19b08d
error: fetch failed.
ls: objects/pack/pack-*.pack: No such file or directory
ok 17 - fetch notices corrupt pack

expecting success:
cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git
"$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
p=`ls objects/pack/pack-*.idx` &&
chmod u+w $p &&
printf %0256d 0 | dd of=$p bs=256 count=1 seek=1 conv=notrunc
) &&
mkdir repo_bad2.git &&
(cd repo_bad2.git &&
git --bare init &&
test_must_fail git --bare fetch $HTTPD_URL/dumb/repo_bad2.git &&
test 0 = `ls objects/pack | wc -l`
)

1+0 records in
1+0 records out
256 bytes transferred in 0.000013 secs (19522579 bytes/sec)
Initialized empty Git repository in
/Users/Shared/Jenkins/Home/jobs/git/workspace/t/trash
directory.t5550-http-fetch-dumb/repo_bad2.git/
error: non-monotonic index
/Users/Shared/Jenkins/Home/jobs/git/workspace/t/trash
directory.t5550-http-fetch-dumb/repo_bad2.git/objects/pack/pack-7244949d8d6e59a30923b3fff7801f159ef4ba5d.idx.temp
error: Unable to find d4af499a00b28bc0ab78fa94cc6a449fae19b08d under
http://127.0.0.1:5550/dumb/repo_bad2.git
Cannot obtain needed object d4af499a00b28bc0ab78fa94cc6a449fae19b08d
error: fetch failed.
ok 18 - fetch notices corrupt idx

expecting success:
grep /git-upload-pack <"$HTTPD_ROOT_PATH"/access.log >act
: >exp
test_cmp exp act

ok 19 - did not use upload-pack service

expecting success:
test_must_fail git clone "$HTTPD_URL/error/text" 2>stderr &&
grep "this is the error message" stderr

remote: this is the error message
ok 20 - git client shows text/plain errors

expecting success:
test_must_fail git clone "$HTTPD_URL/error/html" 2>stderr &&
! grep "this is the error message" stderr

ok 21 - git client does not show html errors

expecting success:
test_must_fail git clone "$HTTPD_URL/error/charset" 2>stderr &&
grep "this is the error message" stderr

remote: this is the error message
ok 22 - git client shows text/plain with a charset

expecting success:
test_must_fail git clone "$HTTPD_URL/error/utf16" 2>stderr &&
grep "this is the error message" stderr

remote: this is the error message
ok 23 - http error messages are reencoded

expecting success:
test_must_fail git clone "$HTTPD_URL/error/odd-spacing" 2>stderr &&
grep "this is the error message" stderr

remote: this is the error message
ok 24 - reencoding is robust to whitespace oddities

expecting success:
check_language "ko-KR, *;q=0.1" ko_KR.UTF-8 de_DE.UTF-8 ja_JP.UTF-8
en_US.UTF-8 &&
check_language "de-DE, *;q=0.1" ""          de_DE.UTF-8 ja_JP.UTF-8
en_US.UTF-8 &&
check_language "ja-JP, *;q=0.1" ""          ""          ja_JP.UTF-8
en_US.UTF-8 &&
check_language "en-US, *;q=0.1" ""          ""          ""          en_US.UTF-8

not ok 25 - git client sends Accept-Language based on LANGUAGE,
LC_ALL, LC_MESSAGES and LANG
#
# check_language "ko-KR, *;q=0.1" ko_KR.UTF-8 de_DE.UTF-8 ja_JP.UTF-8
en_US.UTF-8 &&
# check_language "de-DE, *;q=0.1" ""          de_DE.UTF-8 ja_JP.UTF-8
en_US.UTF-8 &&
# check_language "ja-JP, *;q=0.1" ""          ""          ja_JP.UTF-8
en_US.UTF-8 &&
# check_language "en-US, *;q=0.1" ""          ""          ""
en_US.UTF-8
#

expecting success:
check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.01" \
ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb

ok 26 - git client sends Accept-Language with many preferred languages

expecting success:
GIT_CURL_VERBOSE=1 LANGUAGE= git ls-remote "$HTTPD_URL/dumb/repo.git"
2>stderr &&
! grep "^Accept-Language:" stderr

d4af499a00b28bc0ab78fa94cc6a449fae19b08d HEAD
d4af499a00b28bc0ab78fa94cc6a449fae19b08d refs/heads/master
d4af499a00b28bc0ab78fa94cc6a449fae19b08d refs/heads/other
ok 27 - git client does not send an empty Accept-Language

# failed 1 among 27 test(s)
1..27
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v6 0/1] http: Add Accept-Language header if possible

Yi EungJun
Changes since v5

From Junio C Hamano's review:

* The tests use `ls-remote` instead of `clone` for tests; I copied the test
  code from ba8e63dc30a80656fddc616f714fb217ad220c04.

* Set cached_accept_langauge to NULL after free it.

From Eric Sunshine's review:

* get_accept_language() returns a pointer to const char instead of strbuf; the
  type of cached_accept_language also has been changed to char* from strbuf*

* write_accept_language(), which is extracted from get_accept_language(),
  respects MAX_SIZE_OF_HEADER.

* The for-loop in write_accept_language() works correctly if lang_begin points
  an empty string.

From Jeff King's advice:

* get_preferred_languages() considers LC_MESSAGES only if NO_GETTEXT is not
  defined.

* Remove the tests for LC_MESSAGES, LANG and LC_ALL.

Yi EungJun (1):
  http: Add Accept-Language header if possible

 http.c                     | 173 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  32 +++++++++
 3 files changed, 207 insertions(+)

--
2.2.0.375.gcd18ce6.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v6 1/1] http: Add Accept-Language header if possible

Yi EungJun
From: Yi EungJun <[hidden email]>

Add an Accept-Language header which indicates the user's preferred
languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

Examples:
  LANGUAGE= -> ""
  LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
  LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
  LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"

This gives git servers a chance to display remote error messages in
the user's preferred language.

Limit the number of languages to 1,000 because q-value must not be
smaller than 0.001, and limit the length of Accept-Language header to
4,000 bytes for some HTTP servers which cannot accept such long header.

Signed-off-by: Yi EungJun <[hidden email]>
---
 http.c                     | 173 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  32 +++++++++
 3 files changed, 207 insertions(+)

diff --git a/http.c b/http.c
index 040f362..7a77708 100644
--- a/http.c
+++ b/http.c
@@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
 
 static struct active_request_slot *active_queue_head;
 
+static char *cached_accept_language;
+
 size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
 {
  size_t size = eltsize * nmemb;
@@ -515,6 +517,11 @@ void http_cleanup(void)
  cert_auth.password = NULL;
  }
  ssl_cert_password_required = 0;
+
+ if (cached_accept_language) {
+ free(cached_accept_language);
+ cached_accept_language = NULL;
+ }
 }
 
 struct active_request_slot *get_active_slot(void)
@@ -986,6 +993,166 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
  strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Guess the user's preferred languages from the value in LANGUAGE environment
+ * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
+ *
+ * The result can be a colon-separated list like "ko:ja:en".
+ */
+static const char *get_preferred_languages(void)
+{
+ const char *retval;
+
+ retval = getenv("LANGUAGE");
+ if (retval && *retval)
+ return retval;
+
+#ifndef NO_GETTEXT
+ retval = setlocale(LC_MESSAGES, NULL);
+ if (retval && *retval &&
+ strcmp(retval, "C") &&
+ strcmp(retval, "POSIX"))
+ return retval;
+#endif
+
+ return NULL;
+}
+
+static void write_accept_language(struct strbuf *buf)
+{
+ const char *lang_begin, *pos;
+ int q, max_q;
+ int num_langs;
+ int decimal_places;
+ const int CODESET_OR_MODIFIER = 1;
+ const int LANGUAGE_TAG = 2;
+ const int SEPARATOR = 3;
+ int is_q_factor_required = 0;
+ int parse_state = 0;
+ char q_format[32];
+ /*
+ * MAX_LANGS must not be larger than 1,000. If it is larger than that,
+ * q-value will be smaller than 0.001, the minimum q-value the HTTP
+ * specification allows [1].
+ *
+ * [1]: http://tools.ietf.org/html/rfc7231#section-5.3.1
+ */
+ const int MAX_LANGS = 1000;
+ const int MAX_SIZE_OF_HEADER = 4000;
+ const int MAX_SIZE_OF_ASTERISK_ELEMENT = 11; /* for ", *;q=0.001" */
+ int last_size = 0;
+
+ lang_begin = get_preferred_languages();
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (!lang_begin)
+ return;
+
+ /* Count number of preferred lang_begin to decide precision of q-factor. */
+ for (num_langs = 1, pos = lang_begin; *pos; pos++)
+ if (*pos == ':')
+ num_langs++;
+
+ /* Decide the precision for q-factor on number of preferred lang_begin. */
+ num_langs += 1; /* for '*' */
+
+ if (MAX_LANGS < num_langs)
+ num_langs = MAX_LANGS;
+
+ for (max_q = 1, decimal_places = 0;
+ max_q < num_langs;
+ decimal_places++, max_q *= 10)
+ ;
+
+ sprintf(q_format, ";q=0.%%0%dd", decimal_places);
+
+ q = max_q;
+
+ strbuf_addstr(buf, "Accept-Language: ");
+
+ /*
+ * Convert a list of colon-separated locale values [1][2] to a list of
+ * comma-separated language tags [3] which can be used as a value of
+ * Accept-Language header.
+ *
+ * [1]: http://pubs.opengroup.org/onlinepubs/007908799/xbd/envvar.html
+ * [2]: http://www.gnu.org/software/libc/manual/html_node/Using-gettextized-software.html
+ * [3]: http://tools.ietf.org/html/rfc7231#section-5.3.5
+ */
+ for (pos = lang_begin; ; pos++) {
+ if (!*pos || *pos == ':') {
+ if (is_q_factor_required) {
+ /* Put a q-factor only if it is less than 1.0. */
+ if (q < max_q)
+ strbuf_addf(buf, q_format, q);
+
+ if (q > 1)
+ q--;
+
+ last_size = buf->len;
+
+ is_q_factor_required = 0;
+ }
+ parse_state = SEPARATOR;
+ } else if (parse_state == CODESET_OR_MODIFIER)
+ continue;
+ else if (*pos == ' ') /* Ignore whitespace character */
+ continue;
+ else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
+ parse_state = CODESET_OR_MODIFIER;
+ else {
+ if (parse_state != LANGUAGE_TAG && q < max_q)
+ strbuf_addstr(buf, ", ");
+ strbuf_addch(buf, *pos == '_' ? '-' : *pos);
+ is_q_factor_required = 1;
+ parse_state = LANGUAGE_TAG;
+ }
+
+ if (buf->len > MAX_SIZE_OF_HEADER - MAX_SIZE_OF_ASTERISK_ELEMENT) {
+ strbuf_remove(buf, last_size, buf->len - last_size);
+ break;
+ }
+
+ if (!*pos)
+ break;
+ }
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (q >= max_q) {
+ strbuf_reset(buf);
+ return;
+ }
+
+ /* Add '*' with minimum q-factor greater than 0.0. */
+ strbuf_addstr(buf, ", *");
+ strbuf_addf(buf, q_format, 1);
+}
+
+/*
+ * Get an Accept-Language header which indicates user's preferred languages.
+ *
+ * This function always return non-NULL string as strbuf_detach() does.
+ *
+ * Examples:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
+ *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
+ *   LANGUAGE= LANG=C -> ""
+ */
+static const char *get_accept_language(void)
+{
+ if (!cached_accept_language) {
+ struct strbuf buf = STRBUF_INIT;
+ write_accept_language(&buf);
+ cached_accept_language = strbuf_detach(&buf, NULL);
+ strbuf_release(&buf);
+ }
+
+ return cached_accept_language;
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF 0
 #define HTTP_REQUEST_FILE 1
@@ -998,6 +1165,7 @@ static int http_request(const char *url,
  struct slot_results results;
  struct curl_slist *headers = NULL;
  struct strbuf buf = STRBUF_INIT;
+ const char *accept_language;
  int ret;
 
  slot = get_active_slot();
@@ -1023,6 +1191,11 @@ static int http_request(const char *url,
  fwrite_buffer);
  }
 
+ accept_language = get_accept_language();
+
+ if (strlen(accept_language) > 0)
+ headers = curl_slist_append(headers, accept_language);
+
  strbuf_addstr(&buf, "Pragma:");
  if (options && options->no_cache)
  strbuf_addstr(&buf, " no-cache");
diff --git a/remote-curl.c b/remote-curl.c
index dd63bc2..04989e5 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -962,6 +962,8 @@ int main(int argc, const char **argv)
  struct strbuf buf = STRBUF_INIT;
  int nongit;
 
+ git_setup_gettext();
+
  git_extract_argv0_path(argv[0]);
  setup_git_directory_gently(&nongit);
  if (argc < 2) {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..1a58b97 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,37 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
  grep "this is the error message" stderr
 '
 
+check_language () {
+ echo "Accept-Language: $1" >expect &&
+ GIT_CURL_VERBOSE=1 \
+ LANGUAGE=$2 \
+ git ls-remote "$HTTPD_URL/dumb/repo.git" 2>&1 |
+ tr -d '\015' |
+ sort -u >stderr &&
+ grep -i ^Accept-Language: stderr >actual &&
+ test_cmp expect actual
+}
+
+test_expect_success 'git client sends Accept-Language based on LANGUAGE' '
+ check_language "ko-KR, *;q=0.1" ko_KR.UTF-8'
+
+test_expect_success 'git client sends Accept-Language correctly with unordinary LANGUAGE' '
+ check_language "ko-KR, *;q=0.1" "ko_KR:" &&
+ check_language "ko-KR, *;q=0.1" " ko_KR" &&
+ check_language "ko-KR, en-US;q=0.9, *;q=0.1" "ko_KR: en_US" &&
+ check_language "ko-KR, en-US;q=0.9, *;q=0.1" "ko_KR::en_US" &&
+ check_language "ko-KR, *;q=0.1" ":::ko_KR"'
+
+test_expect_success 'git client sends Accept-Language with many preferred languages' '
+ check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
+ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.01" \
+ ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
+'
+
+test_expect_success 'git client does not send an empty Accept-Language' '
+ GIT_CURL_VERBOSE=1 LANGUAGE= git ls-remote "$HTTPD_URL/dumb/repo.git" 2>stderr &&
+ ! grep "^Accept-Language:" stderr
+'
+
 stop_httpd
 test_done
--
2.2.0.375.gcd18ce6.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v6 1/1] http: Add Accept-Language header if possible

Junio C Hamano
Yi EungJun <[hidden email]> writes:

> From: Yi EungJun <[hidden email]>
>
> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Limit the number of languages to 1,000 because q-value must not be
> smaller than 0.001, and limit the length of Accept-Language header to
> 4,000 bytes for some HTTP servers which cannot accept such long header.
>
> Signed-off-by: Yi EungJun <[hidden email]>

Overall, this one is a much more pleasant read than the previous
rounds.

> @@ -986,6 +993,166 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>   strbuf_addstr(charset, "ISO-8859-1");
>  }
>  
> +/*
> + * Guess the user's preferred languages from the value in LANGUAGE environment
> + * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
> + *
> + * The result can be a colon-separated list like "ko:ja:en".
> + */
> +static const char *get_preferred_languages(void)
> +{
> + const char *retval;
> +
> + retval = getenv("LANGUAGE");
> + if (retval && *retval)
> + return retval;
> +
> +#ifndef NO_GETTEXT
> + retval = setlocale(LC_MESSAGES, NULL);
> + if (retval && *retval &&
> + strcmp(retval, "C") &&
> + strcmp(retval, "POSIX"))
> + return retval;
> +#endif

A tangent.

I wonder if we want to have something silly like this:

        #ifndef NO_GETTEXT
        #define setlocale(x, y) NULL /* or "C"??? */
        #endif

in a common header (e.g. gettext.h) to avoid sprinkling #ifdefs in
our code.  While I do not think we call setlocale() that often to
warrant such a trick, we already do something very similar to make
git_setup_gettext() a no-op in NO_GETTEXT builds in that header
file, and the change in this patch to remote-curl.c does take
advantage of it already, so...

> +static void write_accept_language(struct strbuf *buf)
> +{
> + const char *lang_begin, *pos;
> + int q, max_q;
> + int num_langs;
> + int decimal_places;
> + const int CODESET_OR_MODIFIER = 1;
> + const int LANGUAGE_TAG = 2;
> + const int SEPARATOR = 3;

Another tangent, but I think we tend to use either #define or enum
for constants, not "const int", in our codebase, for symbolic
constants.  In order to define a set of symbolic constants limited
to a function scope, the "const int" way may be nicer than the other
two methods we have traditionally used.  Perhaps we should promote
such use more widely, write our new code following this example, and
migrate existing ones over time?  I dunno.

> ...
> + /* Decide the precision for q-factor on number of preferred lang_begin. */
> + num_langs += 1; /* for '*' */
> +
> + if (MAX_LANGS < num_langs)
> + num_langs = MAX_LANGS;
> +
> + for (max_q = 1, decimal_places = 0;
> + max_q < num_langs;
> + decimal_places++, max_q *= 10)
> + ;

So, if we have 10 languages (num_langs == 10), decimal_places
becomes 1, max_q becomes 10 ...

> + sprintf(q_format, ";q=0.%%0%dd", decimal_places);

... and we will use "q=0.%01d" as the format.  This is OK because
the first one is given without q= so we use the format only nine
times, counting down from 0.9 to 0.1 in 0.1 increments.

Sounds OK to me (I always miscount this kind of loop and need to
think aloud while doing a sanity check).

> + for (pos = lang_begin; ; pos++) {
> + if (!*pos || *pos == ':') {
> + if (is_q_factor_required) {
> + /* Put a q-factor only if it is less than 1.0. */
> + if (q < max_q)
> + strbuf_addf(buf, q_format, q);
> +
> + if (q > 1)
> + q--;

When does this "if" statement not trigger?  It seems to me that it
will stop decrementing only if you have very many languages (e.g.
num_langs was clipped to MAX_LANGS), and at that point you would not
want to scan and add more languages---is there a reason why you keep
going in such a case and not break out of the loop, i.e.

        if (q-- < 1)
                break;

or something like that?

> + ...
> + if (buf->len > MAX_SIZE_OF_HEADER - MAX_SIZE_OF_ASTERISK_ELEMENT) {
> + strbuf_remove(buf, last_size, buf->len - last_size);
> + break;
> + }
> +
> + if (!*pos)
> + break;

Alternatively use one of these breaks when q goes below 1, perhaps?

> +/*
> + * Get an Accept-Language header which indicates user's preferred languages.
> + *
> + * This function always return non-NULL string as strbuf_detach() does.
> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static const char *get_accept_language(void)
> +{
> + if (!cached_accept_language) {
> + struct strbuf buf = STRBUF_INIT;
> + write_accept_language(&buf);
> + cached_accept_language = strbuf_detach(&buf, NULL);
> + strbuf_release(&buf);

If you detached the associated string from the strbuf, you have
already released the resource from it; no need to release it, I
would think.

> diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
> index ac71418..1a58b97 100755
> --- a/t/t5550-http-fetch-dumb.sh
> +++ b/t/t5550-http-fetch-dumb.sh
> @@ -196,5 +196,37 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
>   grep "this is the error message" stderr
>  '
>  
> +check_language () {
> + echo "Accept-Language: $1" >expect &&
> + GIT_CURL_VERBOSE=1 \
> + LANGUAGE=$2 \
> + git ls-remote "$HTTPD_URL/dumb/repo.git" 2>&1 |
> + tr -d '\015' |
> + sort -u >stderr &&
> + grep -i ^Accept-Language: stderr >actual &&
> + test_cmp expect actual
> +}

This makes it hard to test a case where no Accept-Language: header
should be issued in the request, because at that point we would be
expecting no matching string in the output.

        case "$2" in
        '')
        >expect
                ;;
        ?*)
        echo "Accept-Language: $1" >expect
                ;;
        esac &&
        git ls-remote "$HTTPD_URL/dumb/repo.git" >output 2>&1 &&
        tr -d '\015' <output |
        sort -u |
        sed -ne '/^Accept-Language:/' >actual &&
        test_cmp expect actual

or something like that, perhaps?

And I can see below that we are not testing that "negative" case.

After writing a new shiny feature, it always is tempting to show off
that it triggers when it is expected to and gives an expected
result, but it is equally important to have tests that make sure
that the feature does not trigger when it should not.


> +test_expect_success 'git client sends Accept-Language based on LANGUAGE' '
> + check_language "ko-KR, *;q=0.1" ko_KR.UTF-8'
> +
> +test_expect_success 'git client sends Accept-Language correctly with unordinary LANGUAGE' '
> + check_language "ko-KR, *;q=0.1" "ko_KR:" &&
> + check_language "ko-KR, *;q=0.1" " ko_KR" &&
> + check_language "ko-KR, en-US;q=0.9, *;q=0.1" "ko_KR: en_US" &&
> + check_language "ko-KR, en-US;q=0.9, *;q=0.1" "ko_KR::en_US" &&
> + check_language "ko-KR, *;q=0.1" ":::ko_KR"'
> +
> +test_expect_success 'git client sends Accept-Language with many preferred languages' '
> + check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
> +ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.01" \
> + ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
> +'
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v6 1/1] http: Add Accept-Language header if possible

Eric Sunshine
In reply to this post by Yi EungJun
On Mon, Dec 22, 2014 at 11:44 AM, Yi EungJun <[hidden email]> wrote:

> From: Yi EungJun <[hidden email]>
>
> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Limit the number of languages to 1,000 because q-value must not be
> smaller than 0.001, and limit the length of Accept-Language header to
> 4,000 bytes for some HTTP servers which cannot accept such long header.

Just a few comments and observations below. Alone, they are not
necessarily worth a re-roll, but if you happen to re-roll for some
other reason, perhaps take them into consideration.

> Signed-off-by: Yi EungJun <[hidden email]>
> ---
> diff --git a/http.c b/http.c
> index 040f362..7a77708 100644
> --- a/http.c
> +++ b/http.c
> @@ -986,6 +993,166 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>                 strbuf_addstr(charset, "ISO-8859-1");
>  }
>
> +static void write_accept_language(struct strbuf *buf)
> +{
> + [...]
> +       /*
> +        * MAX_LANGS must not be larger than 1,000. If it is larger than that,
> +        * q-value will be smaller than 0.001, the minimum q-value the HTTP
> +        * specification allows [1].
> +        *
> +        * [1]: http://tools.ietf.org/html/rfc7231#section-5.3.1
> +        */
> +       const int MAX_LANGS = 1000;
> +       const int MAX_SIZE_OF_HEADER = 4000;
> +       const int MAX_SIZE_OF_ASTERISK_ELEMENT = 11; /* for ", *;q=0.001" */

These two MAX_SIZE_* constants are never used individually, but rather
only as (MAX_SIZE_OF_HEADER - MAX_SIZE_OF_ASTERISK_ELEMENT). It might
be a bit more readable to compute the final value here, with a
suitable comment, rather than at point-of-use. Perhaps something like:

    /* limit of some HTTP servers is 4000 - strlen(", *;q=0.001") */
    const int MAX_HEADER_SIZE = 4000 - 11;

More below.

> + [...]
> +       /*
> +        * Convert a list of colon-separated locale values [1][2] to a list of
> +        * comma-separated language tags [3] which can be used as a value of
> +        * Accept-Language header.
> + [...]
> +        */
> +       for (pos = lang_begin; ; pos++) {
> +               if (!*pos || *pos == ':') {
> +                       if (is_q_factor_required) {
> +                               /* Put a q-factor only if it is less than 1.0. */
> +                               if (q < max_q)
> +                                       strbuf_addf(buf, q_format, q);
> +
> +                               if (q > 1)
> +                                       q--;
> +
> +                               last_size = buf->len;
> +
> +                               is_q_factor_required = 0;
> +                       }
> +                       parse_state = SEPARATOR;
> +               } else if (parse_state == CODESET_OR_MODIFIER)
> +                       continue;
> +               else if (*pos == ' ') /* Ignore whitespace character */
> +                       continue;
> +               else if (*pos == '.' || *pos == '@') /* Remove .codeset and @modifier. */
> +                       parse_state = CODESET_OR_MODIFIER;
> +               else {
> +                       if (parse_state != LANGUAGE_TAG && q < max_q)
> +                               strbuf_addstr(buf, ", ");
> +                       strbuf_addch(buf, *pos == '_' ? '-' : *pos);
> +                       is_q_factor_required = 1;
> +                       parse_state = LANGUAGE_TAG;
> +               }
> +
> +               if (buf->len > MAX_SIZE_OF_HEADER - MAX_SIZE_OF_ASTERISK_ELEMENT) {
> +                       strbuf_remove(buf, last_size, buf->len - last_size);
> +                       break;
> +               }
> +
> +               if (!*pos)
> +                       break;
> +       }

Although often suitable when parsing complex inputs, state machines
demand high cognitive load. The input you're parsing, on the other
hand, is straightforward and can easily be processed with a simple
sequential parser, which is easier to reason about and review for
correctness. For instance, something like this:

    while (*s) {
        /* collect language tag */
        for (; *s && *s != '.' && *s != '@' && *s != ':'; s++)
            strbuf_addch(buf, *s == '_' ? '-' : *s);

        /* skip .codeset and @modifier */
        while (*s && *s != ':')
            s++;

        strbuf_addf(buf, q_format, q);
        ... other bookkeeping ...

        if (*s == ':')
            s++;
    }

This example is intentionally simplified but illustrates the general
idea. It lacks comma insertion (left as an exercise for the reader)
and empty language tag handling (":en", "en::ko"); and doesn't take
whitespace into consideration since it wasn't clear why your v6 parser
pays attention to embedded spaces, whereas your earlier versions did
not.

> +       /* Don't add Accept-Language header if no language is preferred. */
> +       if (q >= max_q) {
> +               strbuf_reset(buf);
> +               return;
> +       }
> +
> +       /* Add '*' with minimum q-factor greater than 0.0. */
> +       strbuf_addstr(buf, ", *");
> +       strbuf_addf(buf, q_format, 1);
> +}
> +
> +/*
> + * Get an Accept-Language header which indicates user's preferred languages.
> + *
> + * This function always return non-NULL string as strbuf_detach() does.

A couple comments:

It's not necessary to explain the public API in terms of an
implementation detail. Callers of this function don't care and don't
need to know that the value was constructed via strbuf, nor that it is
somehow dependent upon the behavior of the underlying implementation
of strbuf_detach().

This is a somewhat unusual contract. It's much more common and
idiomatic in C to return NULL as an indication of "no preference" (or
"failure") than to return an empty string.

> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static const char *get_accept_language(void)
> +{
> +       if (!cached_accept_language) {
> +               struct strbuf buf = STRBUF_INIT;
> +               write_accept_language(&buf);
> +               cached_accept_language = strbuf_detach(&buf, NULL);
> +               strbuf_release(&buf);

Junio already mentioned that strbuf_release() is unnecessary following
strbuf_detach().

> +       }
> +
> +       return cached_accept_language;
> +}
> +
>  /* http_request() targets */
>  #define HTTP_REQUEST_STRBUF    0
>  #define HTTP_REQUEST_FILE      1
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v6 1/1] http: Add Accept-Language header if possible

Junio C Hamano
Eric Sunshine <[hidden email]> writes:

> Just a few comments and observations below. Alone, they are not
> necessarily worth a re-roll, but if you happen to re-roll for some
> other reason, perhaps take them into consideration.

I actually think everything you said in this review makes sense and
will make the resulting code a lot better (especially the part on
the parsing loop).

Thanks, as usual, for a careful reading.

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v7 0/1] http: Add Accept-Language header if possible

Yi EungJun
From: Yi EungJun <[hidden email]>

Changes since v6

From Junio C Hamano's review:

* Fix check_language() in t5550-http-fetch-dumb.sh as his suggestion.

From Eric Sunshine's review:

* Rewrite the parser without state.

Yi EungJun (1):
  http: Add Accept-Language header if possible

 http.c                     | 152 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  42 +++++++++++++
 3 files changed, 196 insertions(+)

--
2.2.0.44.g37b3e56.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

[PATCH v7 1/1] http: Add Accept-Language header if possible

Yi EungJun
From: Yi EungJun <[hidden email]>

Add an Accept-Language header which indicates the user's preferred
languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

Examples:
  LANGUAGE= -> ""
  LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
  LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
  LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"

This gives git servers a chance to display remote error messages in
the user's preferred language.

Limit the number of languages to 1,000 because q-value must not be
smaller than 0.001, and limit the length of Accept-Language header to
4,000 bytes for some HTTP servers which cannot accept such long header.

Signed-off-by: Yi EungJun <[hidden email]>
---
 http.c                     | 152 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  42 +++++++++++++
 3 files changed, 196 insertions(+)

diff --git a/http.c b/http.c
index 040f362..349b033 100644
--- a/http.c
+++ b/http.c
@@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
 
 static struct active_request_slot *active_queue_head;
 
+static char *cached_accept_language;
+
 size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
 {
  size_t size = eltsize * nmemb;
@@ -515,6 +517,11 @@ void http_cleanup(void)
  cert_auth.password = NULL;
  }
  ssl_cert_password_required = 0;
+
+ if (cached_accept_language) {
+ free(cached_accept_language);
+ cached_accept_language = NULL;
+ }
 }
 
 struct active_request_slot *get_active_slot(void)
@@ -986,6 +993,145 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
  strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Guess the user's preferred languages from the value in LANGUAGE environment
+ * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
+ *
+ * The result can be a colon-separated list like "ko:ja:en".
+ */
+static const char *get_preferred_languages(void)
+{
+ const char *retval;
+
+ retval = getenv("LANGUAGE");
+ if (retval && *retval)
+ return retval;
+
+#ifndef NO_GETTEXT
+ retval = setlocale(LC_MESSAGES, NULL);
+ if (retval && *retval &&
+ strcmp(retval, "C") &&
+ strcmp(retval, "POSIX"))
+ return retval;
+#endif
+
+ return NULL;
+}
+
+static void write_accept_language(struct strbuf *buf)
+{
+ /*
+ * MAX_DECIMAL_PLACES must not be larger than 3. If it is larger than
+ * that, q-value will be smaller than 0.001, the minimum q-value the
+ * HTTP specification allows. See
+ * http://tools.ietf.org/html/rfc7231#section-5.3.1 for q-value.
+ */
+ const int MAX_DECIMAL_PLACES = 3;
+ const int MAX_LANGUAGE_TAGS = 1000;
+ const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
+ struct strbuf *language_tags = NULL;
+ int num_langs;
+ const char *s = get_preferred_languages();
+
+ /* Don't add Accept-Language header if no language is preferred. */
+ if (!s)
+ return;
+
+ /*
+ * Split the colon-separated string of preferred languages into
+ * language_tags array.
+ */
+ do {
+ /* increase language_tags array to add new language tag */
+ REALLOC_ARRAY(language_tags, num_langs + 1);
+ strbuf_init(&language_tags[num_langs], 0);
+
+ /* collect language tag */
+ for (; *s && (isalnum(*s) || *s == '_'); s++)
+ strbuf_addch(&language_tags[num_langs], *s == '_' ? '-' : *s);
+
+ /* skip .codeset, @modifier and any other unnecessary parts */
+ while (*s && *s != ':')
+ s++;
+
+ if (language_tags[num_langs].len > 0) {
+ num_langs++;
+ if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
+ break;
+ }
+ } while (*s++);
+
+ /* write Accept-Language header into buf */
+ if (num_langs >= 1) {
+ int i;
+ int last_buf_len;
+ int max_q;
+ int decimal_places;
+ char q_format[32];
+
+ /* add '*' */
+ REALLOC_ARRAY(language_tags, num_langs + 1);
+ strbuf_init(&language_tags[num_langs], 0);
+ strbuf_addstr(&language_tags[num_langs++], "*");
+
+ /* compute decimal_places */
+ for (max_q = 1, decimal_places = 0;
+ max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
+ decimal_places++, max_q *= 10)
+ ;
+
+ sprintf(q_format, ";q=0.%%0%dd", decimal_places);
+
+ strbuf_addstr(buf, "Accept-Language: ");
+
+ for(i = 0; i < num_langs; i++) {
+ if (language_tags[i].len == 0)
+ continue;
+
+ if (i > 0)
+ strbuf_addstr(buf, ", ");
+
+ strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
+
+ if (i > 0)
+ strbuf_addf(buf, q_format, max_q - i);
+
+ if (buf->len > MAX_ACCEPT_LANGUAGE_HEADER_SIZE) {
+ strbuf_remove(buf, last_buf_len, buf->len - last_buf_len);
+ break;
+ }
+
+ last_buf_len = buf->len;
+ }
+ }
+
+ free(language_tags);
+}
+
+/*
+ * Get an Accept-Language header which indicates user's preferred languages.
+ *
+ * This function always return non-NULL string as strbuf_detach() does.
+ *
+ * Examples:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
+ *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
+ *   LANGUAGE= LANG=C -> ""
+ */
+static const char *get_accept_language(void)
+{
+ if (!cached_accept_language) {
+ struct strbuf buf = STRBUF_INIT;
+ write_accept_language(&buf);
+ cached_accept_language = strbuf_detach(&buf, NULL);
+ }
+
+ return cached_accept_language;
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF 0
 #define HTTP_REQUEST_FILE 1
@@ -998,6 +1144,7 @@ static int http_request(const char *url,
  struct slot_results results;
  struct curl_slist *headers = NULL;
  struct strbuf buf = STRBUF_INIT;
+ const char *accept_language;
  int ret;
 
  slot = get_active_slot();
@@ -1023,6 +1170,11 @@ static int http_request(const char *url,
  fwrite_buffer);
  }
 
+ accept_language = get_accept_language();
+
+ if (strlen(accept_language) > 0)
+ headers = curl_slist_append(headers, accept_language);
+
  strbuf_addstr(&buf, "Pragma:");
  if (options && options->no_cache)
  strbuf_addstr(&buf, " no-cache");
diff --git a/remote-curl.c b/remote-curl.c
index dd63bc2..04989e5 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -962,6 +962,8 @@ int main(int argc, const char **argv)
  struct strbuf buf = STRBUF_INIT;
  int nongit;
 
+ git_setup_gettext();
+
  git_extract_argv0_path(argv[0]);
  setup_git_directory_gently(&nongit);
  if (argc < 2) {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..e1e2938 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,47 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
  grep "this is the error message" stderr
 '
 
+check_language () {
+ case "$2" in
+ '')
+ >expect
+ ;;
+ ?*)
+ echo "Accept-Language: $1" >expect
+ ;;
+ esac &&
+ GIT_CURL_VERBOSE=1 \
+ LANGUAGE=$2 \
+ git ls-remote "$HTTPD_URL/dumb/repo.git" >output 2>&1 &&
+ tr -d '\015' <output |
+ sort -u |
+ sed -ne '/^Accept-Language:/ p' >actual &&
+ test_cmp expect actual
+}
+
+test_expect_success 'git client sends Accept-Language based on LANGUAGE' '
+ check_language "ko-KR, *;q=0.9" ko_KR.UTF-8'
+
+test_expect_success 'git client sends Accept-Language correctly with unordinary LANGUAGE' '
+ check_language "ko-KR, *;q=0.9" "ko_KR:" &&
+ check_language "ko-KR, en-US;q=0.9, *;q=0.8" "ko_KR::en_US" &&
+ check_language "ko-KR, *;q=0.9" ":::ko_KR" &&
+ check_language "ko-KR, en-US;q=0.9, *;q=0.8" "ko_KR!!:en_US" &&
+ check_language "ko-KR, ja-JP;q=0.9, *;q=0.8" "ko_KR en_US:ja_JP"'
+
+test_expect_success 'git client sends Accept-Language with many preferred languages' '
+ check_language "ko-KR, en-US;q=0.9, fr-CA;q=0.8, de;q=0.7, sr;q=0.6, \
+ja;q=0.5, zh;q=0.4, sv;q=0.3, pt;q=0.2, *;q=0.1" \
+ ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt &&
+ check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
+ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.90" \
+ ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
+'
+
+test_expect_success 'git client does not send an empty Accept-Language' '
+ GIT_CURL_VERBOSE=1 LANGUAGE= git ls-remote "$HTTPD_URL/dumb/repo.git" 2>stderr &&
+ ! grep "^Accept-Language:" stderr
+'
+
 stop_httpd
 test_done
--
2.2.0.44.g37b3e56.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v7 1/1] http: Add Accept-Language header if possible

Torsten Bögershausen-2
On 18.01.15 13:26, Yi EungJun wrote:
> From: Yi EungJun <[hidden email]>

> diff --git a/http.c b/http.c
> index 040f362..349b033 100644
> --- a/http.c
> +++ b/http.c
> @@ -68,6 +68,8 @@ static struct curl_slist *no_pragma_header;
>  
>  static struct active_request_slot *active_queue_head;
>  
> +static char *cached_accept_language;
> +
>  size_t fread_buffer(char *ptr, size_t eltsize, size_t nmemb, void *buffer_)
>  {
>   size_t size = eltsize * nmemb;
> @@ -515,6 +517,11 @@ void http_cleanup(void)
>   cert_auth.password = NULL;
>   }
>   ssl_cert_password_required = 0;
> +
> + if (cached_accept_language) {
> + free(cached_accept_language);
> + cached_accept_language = NULL;
> + }

Minor remark:
free(NULL) is legal and does nothing.
We can simplify the code somewhat:

  ssl_cert_password_required = 0;
 
  free(cached_accept_language);
  cached_accept_language = NULL;




>  }
>  
>  struct active_request_slot *get_active_slot(void)
> @@ -986,6 +993,145 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>   strbuf_addstr(charset, "ISO-8859-1");
>  }
>  
> +/*
> + * Guess the user's preferred languages from the value in LANGUAGE environment
> + * variable and LC_MESSAGES locale category if NO_GETTEXT is not defined.
> + *
> + * The result can be a colon-separated list like "ko:ja:en".
> + */
> +static const char *get_preferred_languages(void)
> +{
> + const char *retval;
> +
> + retval = getenv("LANGUAGE");
> + if (retval && *retval)
> + return retval;
> +
> +#ifndef NO_GETTEXT
> + retval = setlocale(LC_MESSAGES, NULL);
> + if (retval && *retval &&
> + strcmp(retval, "C") &&
> + strcmp(retval, "POSIX"))
> + return retval;
> +#endif
> +
> + return NULL;
> +}
> +
> +static void write_accept_language(struct strbuf *buf)
> +{
> + /*
> + * MAX_DECIMAL_PLACES must not be larger than 3. If it is larger than
> + * that, q-value will be smaller than 0.001, the minimum q-value the
> + * HTTP specification allows. See
> + * http://tools.ietf.org/html/rfc7231#section-5.3.1 for q-value.
> + */
> + const int MAX_DECIMAL_PLACES = 3;
> + const int MAX_LANGUAGE_TAGS = 1000;
> + const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
> + struct strbuf *language_tags = NULL;
> + int num_langs;
> + const char *s = get_preferred_languages();
> +
> + /* Don't add Accept-Language header if no language is preferred. */
> + if (!s)
> + return;
> +
> + /*
> + * Split the colon-separated string of preferred languages into
> + * language_tags array.
> + */
> + do {
> + /* increase language_tags array to add new language tag */
> + REALLOC_ARRAY(language_tags, num_langs + 1);
> + strbuf_init(&language_tags[num_langs], 0);
> +
> + /* collect language tag */
> + for (; *s && (isalnum(*s) || *s == '_'); s++)
> + strbuf_addch(&language_tags[num_langs], *s == '_' ? '-' : *s);
> +
> + /* skip .codeset, @modifier and any other unnecessary parts */
> + while (*s && *s != ':')
> + s++;
> +
> + if (language_tags[num_langs].len > 0) {
> + num_langs++;
> + if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
> + break;
> + }
> + } while (*s++);
> +
> + /* write Accept-Language header into buf */
> + if (num_langs >= 1) {
> + int i;
> + int last_buf_len;
> + int max_q;
> + int decimal_places;
> + char q_format[32];
> +
> + /* add '*' */
> + REALLOC_ARRAY(language_tags, num_langs + 1);
> + strbuf_init(&language_tags[num_langs], 0);
> + strbuf_addstr(&language_tags[num_langs++], "*");
> +
> + /* compute decimal_places */
> + for (max_q = 1, decimal_places = 0;
> + max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
> + decimal_places++, max_q *= 10)
> + ;
> +
> + sprintf(q_format, ";q=0.%%0%dd", decimal_places);
> +
> + strbuf_addstr(buf, "Accept-Language: ");
> +
> + for(i = 0; i < num_langs; i++) {
> + if (language_tags[i].len == 0)
> + continue;
> +
> + if (i > 0)
> + strbuf_addstr(buf, ", ");
> +
> + strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));
> +
> + if (i > 0)
> + strbuf_addf(buf, q_format, max_q - i);
> +
> + if (buf->len > MAX_ACCEPT_LANGUAGE_HEADER_SIZE) {
> + strbuf_remove(buf, last_buf_len, buf->len - last_buf_len);
> + break;
> + }
> +
> + last_buf_len = buf->len;
> + }
> + }
> +
> + free(language_tags);
> +}
> +
> +/*
> + * Get an Accept-Language header which indicates user's preferred languages.
> + *
> + * This function always return non-NULL string as strbuf_detach() does.
> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static const char *get_accept_language(void)
> +{
> + if (!cached_accept_language) {
> + struct strbuf buf = STRBUF_INIT;
> + write_accept_language(&buf);
> + cached_accept_language = strbuf_detach(&buf, NULL);
> + }
> +
> + return cached_accept_language;
> +}
> +
>  /* http_request() targets */
>  #define HTTP_REQUEST_STRBUF 0
>  #define HTTP_REQUEST_FILE 1
> @@ -998,6 +1144,7 @@ static int http_request(const char *url,
>   struct slot_results results;
>   struct curl_slist *headers = NULL;
>   struct strbuf buf = STRBUF_INIT;
> + const char *accept_language;
>   int ret;
>  
>   slot = get_active_slot();
> @@ -1023,6 +1170,11 @@ static int http_request(const char *url,
>   fwrite_buffer);
>   }
>  
> + accept_language = get_accept_language();
> +
> + if (strlen(accept_language) > 0)
> + headers = curl_slist_append(headers, accept_language);
> +
>   strbuf_addstr(&buf, "Pragma:");
>   if (options && options->no_cache)
>   strbuf_addstr(&buf, " no-cache");
> diff --git a/remote-curl.c b/remote-curl.c
> index dd63bc2..04989e5 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -962,6 +962,8 @@ int main(int argc, const char **argv)
>   struct strbuf buf = STRBUF_INIT;
>   int nongit;
>  
> + git_setup_gettext();
> +
>   git_extract_argv0_path(argv[0]);
>   setup_git_directory_gently(&nongit);
>   if (argc < 2) {
> diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
> index ac71418..e1e2938 100755
> --- a/t/t5550-http-fetch-dumb.sh
> +++ b/t/t5550-http-fetch-dumb.sh
> @@ -196,5 +196,47 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
>   grep "this is the error message" stderr
>  '
>  
> +check_language () {
> + case "$2" in
> + '')
> + >expect
> + ;;
> + ?*)
> + echo "Accept-Language: $1" >expect
> + ;;
> + esac &&
> + GIT_CURL_VERBOSE=1 \
> + LANGUAGE=$2 \
> + git ls-remote "$HTTPD_URL/dumb/repo.git" >output 2>&1 &&
> + tr -d '\015' <output |
> + sort -u |
> + sed -ne '/^Accept-Language:/ p' >actual &&
> + test_cmp expect actual
> +}
> +
> +test_expect_success 'git client sends Accept-Language based on LANGUAGE' '
> + check_language "ko-KR, *;q=0.9" ko_KR.UTF-8'
> +
> +test_expect_success 'git client sends Accept-Language correctly with unordinary LANGUAGE' '
> + check_language "ko-KR, *;q=0.9" "ko_KR:" &&
> + check_language "ko-KR, en-US;q=0.9, *;q=0.8" "ko_KR::en_US" &&
> + check_language "ko-KR, *;q=0.9" ":::ko_KR" &&
> + check_language "ko-KR, en-US;q=0.9, *;q=0.8" "ko_KR!!:en_US" &&
> + check_language "ko-KR, ja-JP;q=0.9, *;q=0.8" "ko_KR en_US:ja_JP"'
> +
> +test_expect_success 'git client sends Accept-Language with many preferred languages' '
> + check_language "ko-KR, en-US;q=0.9, fr-CA;q=0.8, de;q=0.7, sr;q=0.6, \
> +ja;q=0.5, zh;q=0.4, sv;q=0.3, pt;q=0.2, *;q=0.1" \
> + ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt &&
> + check_language "ko-KR, en-US;q=0.99, fr-CA;q=0.98, de;q=0.97, sr;q=0.96, \
> +ja;q=0.95, zh;q=0.94, sv;q=0.93, pt;q=0.92, nb;q=0.91, *;q=0.90" \
> + ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb
> +'
> +
> +test_expect_success 'git client does not send an empty Accept-Language' '
> + GIT_CURL_VERBOSE=1 LANGUAGE= git ls-remote "$HTTPD_URL/dumb/repo.git" 2>stderr &&
> + ! grep "^Accept-Language:" stderr
> +'
> +
>  stop_httpd
>  test_done
>

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v6 0/1] http: Add Accept-Language header if possible

Eric Sunshine
In reply to this post by Yi EungJun
On Sunday, January 18, 2015, Yi EungJun <[hidden email]> wrote:

> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en;q=0.9, *;q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *;q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *;q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Limit the number of languages to 1,000 because q-value must not be
> smaller than 0.001, and limit the length of Accept-Language header to
> 4,000 bytes for some HTTP servers which cannot accept such long header.
>
> Signed-off-by: Yi EungJun <[hidden email]>
> ---
> diff --git a/http.c b/http.c
> index 040f362..349b033 100644
> --- a/http.c
> +++ b/http.c
> @@ -986,6 +993,145 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>                 strbuf_addstr(charset, "ISO-8859-1");
>  }
>
> +static void write_accept_language(struct strbuf *buf)
> +{
> +       [...]
> +       const int MAX_DECIMAL_PLACES = 3;
> +       const int MAX_LANGUAGE_TAGS = 1000;
> +       const int MAX_ACCEPT_LANGUAGE_HEADER_SIZE = 4000;
> +       struct strbuf *language_tags = NULL;
> +       int num_langs;

Mental note: 'num_langs' is not initialized.

> +       const char *s = get_preferred_languages();
> +
> +       /* Don't add Accept-Language header if no language is preferred. */
> +       if (!s)
> +               return;
> +
> +       /*
> +        * Split the colon-separated string of preferred languages into
> +        * language_tags array.
> +        */
> +       do {
> +               /* increase language_tags array to add new language tag */
> +               REALLOC_ARRAY(language_tags, num_langs + 1);

'num_langs' is used here uninitialized.

> +               strbuf_init(&language_tags[num_langs], 0);
> +
> +               /* collect language tag */
> +               for (; *s && (isalnum(*s) || *s == '_'); s++)
> +                       strbuf_addch(&language_tags[num_langs], *s == '_' ? '-' : *s);
> +
> +               /* skip .codeset, @modifier and any other unnecessary parts */
> +               while (*s && *s != ':')
> +                       s++;
> +
> +               if (language_tags[num_langs].len > 0) {

Mental note: An empty ("") language tag is never allowed in language_tags[].

 > +                       num_langs++;

This is a little bit ugly. At the top of the loop, you allocate space
in the array for a strbuf and initialize it. However, if the language
tag is empty (""), then 'num_langs' is never incremented, so the next
time through the loop, strbuf_init() is invoked on the same block of
memory (assuming the realloc was a no-op since the allocation size did
not change), overwriting whatever was there and possibly leaking
memory. In this particular case, by examining the parser code closely,
we can see that nothing was added to the strbuf, so nothing is being
leaked the next time around, given the current implementation of
strbuf.

However, this is potentially fragile. A change to the implementation
of strbuf in the future (for instance, if strbuf_init() allocates
memory immediately) could result in a leak here. Moreover, this
no-leak situation only holds true if no text at all has been added to
the strbuf after strbuf_init(). If someone changes the parser in the
future to operate a bit differently so that some text is added and
then removed from the strbuf, even though the end result still has
length is 0, then it will start leaking.

One way to make this more robust would be to have a separate strbuf
for collecting the language tag. When you encounter a non-empty tag,
only then grow the array and initialize the new strbuf in the array.
Finally, use strbuf_swap() to swap the collected language tag into the
new array position. Something like this:

    struct strbuf tag = STRBUF_INIT;
    do {
         for (; *s && (isalnum(*s) || *s == '_'); s++)
             strbuf_addch(&tag, *s == '_' ? '-' : *s);

        [...]

        if (tag.len) {
            num_langs++;
            REALLOC_ARRAY(language_tags, num_langs);
            strbuf_init(&language_tags[num_langs], 0);
            strbuf_swap(&tag, &language_tags[num_langs]);

            if (num_langs >= ...)
                break;
        }
    while (...);
    strbuf_release(&tag);

> +                       if (num_langs >= MAX_LANGUAGE_TAGS - 1) /* -1 for '*' */
> +                               break;
> +               }
> +       } while (*s++);
> +
> +       /* write Accept-Language header into buf */
> +       if (num_langs >= 1) {
> +               int i;
> +               int last_buf_len;

Mental note: 'last_buf_len' is not initialized.

> +               int max_q;
> +               int decimal_places;
> +               char q_format[32];
> +
> +               /* add '*' */
> +               REALLOC_ARRAY(language_tags, num_langs + 1);
> +               strbuf_init(&language_tags[num_langs], 0);
> +               strbuf_addstr(&language_tags[num_langs++], "*");
> +
> +               /* compute decimal_places */
> +               for (max_q = 1, decimal_places = 0;
> +                               max_q < num_langs && decimal_places <= MAX_DECIMAL_PLACES;
> +                               decimal_places++, max_q *= 10)
> +                       ;
> +
> +               sprintf(q_format, ";q=0.%%0%dd", decimal_places);
> +
> +               strbuf_addstr(buf, "Accept-Language: ");
> +
> +               for(i = 0; i < num_langs; i++) {
> +                       if (language_tags[i].len == 0)
> +                               continue;

The parsing code does not allow empty tags ("") in language_tags[], so
this conditional is useless, isn't it?

> +
> +                       if (i > 0)
> +                               strbuf_addstr(buf, ", ");
> +
> +                       strbuf_addstr(buf, strbuf_detach(&language_tags[i], NULL));

This leaks the string detached from 'language_tag[i]' since
strbuf_addstr() does not take ownership of it.

> +                       if (i > 0)
> +                               strbuf_addf(buf, q_format, max_q - i);
> +
> +                       if (buf->len > MAX_ACCEPT_LANGUAGE_HEADER_SIZE) {
> +                               strbuf_remove(buf, last_buf_len, buf->len - last_buf_len);

'last_buf_len' is (potentially) used here uninitialized the first time
through loop.

> +                               break;
> +                       }
> +
> +                       last_buf_len = buf->len;
> +               }
> +       }
> +
> +       free(language_tags);

This _seems_ to be okay since strbuf_detach() was invoked for each
strbuf in language_tags[], however, those strings were in fact leaked
(as noted above), so it's not actually correct.

> +}
> +
> +/*
> + * Get an Accept-Language header which indicates user's preferred languages.
> + *
> + * This function always return non-NULL string as strbuf_detach() does.

Repeating from [1]: It's not good form to describe the published API
in terms of an implementation detail (strbuf_detach). Also, it would
be more idiomatic in C to return NULL rather than empty string.

[1]: http://article.gmane.org/gmane.comp.version-control.git/261810/

> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static const char *get_accept_language(void)
> +{
> +       if (!cached_accept_language) {
> +               struct strbuf buf = STRBUF_INIT;
> +               write_accept_language(&buf);
> +               cached_accept_language = strbuf_detach(&buf, NULL);
> +       }
> +
> +       return cached_accept_language;
> +}
> +
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [hidden email]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
123