1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008,
4 2009, 2010 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Translate MSGID through gettext. */
37 #define _(msgid) gettext (msgid)
/* Expand to MSGID itself, with no lookup at the point of use. */
38 #define N_(msgid) msgid
/* Largest size_t value.  The quoting routines treat an ARGSIZE equal to
   this as a request to use the string length of the argument. */
41 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int; the width of one word of the
   quote_these_too bit vector. */
44 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Settings that control how an argument is quoted.  Functions in this
   file that take a pointer to one of these fall back to
   default_quoting_options when the pointer is null. */
46 struct quoting_options
48 /* Basic quoting style. */
49 enum quoting_style style;
51 /* Additional flags. Bitwise combination of enum quoting_flags. */
/* NOTE(review): the member described by the comment above is not
   visible in this excerpt; other code in this file reads it as
   p->flags. */
54 /* Quote the characters indicated by this bit vector even if the
55 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value: the bit for character c is bit
   (c % INT_BITS) of word (c / INT_BITS). */
56 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
58 /* The left quote for custom_quoting_style. */
59 char const *left_quote;
61 /* The right quote for custom_quoting_style. */
62 char const *right_quote;
65 /* Names of quoting styles. */
/* NOTE(review): the initializer is not visible in this excerpt; the
   entries presumably line up one-to-one with quoting_style_vals. */
66 char const *const quoting_style_args[] =
79 /* Correspondences to quoting style names. */
/* Each entry is an enum quoting_style constant.  Only some entries are
   visible in this excerpt; presumably the order matches the names in
   quoting_style_args - confirm against the full initializer. */
80 enum quoting_style const quoting_style_vals[] =
82 literal_quoting_style,
84 shell_always_quoting_style,
86 c_maybe_quoting_style,
92 /* The default quoting options. */
/* Used in place of a null options pointer throughout this file.  It has
   static storage duration and no initializer, so every member starts
   out as zero. */
93 static struct quoting_options default_quoting_options;
95 /* Allocate a new set of quoting options, with contents initially identical
96 to O if O is not null, or to the default if O is null.
97 It is the caller's responsibility to free the result. */
/* The copy is made with xmemdup.  NOTE(review): the size argument of
   that call and the return statement are on lines not visible in this
   excerpt. */
98 struct quoting_options *
99 clone_quoting_options (struct quoting_options *o)
102 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
108 /* Get the value of O's quoting style. If O is null, use the default. */
/* The visible body only reads the style member through O (or through
   default_quoting_options); nothing is stored. */
110 get_quoting_style (struct quoting_options *o)
112 return (o ? o : &default_quoting_options)->style;
115 /* In O (or in the default if O is null),
116 set the value of the quoting style to S. */
/* With a null O the assignment lands in the file-wide
   default_quoting_options, so it also changes the behavior of every
   later call that falls back to the defaults. */
118 set_quoting_style (struct quoting_options *o, enum quoting_style s)
120 (o ? o : &default_quoting_options)->style = s;
123 /* In O (or in the default if O is null),
124 set the value of the quoting options for character C to I.
125 Return the old value. Currently, the only values defined for I are
126 0 (the default) and 1 (which means to quote the character even if
127 it would not otherwise be quoted). */
129 set_char_quoting (struct quoting_options *o, char c, int i)
/* Go through unsigned char so that the index below is never negative,
   whatever the signedness of plain char. */
131 unsigned char uc = c;
/* Address of the quote_these_too word that holds the bit for UC. */
133 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of that bit inside the word. */
134 int shift = uc % INT_BITS;
/* Current value of the bit. */
135 int r = (*p >> shift) & 1;
/* Flip the stored bit only when it differs from the low bit of I. */
136 *p ^= ((i & 1) ^ r) << shift;
140 /* In O (or in the default if O is null),
141 set the value of the quoting options flag to I, which can be a
142 bitwise combination of enum quoting_flags, or 0 for default
143 behavior. Return the old value. */
/* NOTE(review): only the fallback assignment to
   default_quoting_options is visible in this excerpt; its null check,
   the flag update and the return of the old value are on lines not
   shown here. */
145 set_quoting_flags (struct quoting_options *o, int i)
149 o = &default_quoting_options;
/* In O (or in default_quoting_options), select custom_quoting_style and
   record LEFT_QUOTE and RIGHT_QUOTE as its delimiters.  The two
   pointers are stored as given, not copied, so the strings have to
   stay valid for as long as the options are in use.
   NOTE(review): the guard on the fallback assignment (presumably a null
   check on O) and the action taken when either quote is null are on
   lines not visible in this excerpt. */
156 set_custom_quoting (struct quoting_options *o,
157 char const *left_quote, char const *right_quote)
160 o = &default_quoting_options;
161 o->style = custom_quoting_style;
162 if (!left_quote || !right_quote)
164 o->left_quote = left_quote;
165 o->right_quote = right_quote;
168 /* Return quoting options for STYLE, with no extra quoting. */
169 static struct quoting_options
170 quoting_options_from_style (enum quoting_style style)
172 struct quoting_options o;
/* An all-zero bit vector means that no character gets extra quoting.
   NOTE(review): the assignments to the other members and the return
   statement are on lines not visible in this excerpt. */
175 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
179 /* MSGID approximates a quotation mark. Return its translation if it
180 has one; otherwise, return either it or "\"", depending on S. */
182 gettext_quote (char const *msgid, enum quoting_style s)
184 char const *translation = _(msgid);
/* Getting the very same pointer back means that no translation was
   found; clocale_quoting_style is then handled specially. */
185 if (translation == msgid && s == clocale_quoting_style)
190 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
191 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
192 QUOTE_THESE_TOO to control quoting.
193 Terminate the output with a null character, and return the written
194 size of the output, not counting the terminating null.
195 If BUFFERSIZE is too small to store the output string, return the
196 value that would have been returned had BUFFERSIZE been large enough.
197 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
199 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
200 ARGSIZE, O), except it breaks O into its component pieces and is
201 not careful about errno. */
/* Additional notes drawn from the visible body:
   - For locale_quoting_style and clocale_quoting_style the LEFT_QUOTE
     and RIGHT_QUOTE arguments are replaced by the results of
     gettext_quote; they are used as passed in only for
     custom_quoting_style.
   - QUOTE_THESE_TOO is read as a bit vector indexed by character
     value (word c / INT_BITS, bit c % INT_BITS).
   - While outer quotes are being elided, a character that turns out
     to need quoting jumps to force_outer_quoting_style, which starts
     over through a recursive call with QA_ELIDE_OUTER_QUOTES cleared
     and a null QUOTE_THESE_TOO.
   - An ARGSIZE of SIZE_MAX is replaced by strlen (ARG) once the
     multibyte handling needs the real length. */
204 quotearg_buffer_restyled (char *buffer, size_t buffersize,
205 char const *arg, size_t argsize,
206 enum quoting_style quoting_style, int flags,
207 unsigned int const *quote_these_too,
208 char const *left_quote,
209 char const *right_quote)
213 char const *quote_string = 0;
214 size_t quote_string_len = 0;
215 bool backslash_escapes = false;
216 bool unibyte_locale = MB_CUR_MAX == 1;
217 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
222 if (len < buffersize) \
228 switch (quoting_style)
/* The maybe variant is the C style with the outer quotes elided for as
   long as nothing in ARG requires them. */
230 case c_maybe_quoting_style:
231 quoting_style = c_quoting_style;
232 elide_outer_quotes = true;
234 case c_quoting_style:
235 if (!elide_outer_quotes)
237 backslash_escapes = true;
239 quote_string_len = 1;
242 case escape_quoting_style:
243 backslash_escapes = true;
244 elide_outer_quotes = false;
247 case locale_quoting_style:
248 case clocale_quoting_style:
249 case custom_quoting_style:
251 if (quoting_style != custom_quoting_style)
254 Get translations for open and closing quotation marks.
256 The message catalog should translate "`" to a left
257 quotation mark suitable for the locale, and similarly for
258 "'". If the catalog has no translation,
259 locale_quoting_style quotes `like this', and
260 clocale_quoting_style quotes "like this".
262 For example, an American English Unicode locale should
263 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
264 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
265 MARK). A British English Unicode locale should instead
266 translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
267 and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
269 If you don't know what to put here, please see
270 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
271 and use glyphs suitable for your language. */
272 left_quote = gettext_quote (N_("`"), quoting_style);
273 right_quote = gettext_quote (N_("'"), quoting_style);
275 if (!elide_outer_quotes)
276 for (quote_string = left_quote; *quote_string; quote_string++)
277 STORE (*quote_string);
278 backslash_escapes = true;
279 quote_string = right_quote;
280 quote_string_len = strlen (quote_string);
284 case shell_quoting_style:
285 quoting_style = shell_always_quoting_style;
286 elide_outer_quotes = true;
288 case shell_always_quoting_style:
289 if (!elide_outer_quotes)
292 quote_string_len = 1;
295 case literal_quoting_style:
296 elide_outer_quotes = false;
303 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
307 bool is_right_quote = false;
309 if (backslash_escapes
311 && i + quote_string_len <= argsize
312 && memcmp (arg + i, quote_string, quote_string_len) == 0)
314 if (elide_outer_quotes)
315 goto force_outer_quoting_style;
316 is_right_quote = true;
323 if (backslash_escapes)
325 if (elide_outer_quotes)
326 goto force_outer_quoting_style;
328 /* If quote_string were to begin with digits, we'd need to
329 test for the end of the arg as well. However, it's
330 hard to imagine any locale that would use digits in
331 quotes, and set_custom_quoting is documented not to
333 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
339 /* We don't have to worry that this last '0' will be
340 backslash-escaped because, again, quote_string should
341 not start with it and because quote_these_too is
342 documented as not accepting it. */
344 else if (flags & QA_ELIDE_NULL_BYTES)
349 switch (quoting_style)
351 case shell_always_quoting_style:
352 if (elide_outer_quotes)
353 goto force_outer_quoting_style;
356 case c_quoting_style:
357 if ((flags & QA_SPLIT_TRIGRAPHS)
358 && i + 2 < argsize && arg[i + 1] == '?')
362 case '(': case ')': case '-': case '/':
363 case '<': case '=': case '>':
364 /* Escape the second '?' in what would otherwise be
366 if (elide_outer_quotes)
367 goto force_outer_quoting_style;
386 case '\a': esc = 'a'; goto c_escape;
387 case '\b': esc = 'b'; goto c_escape;
388 case '\f': esc = 'f'; goto c_escape;
389 case '\n': esc = 'n'; goto c_and_shell_escape;
390 case '\r': esc = 'r'; goto c_and_shell_escape;
391 case '\t': esc = 't'; goto c_and_shell_escape;
392 case '\v': esc = 'v'; goto c_escape;
394 /* No need to escape the escape if we are trying to elide
395 outer quotes and nothing else is problematic. */
396 if (backslash_escapes && elide_outer_quotes && quote_string_len)
400 if (quoting_style == shell_always_quoting_style
401 && elide_outer_quotes)
402 goto force_outer_quoting_style;
405 if (backslash_escapes)
412 case '{': case '}': /* sometimes special if isolated */
413 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
421 case '!': /* special in bash */
422 case '"': case '$': case '&':
423 case '(': case ')': case '*': case ';':
425 case '=': /* sometimes special in 0th or (with "set -k") later args */
427 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
429 /* A shell special character. In theory, '$' and '`' could
430 be the first bytes of multibyte characters, which means
431 we should check them with mbrtowc, but in practice this
432 doesn't happen so it's not worth worrying about. */
433 if (quoting_style == shell_always_quoting_style
434 && elide_outer_quotes)
435 goto force_outer_quoting_style;
439 if (quoting_style == shell_always_quoting_style)
441 if (elide_outer_quotes)
442 goto force_outer_quoting_style;
449 case '%': case '+': case ',': case '-': case '.': case '/':
450 case '0': case '1': case '2': case '3': case '4': case '5':
451 case '6': case '7': case '8': case '9': case ':':
452 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
453 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
454 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
455 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
456 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
457 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
458 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
459 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
460 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
461 /* These characters don't cause problems, no matter what the
462 quoting style is. They cannot start multibyte sequences.
463 A digit or a special letter would cause trouble if it
464 appeared at the beginning of quote_string because we'd then
465 escape by prepending a backslash. However, it's hard to
466 imagine any locale that would use digits or letters as
467 quotes, and set_custom_quoting is documented not to accept
468 them. Also, a digit or a special letter would cause
469 trouble if it appeared in quote_these_too, but that's also
470 documented as not accepting them. */
474 /* If we have a multibyte sequence, copy it until we reach
475 its end, find an error, or come back to the initial shift
476 state. For C-like styles, if the sequence has
477 unprintable characters, escape the whole sequence, since
478 we can't easily escape single characters within it. */
480 /* Length of multibyte sequence found so far. */
488 printable = isprint (c) != 0;
493 memset (&mbstate, 0, sizeof mbstate);
497 if (argsize == SIZE_MAX)
498 argsize = strlen (arg);
503 size_t bytes = mbrtowc (&w, &arg[i + m],
504 argsize - (i + m), &mbstate);
507 else if (bytes == (size_t) -1)
512 else if (bytes == (size_t) -2)
515 while (i + m < argsize && arg[i + m])
521 /* Work around a bug with older shells that "see" a '\'
522 that is really the 2nd byte of a multibyte character.
523 In practice the problem is limited to ASCII
524 chars >= '@' that are shell special chars. */
525 if ('[' == 0x5b && elide_outer_quotes
526 && quoting_style == shell_always_quoting_style)
529 for (j = 1; j < bytes; j++)
530 switch (arg[i + m + j])
532 case '[': case '\\': case '^':
534 goto force_outer_quoting_style;
546 while (! mbsinit (&mbstate));
549 if (1 < m || (backslash_escapes && ! printable))
551 /* Output a multibyte sequence, or an escaped
552 unprintable unibyte character. */
557 if (backslash_escapes && ! printable)
559 if (elide_outer_quotes)
560 goto force_outer_quoting_style;
562 STORE ('0' + (c >> 6));
563 STORE ('0' + ((c >> 3) & 7));
566 else if (is_right_quote)
569 is_right_quote = false;
582 if (! ((backslash_escapes || elide_outer_quotes)
584 && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
589 if (elide_outer_quotes)
590 goto force_outer_quoting_style;
597 if (len == 0 && quoting_style == shell_always_quoting_style
598 && elide_outer_quotes)
599 goto force_outer_quoting_style;
601 if (quote_string && !elide_outer_quotes)
602 for (; *quote_string; quote_string++)
603 STORE (*quote_string);
605 if (len < buffersize)
609 force_outer_quoting_style:
610 /* Don't reuse quote_these_too, since the addition of outer quotes
611 sufficiently quotes the specified characters. */
612 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
614 flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
615 left_quote, right_quote);
/* Wrapper around quotearg_buffer_restyled: it selects O, or
   default_quoting_options when O is null, and forwards the style,
   flags, quote_these_too, left_quote and right_quote members. */
618 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
619 argument ARG (of size ARGSIZE), using O to control quoting.
620 If O is null, use the default.
621 Terminate the output with a null character, and return the written
622 size of the output, not counting the terminating null.
623 If BUFFERSIZE is too small to store the output string, return the
624 value that would have been returned had BUFFERSIZE been large enough.
625 If ARGSIZE is SIZE_MAX, use the string length of the argument for
628 quotearg_buffer (char *buffer, size_t buffersize,
629 char const *arg, size_t argsize,
630 struct quoting_options const *o)
632 struct quoting_options const *p = o ? o : &default_quoting_options;
634 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
635 p->style, p->flags, p->quote_these_too,
636 p->left_quote, p->right_quote);
641 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
643 quotearg_alloc (char const *arg, size_t argsize,
644 struct quoting_options const *o)
646 return quotearg_alloc_mem (arg, argsize, NULL, o);
649 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
650 allocated storage containing the quoted string, and store the
651 resulting size into *SIZE, if non-NULL. The result can contain
652 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
653 NULL, and set_quoting_flags has not set the null byte elision
656 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
657 struct quoting_options const *o)
659 struct quoting_options const *p = o ? o : &default_quoting_options;
661 /* Elide embedded null bytes if we can't return a size. */
662 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
/* First pass: a null buffer of size 0 stores nothing and only measures
   the quoted output. */
663 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
664 flags, p->quote_these_too,
667 char *buf = xcharalloc (bufsize);
/* Second pass: produce the quoted text in the new buffer. */
668 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
670 p->left_quote, p->right_quote);
677 /* A storage slot with size and pointer to a value. */
684 /* Preallocate a slot 0 buffer, so that the caller can always quote
685 one small component of a "memory exhausted" message in slot 0. */
686 static char slot0[256];
/* Number of slots currently in the vector; it starts at 1 because
   slot 0 is preallocated. */
687 static unsigned int nslots = 1;
/* Descriptor for slot 0, initially backed by the static slot0 buffer. */
688 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The active slot vector.  It is the one-element slotvec0 until
   quotearg_n_options has to grow it. */
689 static struct slotvec *slotvec = &slotvec0;
/* NOTE(review): the line that names this function is not visible in
   this excerpt.  The visible statements loop over slots 1 through
   nslots - 1 and, when slot 0 no longer uses the static slot0 buffer,
   point slotvec0 back at slot0 with its full size. */
694 struct slotvec *sv = slotvec;
696 for (i = 1; i < nslots; i++)
698 if (sv[0].val != slot0)
701 slotvec0.size = sizeof slot0;
702 slotvec0.val = slot0;
712 /* Use storage slot N to return a quoted version of argument ARG.
713 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
714 null-terminated string.
715 OPTIONS specifies the quoting options.
716 The returned value points to static storage that can be
717 reused by the next call to this function with the same value of N.
718 N must be nonnegative. N is deliberately declared with type "int"
719 to allow for future extensions (using negative values). */
721 quotearg_n_options (int n, char const *arg, size_t argsize,
722 struct quoting_options const *options)
727 struct slotvec *sv = slotvec;
734 /* FIXME: technically, the type of n1 should be `unsigned int',
735 but that evokes an unsuppressible warning from gcc-4.0.1 and
736 older. If gcc ever provides an option to suppress that warning,
737 revert to the original type, so that the test in xalloc_oversized
738 is once again performed only at compile time. */
/* True while the vector is still the static slotvec0, which must not
   be handed to xrealloc; NULL is passed instead in that case. */
740 bool preallocated = (sv == &slotvec0);
742 if (xalloc_oversized (n1, sizeof *sv))
745 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the newly added slots so that their size and val start out
   empty. */
748 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
753 size_t size = sv[n].size;
754 char *val = sv[n].val;
755 /* Elide embedded null bytes since we don't return a size. */
756 int flags = options->flags | QA_ELIDE_NULL_BYTES;
/* First attempt: quote into whatever buffer slot N already has. */
757 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
758 options->style, flags,
759 options->quote_these_too,
761 options->right_quote);
/* Regrow path: size the slot for the quoted text plus the terminating
   null, allocate it, and quote again.  NOTE(review): the condition
   guarding this path is not visible in this excerpt. */
765 sv[n].size = size = qsize + 1;
768 sv[n].val = val = xcharalloc (size);
769 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
770 flags, options->quote_these_too,
772 options->right_quote);
/* Quote the null-terminated string ARG into slot N using
   default_quoting_options. */
781 quotearg_n (int n, char const *arg)
783 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the ARGSIZE bytes at ARG into slot N using
   default_quoting_options. */
787 quotearg_n_mem (int n, char const *arg, size_t argsize)
789 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Quote the null-terminated string ARG into slot 0; shorthand for
   quotearg_n (0, ARG). */
793 quotearg (char const *arg)
795 return quotearg_n (0, arg);
/* Quote the ARGSIZE bytes at ARG into slot 0; shorthand for
   quotearg_n_mem (0, ARG, ARGSIZE). */
799 quotearg_mem (char const *arg, size_t argsize)
801 return quotearg_n_mem (0, arg, argsize);
/* Quote the null-terminated string ARG into slot N using the options
   that quoting_options_from_style builds for style S. */
805 quotearg_n_style (int n, enum quoting_style s, char const *arg)
807 struct quoting_options const o = quoting_options_from_style (s);
808 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Quote the ARGSIZE bytes at ARG into slot N using the options that
   quoting_options_from_style builds for style S. */
812 quotearg_n_style_mem (int n, enum quoting_style s,
813 char const *arg, size_t argsize)
815 struct quoting_options const o = quoting_options_from_style (s);
816 return quotearg_n_options (n, arg, argsize, &o);
/* Quote the null-terminated string ARG into slot 0 using style S;
   shorthand for quotearg_n_style (0, S, ARG). */
820 quotearg_style (enum quoting_style s, char const *arg)
822 return quotearg_n_style (0, s, arg);
/* Quote the ARGSIZE bytes at ARG into slot 0 using style S; shorthand
   for quotearg_n_style_mem (0, S, ARG, ARGSIZE). */
826 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
828 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote the ARGSIZE bytes at ARG into slot 0, using a private copy of
   default_quoting_options in which CH is additionally marked for
   quoting.  The defaults themselves are left unchanged. */
832 quotearg_char_mem (char const *arg, size_t argsize, char ch)
834 struct quoting_options options;
835 options = default_quoting_options;
836 set_char_quoting (&options, ch, 1);
837 return quotearg_n_options (0, arg, argsize, &options);
/* Same as quotearg_char_mem for a null-terminated ARG: SIZE_MAX is
   passed as the size. */
841 quotearg_char (char const *arg, char ch)
843 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Quote the null-terminated string ARG with the colon character
   additionally marked for quoting. */
847 quotearg_colon (char const *arg)
849 return quotearg_char (arg, ':');
/* Quote the ARGSIZE bytes at ARG with the colon character additionally
   marked for quoting. */
853 quotearg_colon_mem (char const *arg, size_t argsize)
855 return quotearg_char_mem (arg, argsize, ':');
/* Quote ARG into slot N between LEFT_QUOTE and RIGHT_QUOTE by
   forwarding to quotearg_n_custom_mem.  NOTE(review): the final
   argument of that call is on a line not visible in this excerpt;
   presumably it is SIZE_MAX, meaning a null-terminated ARG. */
859 quotearg_n_custom (int n, char const *left_quote,
860 char const *right_quote, char const *arg)
862 return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
/* Quote the ARGSIZE bytes at ARG into slot N.  The options are a local
   copy of default_quoting_options that set_custom_quoting switches to
   the custom style with LEFT_QUOTE and RIGHT_QUOTE as delimiters. */
867 quotearg_n_custom_mem (int n, char const *left_quote,
868 char const *right_quote,
869 char const *arg, size_t argsize)
871 struct quoting_options o = default_quoting_options;
872 set_custom_quoting (&o, left_quote, right_quote);
873 return quotearg_n_options (n, arg, argsize, &o);
/* Quote ARG into slot 0 between LEFT_QUOTE and RIGHT_QUOTE; shorthand
   for quotearg_n_custom (0, LEFT_QUOTE, RIGHT_QUOTE, ARG).
   NOTE(review): the rest of the parameter list is on a line not
   visible in this excerpt. */
877 quotearg_custom (char const *left_quote, char const *right_quote,
880 return quotearg_n_custom (0, left_quote, right_quote, arg);
884 quotearg_custom_mem (char const *left_quote, char const *right_quote,
885 char const *arg, size_t argsize)
887 return quotearg_n_custom_mem (0, left_quote, right_quote, arg,