1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
/* _(MSGID) looks MSGID up through gettext. */
37 #define _(msgid) gettext (msgid)
/* N_(MSGID) expands to MSGID unchanged; no lookup is performed. */
38 #define N_(msgid) msgid
/* Fallback definition of SIZE_MAX as the largest size_t value.
   NOTE(review): presumably wrapped in an #ifndef SIZE_MAX guard on
   lines not shown here -- confirm. */
41 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int.  Used to size and index the
   quote_these_too bit vector. */
44 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* The set of options that controls how an argument is quoted.
   NOTE(review): other code in this file reads a `flags' member
   (e.g. p->flags); its declaration is not visible in this excerpt
   even though its comment is. */
46 struct quoting_options
48 /* Basic quoting style. */
49 enum quoting_style style;
51 /* Additional flags. Bitwise combination of enum quoting_flags. */
54 /* Quote the characters indicated by this bit vector even if the
55 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value, packed INT_BITS bits to each
   array element: character UC lives in element UC / INT_BITS at bit
   position UC % INT_BITS. */
56 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
58 /* The left quote for custom_quoting_style. */
59 char const *left_quote;
61 /* The right quote for custom_quoting_style. */
62 char const *right_quote;
65 /* Names of quoting styles. */
66 char const *const quoting_style_args[] =
79 /* Correspondences to quoting style names. */
80 enum quoting_style const quoting_style_vals[] =
82 literal_quoting_style,
84 shell_always_quoting_style,
86 c_maybe_quoting_style,
92 /* The default quoting options.  Functions in this file that accept
   a null options pointer operate on this object instead.  It has no
   explicit initializer, so as an object with static storage duration
   it starts out zero-initialized. */
93 static struct quoting_options default_quoting_options;
95 /* Allocate a new set of quoting options, with contents initially identical
96 to O if O is not null, or to the default if O is null.
97 It is the caller's responsibility to free the result. */
98 struct quoting_options *
99 clone_quoting_options (struct quoting_options *o)
/* The copy is made by xmemdup from O, or from default_quoting_options
   when O is null.  NOTE(review): the size argument and the rest of the
   body are on lines not visible in this excerpt. */
102 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
108 /* Return the quoting style stored in O.  When O is null, the style
   stored in default_quoting_options is returned instead.  The options
   object is only read, never modified. */
110 get_quoting_style (struct quoting_options *o)
112 return (o ? o : &default_quoting_options)->style;
115 /* Store S as the quoting style of O.  When O is null, the shared
   default_quoting_options object is modified instead, which changes
   the style seen by later calls that fall back to the defaults.
   Only the style member is written. */
118 set_quoting_style (struct quoting_options *o, enum quoting_style s)
120 (o ? o : &default_quoting_options)->style = s;
123 /* In O (or in the default if O is null),
124 set the value of the quoting options for character C to I.
125 Return the old value. Currently, the only values defined for I are
126 0 (the default) and 1 (which means to quote the character even if
127 it would not otherwise be quoted). */
129 set_char_quoting (struct quoting_options *o, char c, int i)
/* Go through unsigned char so the index computed below is never
   negative, even where plain char is signed. */
131 unsigned char uc = c;
/* Address of the bit-vector word that holds UC's bit (the declaration
   receiving this value is on a line not shown). */
133 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of UC's bit inside that word. */
134 int shift = uc % INT_BITS;
/* R is the bit's current value, i.e. the "old value" of the contract. */
135 int r = (*p >> shift) & 1;
/* The XOR flips the stored bit only when it differs from the low bit
   of I; the other bits of I are ignored. */
136 *p ^= ((i & 1) ^ r) << shift;
140 /* In O (or in the default if O is null),
141 set the value of the quoting options flag to I, which can be a
142 bitwise combination of enum quoting_flags, or 0 for default
143 behavior. Return the old value. */
145 set_quoting_flags (struct quoting_options *o, int i)
149 o = &default_quoting_options;
/* Switch O to custom_quoting_style and record LEFT_QUOTE and
   RIGHT_QUOTE as its delimiters.  Only the two pointers are stored,
   not copies of the strings, so the strings must remain valid for as
   long as O is used with them.  */
156 set_custom_quoting (struct quoting_options *o,
157 char const *left_quote, char const *right_quote)
/* Fall back to the shared defaults.  NOTE(review): presumably guarded
   by a null test on O on a line not shown -- confirm. */
160 o = &default_quoting_options;
161 o->style = custom_quoting_style;
/* Both quote strings are required.  NOTE(review): the action taken
   when either is null is on a line not shown. */
162 if (!left_quote || !right_quote)
164 o->left_quote = left_quote;
165 o->right_quote = right_quote;
168 /* Return quoting options for STYLE, with no extra quoting. */
169 static struct quoting_options
170 quoting_options_from_style (enum quoting_style style)
/* Built in a local object; the declared return type is the struct
   itself, so the caller receives it by value. */
172 struct quoting_options o;
/* Clearing the whole bit vector is what makes this "no extra
   quoting": no character is forced to be quoted beyond what STYLE
   itself requires. */
175 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
179 /* MSGID approximates a quotation mark. Return its translation if it
180 has one; otherwise, return either it or "\"", depending on S. */
182 gettext_quote (char const *msgid, enum quoting_style s)
/* _() expands to gettext (msgid). */
184 char const *translation = _(msgid);
/* Pointer equality with MSGID is the test for "no translation was
   found".  Only clocale_quoting_style treats that case specially;
   per the contract above that is where "\"" is substituted (the
   assignment itself is on a line not shown). */
185 if (translation == msgid && s == clocale_quoting_style)
190 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
191 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
192 QUOTE_THESE_TOO to control quoting.
193 Terminate the output with a null character, and return the written
194 size of the output, not counting the terminating null.
195 If BUFFERSIZE is too small to store the output string, return the
196 value that would have been returned had BUFFERSIZE been large enough.
197 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
199 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
200 ARGSIZE, O), except it breaks O into its component pieces and is
201 not careful about errno. */
204 quotearg_buffer_restyled (char *buffer, size_t buffersize,
205 char const *arg, size_t argsize,
206 enum quoting_style quoting_style, int flags,
207 unsigned int const *quote_these_too,
208 char const *left_quote,
209 char const *right_quote)
213 char const *quote_string = 0;
214 size_t quote_string_len = 0;
215 bool backslash_escapes = false;
216 bool unibyte_locale = MB_CUR_MAX == 1;
217 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
222 if (len < buffersize) \
228 switch (quoting_style)
230 case c_maybe_quoting_style:
231 quoting_style = c_quoting_style;
232 elide_outer_quotes = true;
234 case c_quoting_style:
235 if (!elide_outer_quotes)
237 backslash_escapes = true;
239 quote_string_len = 1;
242 case escape_quoting_style:
243 backslash_escapes = true;
244 elide_outer_quotes = false;
247 case locale_quoting_style:
248 case clocale_quoting_style:
249 case custom_quoting_style:
251 if (quoting_style != custom_quoting_style)
254 /* Get translations for open and closing quotation marks.
256 The message catalog should translate "`" to a left
257 quotation mark suitable for the locale, and similarly for
258 "'". If the catalog has no translation,
259 locale_quoting_style quotes `like this', and
260 clocale_quoting_style quotes "like this".
262 For example, an American English Unicode locale should
263 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
264 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
265 MARK). A British English Unicode locale should instead
266 translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
267 and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
269 If you don't know what to put here, please see
270 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
271 and use glyphs suitable for your language. */
272 left_quote = gettext_quote (N_("`"), quoting_style);
273 right_quote = gettext_quote (N_("'"), quoting_style);
275 if (!elide_outer_quotes)
276 for (quote_string = left_quote; *quote_string; quote_string++)
277 STORE (*quote_string);
278 backslash_escapes = true;
279 quote_string = right_quote;
280 quote_string_len = strlen (quote_string);
284 case shell_quoting_style:
285 quoting_style = shell_always_quoting_style;
286 elide_outer_quotes = true;
288 case shell_always_quoting_style:
289 if (!elide_outer_quotes)
292 quote_string_len = 1;
295 case literal_quoting_style:
296 elide_outer_quotes = false;
303 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
307 bool is_right_quote = false;
309 if (backslash_escapes
311 && i + quote_string_len <= argsize
312 && memcmp (arg + i, quote_string, quote_string_len) == 0)
314 if (elide_outer_quotes)
315 goto force_outer_quoting_style;
316 is_right_quote = true;
323 if (backslash_escapes)
325 if (elide_outer_quotes)
326 goto force_outer_quoting_style;
328 /* If quote_string were to begin with digits, we'd need to
329 test for the end of the arg as well. However, it's
330 hard to imagine any locale that would use digits in
331 quotes, and set_custom_quoting is documented not to accept them. */
333 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
340 else if (flags & QA_ELIDE_NULL_BYTES)
345 switch (quoting_style)
347 case shell_always_quoting_style:
348 if (elide_outer_quotes)
349 goto force_outer_quoting_style;
352 case c_quoting_style:
353 if ((flags & QA_SPLIT_TRIGRAPHS)
354 && i + 2 < argsize && arg[i + 1] == '?')
358 case '(': case ')': case '-': case '/':
359 case '<': case '=': case '>':
360 /* Escape the second '?' in what would otherwise be a trigraph. */
362 if (elide_outer_quotes)
363 goto force_outer_quoting_style;
382 case '\a': esc = 'a'; goto c_escape;
383 case '\b': esc = 'b'; goto c_escape;
384 case '\f': esc = 'f'; goto c_escape;
385 case '\n': esc = 'n'; goto c_and_shell_escape;
386 case '\r': esc = 'r'; goto c_and_shell_escape;
387 case '\t': esc = 't'; goto c_and_shell_escape;
388 case '\v': esc = 'v'; goto c_escape;
390 /* No need to escape the escape if we are trying to elide
391 outer quotes and nothing else is problematic. */
392 if (backslash_escapes && elide_outer_quotes && quote_string_len)
396 if (quoting_style == shell_always_quoting_style
397 && elide_outer_quotes)
398 goto force_outer_quoting_style;
401 if (backslash_escapes)
408 case '{': case '}': /* sometimes special if isolated */
409 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
417 case '!': /* special in bash */
418 case '"': case '$': case '&':
419 case '(': case ')': case '*': case ';':
421 case '=': /* sometimes special in 0th or (with "set -k") later args */
423 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
425 /* A shell special character. In theory, '$' and '`' could
426 be the first bytes of multibyte characters, which means
427 we should check them with mbrtowc, but in practice this
428 doesn't happen so it's not worth worrying about. */
429 if (quoting_style == shell_always_quoting_style
430 && elide_outer_quotes)
431 goto force_outer_quoting_style;
435 if (quoting_style == shell_always_quoting_style)
437 if (elide_outer_quotes)
438 goto force_outer_quoting_style;
445 case '%': case '+': case ',': case '-': case '.': case '/':
446 case '0': case '1': case '2': case '3': case '4': case '5':
447 case '6': case '7': case '8': case '9': case ':':
448 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
449 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
450 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
451 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
452 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
453 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
454 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
455 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
456 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
457 /* These characters don't cause problems, no matter what the
458 quoting style is. They cannot start multibyte sequences.
459 A digit or a special letter would cause trouble if it
460 appeared at the beginning of quote_string because we'd then
461 escape by prepending a backslash. However, it's hard to
462 imagine any locale that would use digits or letters as
463 quotes, and set_custom_quoting is documented not to accept them. */
468 /* If we have a multibyte sequence, copy it until we reach
469 its end, find an error, or come back to the initial shift
470 state. For C-like styles, if the sequence has
471 unprintable characters, escape the whole sequence, since
472 we can't easily escape single characters within it. */
474 /* Length of multibyte sequence found so far. */
482 printable = isprint (c) != 0;
487 memset (&mbstate, 0, sizeof mbstate);
491 if (argsize == SIZE_MAX)
492 argsize = strlen (arg);
497 size_t bytes = mbrtowc (&w, &arg[i + m],
498 argsize - (i + m), &mbstate);
501 else if (bytes == (size_t) -1)
506 else if (bytes == (size_t) -2)
509 while (i + m < argsize && arg[i + m])
515 /* Work around a bug with older shells that "see" a '\'
516 that is really the 2nd byte of a multibyte character.
517 In practice the problem is limited to ASCII
518 chars >= '@' that are shell special chars. */
519 if ('[' == 0x5b && elide_outer_quotes
520 && quoting_style == shell_always_quoting_style)
523 for (j = 1; j < bytes; j++)
524 switch (arg[i + m + j])
526 case '[': case '\\': case '^':
528 goto force_outer_quoting_style;
540 while (! mbsinit (&mbstate));
543 if (1 < m || (backslash_escapes && ! printable))
545 /* Output a multibyte sequence, or an escaped
546 unprintable unibyte character. */
551 if (backslash_escapes && ! printable)
553 if (elide_outer_quotes)
554 goto force_outer_quoting_style;
556 STORE ('0' + (c >> 6));
557 STORE ('0' + ((c >> 3) & 7));
560 else if (is_right_quote)
563 is_right_quote = false;
576 if (! ((backslash_escapes || elide_outer_quotes)
578 && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
583 if (elide_outer_quotes)
584 goto force_outer_quoting_style;
591 if (len == 0 && quoting_style == shell_always_quoting_style
592 && elide_outer_quotes)
593 goto force_outer_quoting_style;
595 if (quote_string && !elide_outer_quotes)
596 for (; *quote_string; quote_string++)
597 STORE (*quote_string);
599 if (len < buffersize)
603 force_outer_quoting_style:
604 /* Don't reuse quote_these_too, since the addition of outer quotes
605 sufficiently quotes the specified characters. */
606 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
608 flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
609 left_quote, right_quote);
612 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
613 argument ARG (of size ARGSIZE), using O to control quoting.
614 If O is null, use the default.
615 Terminate the output with a null character, and return the written
616 size of the output, not counting the terminating null.
617 If BUFFERSIZE is too small to store the output string, return the
618 value that would have been returned had BUFFERSIZE been large enough.
619 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
622 quotearg_buffer (char *buffer, size_t buffersize,
623 char const *arg, size_t argsize,
624 struct quoting_options const *o)
/* A null O selects the shared default options. */
626 struct quoting_options const *p = o ? o : &default_quoting_options;
/* Unpack the options into the individual arguments that
   quotearg_buffer_restyled takes; R receives the length it reports. */
628 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
629 p->style, p->flags, p->quote_these_too,
630 p->left_quote, p->right_quote);
635 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O): the
   result size is not reported back to the caller. */
637 quotearg_alloc (char const *arg, size_t argsize,
638 struct quoting_options const *o)
640 return quotearg_alloc_mem (arg, argsize, NULL, o);
643 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
644 allocated storage containing the quoted string, and store the
645 resulting size into *SIZE, if non-NULL. The result can contain
646 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
647 NULL, and set_quoting_flags has not set the null byte elision flag. */
650 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
651 struct quoting_options const *o)
/* A null O selects the shared default options. */
653 struct quoting_options const *p = o ? o : &default_quoting_options;
655 /* Elide embedded null bytes if we can't return a size. */
656 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
/* Sizing pass: with a null buffer of size 0 nothing is stored, and
   the call reports the length the quoted output needs.
   NOTE(review): the end of this expression is on a line not shown. */
657 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
658 flags, p->quote_these_too,
/* Filling pass: repeat the same call with the same style and flags,
   this time storing into the newly allocated buffer. */
661 char *buf = xcharalloc (bufsize);
662 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
664 p->left_quote, p->right_quote);
671 /* A storage slot with size and pointer to a value. */
678 /* Preallocate a slot 0 buffer, so that the caller can always quote
679 one small component of a "memory exhausted" message in slot 0. */
680 static char slot0[256];
/* Number of slots currently in the vector; it starts at 1 because
   slot 0 exists statically. */
681 static unsigned int nslots = 1;
/* The initial one-element vector: its only slot describes slot0
   (size first, then the buffer pointer). */
682 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The vector in current use.  It points at the static slotvec0 until
   quotearg_n_options replaces it with storage from xrealloc. */
683 static struct slotvec *slotvec = &slotvec0;
688 struct slotvec *sv = slotvec;
690 for (i = 1; i < nslots; i++)
692 if (sv[0].val != slot0)
695 slotvec0.size = sizeof slot0;
696 slotvec0.val = slot0;
706 /* Use storage slot N to return a quoted version of argument ARG.
707 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
708 null-terminated string.
709 OPTIONS specifies the quoting options.
710 The returned value points to static storage that can be
711 reused by the next call to this function with the same value of N.
712 N must be nonnegative. N is deliberately declared with type "int"
713 to allow for future extensions (using negative values). */
715 quotearg_n_options (int n, char const *arg, size_t argsize,
716 struct quoting_options const *options)
721 struct slotvec *sv = slotvec;
728 /* FIXME: technically, the type of n1 should be `unsigned int',
729 but that evokes an unsuppressible warning from gcc-4.0.1 and
730 older. If gcc ever provides an option to suppress that warning,
731 revert to the original type, so that the test in xalloc_oversized
732 is once again performed only at compile time. */
/* True while the slot vector is still the static slotvec0 rather
   than storage obtained from xrealloc. */
734 bool preallocated = (sv == &slotvec0);
/* NOTE(review): presumably rejects a slot count whose byte size
   n1 * sizeof *sv would overflow; xalloc_oversized is defined
   elsewhere and the action taken is on a line not shown. */
736 if (xalloc_oversized (n1, sizeof *sv))
/* The static slotvec0 is never handed to xrealloc; NULL is passed in
   that case so that fresh storage is obtained.  NOTE(review): copying
   slotvec0's contents into the new vector is not visible here. */
739 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero-fill the slots added beyond the previous count. */
742 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
747 size_t size = sv[n].size;
748 char *val = sv[n].val;
749 /* Elide embedded null bytes since we don't return a size. */
750 int flags = options->flags | QA_ELIDE_NULL_BYTES;
/* First attempt: quote into whatever buffer slot N already has;
   QSIZE is the length the output requires. */
751 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
752 options->style, flags,
753 options->quote_these_too,
755 options->right_quote);
/* Regrow path: record a capacity of QSIZE + 1 (the output plus its
   terminating null) for the slot.  NOTE(review): the test that
   selects this path is on a line not shown. */
759 sv[n].size = size = qsize + 1;
/* Allocate the larger buffer and quote again into it. */
762 sv[n].val = val = xcharalloc (size);
763 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
764 flags, options->quote_these_too,
766 options->right_quote);
/* Quote the null-terminated string ARG into storage slot N, using
   the default quoting options. */
775 quotearg_n (int n, char const *arg)
777 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the ARGSIZE bytes at ARG into storage slot N, using the
   default quoting options.  ARGSIZE is passed through unchanged, so
   SIZE_MAX still means "ARG is null-terminated". */
781 quotearg_n_mem (int n, char const *arg, size_t argsize)
783 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Shorthand for quotearg_n (0, ARG): quote the null-terminated
   string ARG into slot 0. */
787 quotearg (char const *arg)
789 return quotearg_n (0, arg);
/* Shorthand for quotearg_n_mem (0, ARG, ARGSIZE): quote the ARGSIZE
   bytes at ARG into slot 0. */
793 quotearg_mem (char const *arg, size_t argsize)
795 return quotearg_n_mem (0, arg, argsize);
/* Quote the null-terminated string ARG into storage slot N using
   quoting style S.  The default options are neither consulted nor
   modified. */
799 quotearg_n_style (int n, enum quoting_style s, char const *arg)
/* Temporary options for S; the object only needs to live for the
   duration of the call below. */
801 struct quoting_options const o = quoting_options_from_style (s);
802 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Quote the ARGSIZE bytes at ARG into storage slot N using quoting
   style S.  The default options are neither consulted nor modified. */
806 quotearg_n_style_mem (int n, enum quoting_style s,
807 char const *arg, size_t argsize)
/* Temporary options for S; the object only needs to live for the
   duration of the call below. */
809 struct quoting_options const o = quoting_options_from_style (s);
810 return quotearg_n_options (n, arg, argsize, &o);
/* Shorthand for quotearg_n_style (0, S, ARG): quote the
   null-terminated string ARG into slot 0 using style S. */
814 quotearg_style (enum quoting_style s, char const *arg)
816 return quotearg_n_style (0, s, arg);
/* Shorthand for quotearg_n_style_mem (0, S, ARG, ARGSIZE): quote the
   ARGSIZE bytes at ARG into slot 0 using style S. */
820 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
822 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote the ARGSIZE bytes at ARG into slot 0 using the default
   options, additionally marking character CH to be quoted. */
826 quotearg_char_mem (char const *arg, size_t argsize, char ch)
828 struct quoting_options options;
/* Work on a private copy so that the shared defaults themselves are
   left unchanged. */
829 options = default_quoting_options;
830 set_char_quoting (&options, ch, 1);
831 return quotearg_n_options (0, arg, argsize, &options);
/* Like quotearg_char_mem, for a null-terminated ARG: SIZE_MAX is
   passed as the size. */
835 quotearg_char (char const *arg, char ch)
837 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Shorthand for quotearg_char (ARG, ':'): quote the null-terminated
   string ARG, also marking ':' to be quoted. */
841 quotearg_colon (char const *arg)
843 return quotearg_char (arg, ':');
/* Shorthand for quotearg_char_mem (ARG, ARGSIZE, ':'): quote the
   ARGSIZE bytes at ARG, also marking ':' to be quoted. */
847 quotearg_colon_mem (char const *arg, size_t argsize)
849 return quotearg_char_mem (arg, argsize, ':');
/* Quote ARG into storage slot N using LEFT_QUOTE and RIGHT_QUOTE as
   custom delimiters, by delegating to quotearg_n_custom_mem.
   NOTE(review): the final (size) argument of the call is on a line
   not visible in this excerpt. */
853 quotearg_n_custom (int n, char const *left_quote,
854 char const *right_quote, char const *arg)
856 return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
/* Quote the ARGSIZE bytes at ARG into storage slot N, using
   custom_quoting_style with LEFT_QUOTE and RIGHT_QUOTE as the
   delimiters.  All other settings are taken from the defaults. */
861 quotearg_n_custom_mem (int n, char const *left_quote,
862 char const *right_quote,
863 char const *arg, size_t argsize)
/* Start from a private copy of the defaults so that the shared
   object is not modified by set_custom_quoting. */
865 struct quoting_options o = default_quoting_options;
866 set_custom_quoting (&o, left_quote, right_quote);
867 return quotearg_n_options (n, arg, argsize, &o);
/* Shorthand for quotearg_n_custom (0, LEFT_QUOTE, RIGHT_QUOTE, ARG),
   i.e. the slot 0 variant.  NOTE(review): the end of the parameter
   list (declaring ARG) is on a line not visible in this excerpt. */
871 quotearg_custom (char const *left_quote, char const *right_quote,
874 return quotearg_n_custom (0, left_quote, right_quote, arg);
878 quotearg_custom_mem (char const *left_quote, char const *right_quote,
879 char const *arg, size_t argsize)
881 return quotearg_n_custom_mem (0, left_quote, right_quote, arg,