1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998-2002, 2004-2011 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Shorthand: translate MSGID by calling gettext.  */
36 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged, i.e. without calling gettext.  */
37 #define N_(msgid) msgid
/* All-ones size_t.  In this file it doubles as the ARGSIZE value that
   means: ARG is null-terminated, measure it with strlen.
   NOTE(review): the space after # suggests this definition sits inside
   a conditional whose lines are not shown in this listing -- confirm.  */
40 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int; each word of quote_these_too holds this
   many per-character flags.  */
43 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Everything that controls how an argument is quoted.  Several
   functions in this file (for example get_quoting_style and
   quotearg_buffer) treat a null pointer to this type as a request to
   use default_quoting_options.  */
45 struct quoting_options
47 /* Basic quoting style. */
48 enum quoting_style style;
50 /* Additional flags. Bitwise combination of enum quoting_flags. */
/* NOTE(review): the member declaration this comment describes is not
   shown in this listing; other code here reads it as p->flags.  */
53 /* Quote the characters indicated by this bit vector even if the
54 quoting style would not normally require them to be quoted. */
/* Indexed by unsigned char value C: word C / INT_BITS, bit
   C % INT_BITS.  The array is sized to cover 0 .. UCHAR_MAX.  */
55 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
57 /* The left quote for custom_quoting_style. */
58 char const *left_quote;
60 /* The right quote for custom_quoting_style. */
61 char const *right_quote;
64 /* Names of quoting styles. */
/* NOTE(review): the initializer is not shown in this listing.  Judging
   by the comment on quoting_style_vals, entry N here presumably names
   the style stored at quoting_style_vals[N] -- confirm.  */
65 char const *const quoting_style_args[] =
78 /* Correspondences to quoting style names. */
/* Only some of the initializer entries are visible in this listing.  */
79 enum quoting_style const quoting_style_vals[] =
81 literal_quoting_style,
83 shell_always_quoting_style,
85 c_maybe_quoting_style,
91 /* The default quoting options. */
/* File-scope object with no initializer, so it starts out all zero.
   It is what the functions here fall back to when given a null options
   pointer, and the set_* functions modify it in place in that case.  */
92 static struct quoting_options default_quoting_options;
94 /* Allocate a new set of quoting options, with contents initially identical
95 to O if O is not null, or to the default if O is null.
96 It is the caller's responsibility to free the result. */
97 struct quoting_options *
98 clone_quoting_options (struct quoting_options *o)
/* Duplicate the chosen source object with xmemdup.  The remaining
   arguments of this call and the return statement are not shown in
   this listing.  */
101 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
107 /* Get the value of O's quoting style. If O is null, use the default. */
109 get_quoting_style (struct quoting_options *o)
/* Read-only access: the selected object is not modified.  */
111 return (o ? o : &default_quoting_options)->style;
114 /* In O (or in the default if O is null),
115 set the value of the quoting style to S. */
117 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* With a null O this changes the shared default object in place, which
   affects every later call that relies on the defaults.  */
119 (o ? o : &default_quoting_options)->style = s;
122 /* In O (or in the default if O is null),
123 set the value of the quoting options for character C to I.
124 Return the old value. Currently, the only values defined for I are
125 0 (the default) and 1 (which means to quote the character even if
126 it would not otherwise be quoted). */
128 set_char_quoting (struct quoting_options *o, char c, int i)
/* Convert through unsigned char so that a negative char value cannot
   produce a negative table index.  */
130 unsigned char uc = c;
/* Address of the quote_these_too word that holds the bit for UC.  The
   start of this declaration is not shown in this listing.  */
132 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of the bit inside that word.  */
133 int shift = uc % INT_BITS;
/* R is the current value of the bit (0 or 1).  */
134 int r = (*p >> shift) & 1;
/* Flip the bit only when the low bit of I differs from R.
   NOTE(review): ((i & 1) ^ r) has type int, so when SHIFT equals
   INT_BITS - 1 the left shift of 1 overflows int, which is undefined
   behavior.  Declaring R as unsigned int would make the shift
   unsigned -- confirm and fix.  */
135 *p ^= ((i & 1) ^ r) << shift;
139 /* In O (or in the default if O is null),
140 set the value of the quoting options flag to I, which can be a
141 bitwise combination of enum quoting_flags, or 0 for default
142 behavior. Return the old value. */
144 set_quoting_flags (struct quoting_options *o, int i)
/* Fall back to the shared defaults.  The test guarding this assignment
   and the rest of the body are not shown in this listing; presumably
   the assignment runs only when O is null -- confirm.  */
148 o = &default_quoting_options;
/* Select custom_quoting_style in O and record LEFT_QUOTE and
   RIGHT_QUOTE as its delimiters.  The strings are stored by pointer,
   not copied, so they must remain valid for as long as O is used.
   NOTE(review): by analogy with the other setters, a null O presumably
   means the default options; the null test is not shown here.  */
155 set_custom_quoting (struct quoting_options *o,
156 char const *left_quote, char const *right_quote)
159 o = &default_quoting_options;
160 o->style = custom_quoting_style;
/* A null quote string is treated as a special case; its handling is
   not shown in this listing (presumably a fatal error -- confirm).  */
161 if (!left_quote || !right_quote)
163 o->left_quote = left_quote;
164 o->right_quote = right_quote;
167 /* Return quoting options for STYLE, with no extra quoting. */
168 static struct quoting_options
169 quoting_options_from_style (enum quoting_style style)
/* Start from all-zero options: zero flags, an empty quote_these_too
   vector and null quote strings.  */
171 struct quoting_options o = { 0 };
/* custom_quoting_style is singled out, presumably because it needs
   quote strings that this function has no way to supply -- confirm
   against the branch body, which is not shown in this listing.  */
172 if (style == custom_quoting_style)
178 /* MSGID approximates a quotation mark. Return its translation if it
179 has one; otherwise, return either it or "\"", depending on S. */
181 gettext_quote (char const *msgid, enum quoting_style s)
183 char const *translation = _(msgid);
/* Pointer equality with MSGID is how the absence of a translation is
   detected; only clocale_quoting_style then takes the branch below,
   whose body is not shown in this listing.  */
184 if (translation == msgid && s == clocale_quoting_style)
189 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
190 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
191 QUOTE_THESE_TOO to control quoting.
192 Terminate the output with a null character, and return the written
193 size of the output, not counting the terminating null.
194 If BUFFERSIZE is too small to store the output string, return the
195 value that would have been returned had BUFFERSIZE been large enough.
196 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
198 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
199 ARGSIZE, O), except it breaks O into its component pieces and is
200 not careful about errno. */
/* Reviewer notes on the visible body.  Some lines of this function,
   including its local declarations and the definition of STORE, are
   not shown in this listing, so these notes cover only what is visible.
   - LEFT_QUOTE and RIGHT_QUOTE are used as passed only for
     custom_quoting_style; for locale_quoting_style and
     clocale_quoting_style they are overwritten with the results of
     gettext_quote.
   - While outer quotes are being elided, meeting input that needs
     quoting jumps to force_outer_quoting_style, which reruns the whole
     function with QA_ELIDE_OUTER_QUOTES cleared and with NULL passed
     for QUOTE_THESE_TOO.  The same jump is taken when shell quoting
     with elided quotes has produced no output at all.
   - Before mbrtowc is called, an ARGSIZE of SIZE_MAX is replaced by
     strlen (ARG).
   - NOTE(review): the QUOTE_THESE_TOO test computes
     1 << (c % INT_BITS) in type int.  A shift count of INT_BITS - 1
     overflows int, which is undefined behavior.  Using 1u, or shifting
     the table word right and masking with 1, would avoid it -- confirm.  */
203 quotearg_buffer_restyled (char *buffer, size_t buffersize,
204 char const *arg, size_t argsize,
205 enum quoting_style quoting_style, int flags,
206 unsigned int const *quote_these_too,
207 char const *left_quote,
208 char const *right_quote)
212 char const *quote_string = 0;
213 size_t quote_string_len = 0;
214 bool backslash_escapes = false;
215 bool unibyte_locale = MB_CUR_MAX == 1;
216 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
221 if (len < buffersize) \
227 switch (quoting_style)
229 case c_maybe_quoting_style:
230 quoting_style = c_quoting_style;
231 elide_outer_quotes = true;
233 case c_quoting_style:
234 if (!elide_outer_quotes)
236 backslash_escapes = true;
238 quote_string_len = 1;
241 case escape_quoting_style:
242 backslash_escapes = true;
243 elide_outer_quotes = false;
246 case locale_quoting_style:
247 case clocale_quoting_style:
248 case custom_quoting_style:
250 if (quoting_style != custom_quoting_style)
253 Get translations for open and closing quotation marks.
255 The message catalog should translate "`" to a left
256 quotation mark suitable for the locale, and similarly for
257 "'". If the catalog has no translation,
258 locale_quoting_style quotes `like this', and
259 clocale_quoting_style quotes "like this".
261 For example, an American English Unicode locale should
262 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
263 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
264 MARK). A British English Unicode locale should instead
265 translate these to U+2018 (LEFT SINGLE QUOTATION MARK)
266 and U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
268 If you don't know what to put here, please see
269 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
270 and use glyphs suitable for your language. */
271 left_quote = gettext_quote (N_("`"), quoting_style);
272 right_quote = gettext_quote (N_("'"), quoting_style);
274 if (!elide_outer_quotes)
275 for (quote_string = left_quote; *quote_string; quote_string++)
276 STORE (*quote_string);
277 backslash_escapes = true;
278 quote_string = right_quote;
279 quote_string_len = strlen (quote_string);
283 case shell_quoting_style:
284 quoting_style = shell_always_quoting_style;
285 elide_outer_quotes = true;
287 case shell_always_quoting_style:
288 if (!elide_outer_quotes)
291 quote_string_len = 1;
294 case literal_quoting_style:
295 elide_outer_quotes = false;
302 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
306 bool is_right_quote = false;
308 if (backslash_escapes
310 && i + quote_string_len <= argsize
311 && memcmp (arg + i, quote_string, quote_string_len) == 0)
313 if (elide_outer_quotes)
314 goto force_outer_quoting_style;
315 is_right_quote = true;
322 if (backslash_escapes)
324 if (elide_outer_quotes)
325 goto force_outer_quoting_style;
327 /* If quote_string were to begin with digits, we'd need to
328 test for the end of the arg as well. However, it's
329 hard to imagine any locale that would use digits in
330 quotes, and set_custom_quoting is documented not to
332 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
338 /* We don't have to worry that this last '0' will be
339 backslash-escaped because, again, quote_string should
340 not start with it and because quote_these_too is
341 documented as not accepting it. */
343 else if (flags & QA_ELIDE_NULL_BYTES)
348 switch (quoting_style)
350 case shell_always_quoting_style:
351 if (elide_outer_quotes)
352 goto force_outer_quoting_style;
355 case c_quoting_style:
356 if ((flags & QA_SPLIT_TRIGRAPHS)
357 && i + 2 < argsize && arg[i + 1] == '?')
361 case '(': case ')': case '-': case '/':
362 case '<': case '=': case '>':
363 /* Escape the second '?' in what would otherwise be
365 if (elide_outer_quotes)
366 goto force_outer_quoting_style;
385 case '\a': esc = 'a'; goto c_escape;
386 case '\b': esc = 'b'; goto c_escape;
387 case '\f': esc = 'f'; goto c_escape;
388 case '\n': esc = 'n'; goto c_and_shell_escape;
389 case '\r': esc = 'r'; goto c_and_shell_escape;
390 case '\t': esc = 't'; goto c_and_shell_escape;
391 case '\v': esc = 'v'; goto c_escape;
393 /* No need to escape the escape if we are trying to elide
394 outer quotes and nothing else is problematic. */
395 if (backslash_escapes && elide_outer_quotes && quote_string_len)
399 if (quoting_style == shell_always_quoting_style
400 && elide_outer_quotes)
401 goto force_outer_quoting_style;
404 if (backslash_escapes)
411 case '{': case '}': /* sometimes special if isolated */
412 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
420 case '!': /* special in bash */
421 case '"': case '$': case '&':
422 case '(': case ')': case '*': case ';':
424 case '=': /* sometimes special in 0th or (with "set -k") later args */
426 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
428 /* A shell special character. In theory, '$' and '`' could
429 be the first bytes of multibyte characters, which means
430 we should check them with mbrtowc, but in practice this
431 doesn't happen so it's not worth worrying about. */
432 if (quoting_style == shell_always_quoting_style
433 && elide_outer_quotes)
434 goto force_outer_quoting_style;
438 if (quoting_style == shell_always_quoting_style)
440 if (elide_outer_quotes)
441 goto force_outer_quoting_style;
448 case '%': case '+': case ',': case '-': case '.': case '/':
449 case '0': case '1': case '2': case '3': case '4': case '5':
450 case '6': case '7': case '8': case '9': case ':':
451 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
452 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
453 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
454 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
455 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
456 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
457 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
458 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
459 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
460 /* These characters don't cause problems, no matter what the
461 quoting style is. They cannot start multibyte sequences.
462 A digit or a special letter would cause trouble if it
463 appeared at the beginning of quote_string because we'd then
464 escape by prepending a backslash. However, it's hard to
465 imagine any locale that would use digits or letters as
466 quotes, and set_custom_quoting is documented not to accept
467 them. Also, a digit or a special letter would cause
468 trouble if it appeared in quote_these_too, but that's also
469 documented as not accepting them. */
473 /* If we have a multibyte sequence, copy it until we reach
474 its end, find an error, or come back to the initial shift
475 state. For C-like styles, if the sequence has
476 unprintable characters, escape the whole sequence, since
477 we can't easily escape single characters within it. */
479 /* Length of multibyte sequence found so far. */
487 printable = isprint (c) != 0;
492 memset (&mbstate, 0, sizeof mbstate);
496 if (argsize == SIZE_MAX)
497 argsize = strlen (arg);
502 size_t bytes = mbrtowc (&w, &arg[i + m],
503 argsize - (i + m), &mbstate);
506 else if (bytes == (size_t) -1)
511 else if (bytes == (size_t) -2)
514 while (i + m < argsize && arg[i + m])
520 /* Work around a bug with older shells that "see" a '\'
521 that is really the 2nd byte of a multibyte character.
522 In practice the problem is limited to ASCII
523 chars >= '@' that are shell special chars. */
524 if ('[' == 0x5b && elide_outer_quotes
525 && quoting_style == shell_always_quoting_style)
528 for (j = 1; j < bytes; j++)
529 switch (arg[i + m + j])
531 case '[': case '\\': case '^':
533 goto force_outer_quoting_style;
545 while (! mbsinit (&mbstate));
548 if (1 < m || (backslash_escapes && ! printable))
550 /* Output a multibyte sequence, or an escaped
551 unprintable unibyte character. */
556 if (backslash_escapes && ! printable)
558 if (elide_outer_quotes)
559 goto force_outer_quoting_style;
561 STORE ('0' + (c >> 6));
562 STORE ('0' + ((c >> 3) & 7));
565 else if (is_right_quote)
568 is_right_quote = false;
581 if (! ((backslash_escapes || elide_outer_quotes)
583 && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
588 if (elide_outer_quotes)
589 goto force_outer_quoting_style;
596 if (len == 0 && quoting_style == shell_always_quoting_style
597 && elide_outer_quotes)
598 goto force_outer_quoting_style;
600 if (quote_string && !elide_outer_quotes)
601 for (; *quote_string; quote_string++)
602 STORE (*quote_string);
604 if (len < buffersize)
608 force_outer_quoting_style:
609 /* Don't reuse quote_these_too, since the addition of outer quotes
610 sufficiently quotes the specified characters. */
611 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
613 flags & ~QA_ELIDE_OUTER_QUOTES, NULL,
614 left_quote, right_quote);
/* Entry point for quoting into a caller-supplied buffer: it selects O
   or default_quoting_options and forwards the fields of the selected
   object to quotearg_buffer_restyled.  */
617 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
618 argument ARG (of size ARGSIZE), using O to control quoting.
619 If O is null, use the default.
620 Terminate the output with a null character, and return the written
621 size of the output, not counting the terminating null.
622 If BUFFERSIZE is too small to store the output string, return the
623 value that would have been returned had BUFFERSIZE been large enough.
624 If ARGSIZE is SIZE_MAX, use the string length of the argument for
627 quotearg_buffer (char *buffer, size_t buffersize,
628 char const *arg, size_t argsize,
629 struct quoting_options const *o)
631 struct quoting_options const *p = o ? o : &default_quoting_options;
633 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
634 p->style, p->flags, p->quote_these_too,
635 p->left_quote, p->right_quote);
640 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
/* Because the SIZE argument passed on is NULL, quotearg_alloc_mem adds
   QA_ELIDE_NULL_BYTES to the flags for this call.  */
642 quotearg_alloc (char const *arg, size_t argsize,
643 struct quoting_options const *o)
645 return quotearg_alloc_mem (arg, argsize, NULL, o);
648 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
649 allocated storage containing the quoted string, and store the
650 resulting size into *SIZE, if non-NULL. The result can contain
651 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
652 NULL, and set_quoting_flags has not set the null byte elision
655 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
656 struct quoting_options const *o)
658 struct quoting_options const *p = o ? o : &default_quoting_options;
660 /* Elide embedded null bytes if we can't return a size. */
661 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
/* Pass 1: with a null buffer of size 0 nothing is stored, so this call
   only measures the quoted length.  The end of this expression is not
   shown in this listing.  */
662 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
663 flags, p->quote_these_too,
/* Pass 2: allocate BUFSIZE bytes and quote for real with the same
   style, flags and quote strings.  */
666 char *buf = xcharalloc (bufsize);
667 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
669 p->left_quote, p->right_quote);
676 /* A storage slot with size and pointer to a value. */
683 /* Preallocate a slot 0 buffer, so that the caller can always quote
684 one small component of a "memory exhausted" message in slot 0. */
685 static char slot0[256];
/* Number of entries currently in the slot vector.  */
686 static unsigned int nslots = 1;
/* The initial one-entry vector: slot 0, backed by slot0.  */
687 static struct slotvec slotvec0 = {sizeof slot0, slot0};
/* The current vector.  It starts as slotvec0 and is replaced by
   reallocated storage when quotearg_n_options needs more slots.  */
688 static struct slotvec *slotvec = &slotvec0;
/* NOTE(review): the header of this function is not shown in this
   listing.  The visible statements walk slots 1 .. nslots - 1, test
   whether slot 0 still uses the static slot0 buffer, and point
   slotvec0 back at slot0.  Presumably this is the routine that
   releases all quotearg slot storage -- confirm against the full file.  */
693 struct slotvec *sv = slotvec;
695 for (i = 1; i < nslots; i++)
697 if (sv[0].val != slot0)
700 slotvec0.size = sizeof slot0;
701 slotvec0.val = slot0;
711 /* Use storage slot N to return a quoted version of argument ARG.
712 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
713 null-terminated string.
714 OPTIONS specifies the quoting options.
715 The returned value points to static storage that can be
716 reused by the next call to this function with the same value of N.
717 N must be nonnegative. N is deliberately declared with type "int"
718 to allow for future extensions (using negative values). */
/* Unlike the functions that accept a null O, the visible code
   dereferences OPTIONS unconditionally, so it must not be null.  */
720 quotearg_n_options (int n, char const *arg, size_t argsize,
721 struct quoting_options const *options)
726 struct slotvec *sv = slotvec;
733 /* FIXME: technically, the type of n1 should be `unsigned int',
734 but that evokes an unsuppressible warning from gcc-4.0.1 and
735 older. If gcc ever provides an option to suppress that warning,
736 revert to the original type, so that the test in xalloc_oversized
737 is once again performed only at compile time. */
/* The static slotvec0 did not come from the allocator, so it is never
   handed to xrealloc; NULL is passed in its place.  */
739 bool preallocated = (sv == &slotvec0);
/* Presumably guards the N1 * sizeof *SV product below against
   overflow; the action taken is not shown in this listing.  */
741 if (xalloc_oversized (n1, sizeof *sv))
744 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
/* Zero the newly added entries so their size and val start out empty.  */
747 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
752 size_t size = sv[n].size;
753 char *val = sv[n].val;
754 /* Elide embedded null bytes since we don't return a size. */
755 int flags = options->flags | QA_ELIDE_NULL_BYTES;
/* Try the existing slot buffer first; QSIZE is the length that the
   complete result needs.  */
756 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
757 options->style, flags,
758 options->quote_these_too,
760 options->right_quote);
/* Grow the slot to QSIZE + 1 bytes (room for the terminating null) and
   quote again.  The test that triggers this path and any handling of
   the old buffer are not shown in this listing.  */
764 sv[n].size = size = qsize + 1;
767 sv[n].val = val = xcharalloc (size);
768 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
769 flags, options->quote_these_too,
771 options->right_quote);
/* Quote the null-terminated string ARG into slot N using the default
   quoting options.  */
780 quotearg_n (int n, char const *arg)
782 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Quote the ARGSIZE bytes at ARG into slot N using the default quoting
   options.  */
786 quotearg_n_mem (int n, char const *arg, size_t argsize)
788 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
/* Quote the null-terminated string ARG into slot 0 using the default
   quoting options.  */
792 quotearg (char const *arg)
794 return quotearg_n (0, arg);
/* Quote the ARGSIZE bytes at ARG into slot 0 using the default quoting
   options.  */
798 quotearg_mem (char const *arg, size_t argsize)
800 return quotearg_n_mem (0, arg, argsize);
/* Quote the null-terminated string ARG into slot N using options built
   from style S by quoting_options_from_style.  */
804 quotearg_n_style (int n, enum quoting_style s, char const *arg)
806 struct quoting_options const o = quoting_options_from_style (s);
807 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Quote the ARGSIZE bytes at ARG into slot N using options built from
   style S by quoting_options_from_style.  */
811 quotearg_n_style_mem (int n, enum quoting_style s,
812 char const *arg, size_t argsize)
814 struct quoting_options const o = quoting_options_from_style (s);
815 return quotearg_n_options (n, arg, argsize, &o);
/* Quote the null-terminated string ARG into slot 0 using style S.  */
819 quotearg_style (enum quoting_style s, char const *arg)
821 return quotearg_n_style (0, s, arg);
/* Quote the ARGSIZE bytes at ARG into slot 0 using style S.  */
825 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
827 return quotearg_n_style_mem (0, s, arg, argsize);
/* Quote the ARGSIZE bytes at ARG into slot 0, using a private copy of
   the default options in which character CH is marked to be quoted as
   well.  The shared defaults themselves are left unchanged.  */
831 quotearg_char_mem (char const *arg, size_t argsize, char ch)
833 struct quoting_options options;
834 options = default_quoting_options;
835 set_char_quoting (&options, ch, 1);
836 return quotearg_n_options (0, arg, argsize, &options);
/* Like quotearg_char_mem, for a null-terminated ARG (SIZE_MAX is passed
   as the size).  */
840 quotearg_char (char const *arg, char ch)
842 return quotearg_char_mem (arg, SIZE_MAX, ch);
/* Quote the null-terminated string ARG into slot 0 with the default
   options, additionally quoting the colon character.  */
846 quotearg_colon (char const *arg)
848 return quotearg_char (arg, ':');
/* Quote the ARGSIZE bytes at ARG into slot 0 with the default options,
   additionally quoting the colon character.  */
852 quotearg_colon_mem (char const *arg, size_t argsize)
854 return quotearg_char_mem (arg, argsize, ':');
/* Quote ARG into slot N between LEFT_QUOTE and RIGHT_QUOTE by
   forwarding to quotearg_n_custom_mem.  The final argument of the call
   is not shown in this listing; presumably it is SIZE_MAX, meaning ARG
   is null-terminated -- confirm.  */
858 quotearg_n_custom (int n, char const *left_quote,
859 char const *right_quote, char const *arg)
861 return quotearg_n_custom_mem (n, left_quote, right_quote, arg,
/* Quote the ARGSIZE bytes at ARG into slot N, using a private copy of
   the default options switched to custom_quoting_style with
   LEFT_QUOTE and RIGHT_QUOTE as the delimiters.  */
866 quotearg_n_custom_mem (int n, char const *left_quote,
867 char const *right_quote,
868 char const *arg, size_t argsize)
870 struct quoting_options o = default_quoting_options;
871 set_custom_quoting (&o, left_quote, right_quote);
872 return quotearg_n_options (n, arg, argsize, &o);
/* Slot 0 variant of quotearg_n_custom.  The rest of the parameter list
   is not shown in this listing.  */
876 quotearg_custom (char const *left_quote, char const *right_quote,
879 return quotearg_n_custom (0, left_quote, right_quote, arg);
883 quotearg_custom_mem (char const *left_quote, char const *right_quote,
884 char const *arg, size_t argsize)
886 return quotearg_n_custom_mem (0, left_quote, right_quote, arg,