1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007,
4 2008 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* Written by Paul Eggert <eggert@twinsun.com> */
37 #define _(msgid) gettext (msgid)
38 #define N_(msgid) msgid
41 # define SIZE_MAX ((size_t) -1)
44 #define INT_BITS (sizeof (int) * CHAR_BIT)
46 struct quoting_options
48 /* Basic quoting style. */
49 enum quoting_style style;
51 /* Additional flags. Bitwise combination of enum quoting_flags. */
54 /* Quote the characters indicated by this bit vector even if the
55 quoting style would not normally require them to be quoted. */
56 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
59 /* Names of quoting styles. */
60 char const *const quoting_style_args[] =
73 /* Correspondences to quoting style names. */
74 enum quoting_style const quoting_style_vals[] =
76 literal_quoting_style,
78 shell_always_quoting_style,
80 c_maybe_quoting_style,
86 /* The default quoting options. */
87 static struct quoting_options default_quoting_options;
89 /* Allocate a new set of quoting options, with contents initially identical
90 to O if O is not null, or to the default if O is null.
91 It is the caller's responsibility to free the result. */
92 struct quoting_options *
93 clone_quoting_options (struct quoting_options *o)
96 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
102 /* Get the value of O's quoting style. If O is null, use the default. */
104 get_quoting_style (struct quoting_options *o)
106 return (o ? o : &default_quoting_options)->style;
109 /* In O (or in the default if O is null),
110 set the value of the quoting style to S. */
112 set_quoting_style (struct quoting_options *o, enum quoting_style s)
114 (o ? o : &default_quoting_options)->style = s;
117 /* In O (or in the default if O is null),
118 set the value of the quoting options for character C to I.
119 Return the old value. Currently, the only values defined for I are
120 0 (the default) and 1 (which means to quote the character even if
121 it would not otherwise be quoted). */
123 set_char_quoting (struct quoting_options *o, char c, int i)
125 unsigned char uc = c;
127 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
128 int shift = uc % INT_BITS;
129 int r = (*p >> shift) & 1;
130 *p ^= ((i & 1) ^ r) << shift;
134 /* In O (or in the default if O is null),
135 set the value of the quoting options flag to I, which can be a
136 bitwise combination of enum quoting_flags, or 0 for default
137 behavior. Return the old value. */
139 set_quoting_flags (struct quoting_options *o, int i)
143 o = &default_quoting_options;
149 /* Return quoting options for STYLE, with no extra quoting. */
150 static struct quoting_options
151 quoting_options_from_style (enum quoting_style style)
153 struct quoting_options o;
156 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
160 /* MSGID approximates a quotation mark. Return its translation if it
161 has one; otherwise, return either it or "\"", depending on S. */
163 gettext_quote (char const *msgid, enum quoting_style s)
165 char const *translation = _(msgid);
166 if (translation == msgid && s == clocale_quoting_style)
171 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
172 argument ARG (of size ARGSIZE), using QUOTING_STYLE, FLAGS, and
173 QUOTE_THESE_TOO to control quoting.
174 Terminate the output with a null character, and return the written
175 size of the output, not counting the terminating null.
176 If BUFFERSIZE is too small to store the output string, return the
177 value that would have been returned had BUFFERSIZE been large enough.
178 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
180 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
181 ARGSIZE, O), except it breaks O into its component pieces and is
182 not careful about errno. */
185 quotearg_buffer_restyled (char *buffer, size_t buffersize,
186 char const *arg, size_t argsize,
187 enum quoting_style quoting_style, int flags,
188 unsigned int const *quote_these_too)
192 char const *quote_string = 0;
193 size_t quote_string_len = 0;
194 bool backslash_escapes = false;
195 bool unibyte_locale = MB_CUR_MAX == 1;
196 bool elide_outer_quotes = (flags & QA_ELIDE_OUTER_QUOTES) != 0;
201 if (len < buffersize) \
207 switch (quoting_style)
209 case c_maybe_quoting_style:
210 quoting_style = c_quoting_style;
211 elide_outer_quotes = true;
213 case c_quoting_style:
214 if (!elide_outer_quotes)
216 backslash_escapes = true;
218 quote_string_len = 1;
221 case escape_quoting_style:
222 backslash_escapes = true;
223 elide_outer_quotes = false;
226 case locale_quoting_style:
227 case clocale_quoting_style:
230 Get translations for opening and closing quotation marks.
232 The message catalog should translate "`" to a left
233 quotation mark suitable for the locale, and similarly for
234 "'". If the catalog has no translation,
235 locale_quoting_style quotes `like this', and
236 clocale_quoting_style quotes "like this".
238 For example, an American English Unicode locale should
239 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
240 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
241 MARK). A British English Unicode locale should instead
242 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
243 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
245 If you don't know what to put here, please see
246 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
247 and use glyphs suitable for your language. */
249 char const *left = gettext_quote (N_("`"), quoting_style);
250 char const *right = gettext_quote (N_("'"), quoting_style);
251 if (!elide_outer_quotes)
252 for (quote_string = left; *quote_string; quote_string++)
253 STORE (*quote_string);
254 backslash_escapes = true;
255 quote_string = right;
256 quote_string_len = strlen (quote_string);
260 case shell_quoting_style:
261 quoting_style = shell_always_quoting_style;
262 elide_outer_quotes = true;
264 case shell_always_quoting_style:
265 if (!elide_outer_quotes)
268 quote_string_len = 1;
271 case literal_quoting_style:
272 elide_outer_quotes = false;
279 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
283 bool is_right_quote = false;
285 if (backslash_escapes
287 && i + quote_string_len <= argsize
288 && memcmp (arg + i, quote_string, quote_string_len) == 0)
290 if (elide_outer_quotes)
291 goto force_outer_quoting_style;
292 is_right_quote = true;
299 if (backslash_escapes)
301 if (elide_outer_quotes)
302 goto force_outer_quoting_style;
304 if (i + 1 < argsize && '0' <= arg[i + 1] && arg[i + 1] <= '9')
311 else if (flags & QA_ELIDE_NULL_BYTES)
316 switch (quoting_style)
318 case shell_always_quoting_style:
319 if (elide_outer_quotes)
320 goto force_outer_quoting_style;
323 case c_quoting_style:
324 if ((flags & QA_SPLIT_TRIGRAPHS)
325 && i + 2 < argsize && arg[i + 1] == '?')
329 case '(': case ')': case '-': case '/':
330 case '<': case '=': case '>':
331 /* Escape the second '?' in what would otherwise be
333 if (elide_outer_quotes)
334 goto force_outer_quoting_style;
353 case '\a': esc = 'a'; goto c_escape;
354 case '\b': esc = 'b'; goto c_escape;
355 case '\f': esc = 'f'; goto c_escape;
356 case '\n': esc = 'n'; goto c_and_shell_escape;
357 case '\r': esc = 'r'; goto c_and_shell_escape;
358 case '\t': esc = 't'; goto c_and_shell_escape;
359 case '\v': esc = 'v'; goto c_escape;
361 /* No need to escape the escape if we are trying to elide
362 outer quotes and nothing else is problematic. */
363 if (backslash_escapes && elide_outer_quotes && quote_string_len)
367 if (quoting_style == shell_always_quoting_style
368 && elide_outer_quotes)
369 goto force_outer_quoting_style;
372 if (backslash_escapes)
379 case '{': case '}': /* sometimes special if isolated */
380 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
388 case '!': /* special in bash */
389 case '"': case '$': case '&':
390 case '(': case ')': case '*': case ';':
392 case '=': /* sometimes special in 0th or (with "set -k") later args */
394 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
396 /* A shell special character. In theory, '$' and '`' could
397 be the first bytes of multibyte characters, which means
398 we should check them with mbrtowc, but in practice this
399 doesn't happen so it's not worth worrying about. */
400 if (quoting_style == shell_always_quoting_style
401 && elide_outer_quotes)
402 goto force_outer_quoting_style;
406 if (quoting_style == shell_always_quoting_style)
408 if (elide_outer_quotes)
409 goto force_outer_quoting_style;
416 case '%': case '+': case ',': case '-': case '.': case '/':
417 case '0': case '1': case '2': case '3': case '4': case '5':
418 case '6': case '7': case '8': case '9': case ':':
419 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
420 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
421 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
422 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
423 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
424 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
425 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
426 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
427 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
428 /* These characters don't cause problems, no matter what the
429 quoting style is. They cannot start multibyte sequences. */
433 /* If we have a multibyte sequence, copy it until we reach
434 its end, find an error, or come back to the initial shift
435 state. For C-like styles, if the sequence has
436 unprintable characters, escape the whole sequence, since
437 we can't easily escape single characters within it. */
439 /* Length of multibyte sequence found so far. */
447 printable = isprint (c) != 0;
452 memset (&mbstate, 0, sizeof mbstate);
456 if (argsize == SIZE_MAX)
457 argsize = strlen (arg);
462 size_t bytes = mbrtowc (&w, &arg[i + m],
463 argsize - (i + m), &mbstate);
466 else if (bytes == (size_t) -1)
471 else if (bytes == (size_t) -2)
474 while (i + m < argsize && arg[i + m])
480 /* Work around a bug with older shells that "see" a '\'
481 that is really the 2nd byte of a multibyte character.
482 In practice the problem is limited to ASCII
483 chars >= '@' that are shell special chars. */
484 if ('[' == 0x5b && elide_outer_quotes
485 && quoting_style == shell_always_quoting_style)
488 for (j = 1; j < bytes; j++)
489 switch (arg[i + m + j])
491 case '[': case '\\': case '^':
493 goto force_outer_quoting_style;
505 while (! mbsinit (&mbstate));
508 if (1 < m || (backslash_escapes && ! printable))
510 /* Output a multibyte sequence, or an escaped
511 unprintable unibyte character. */
516 if (backslash_escapes && ! printable)
518 if (elide_outer_quotes)
519 goto force_outer_quoting_style;
521 STORE ('0' + (c >> 6));
522 STORE ('0' + ((c >> 3) & 7));
525 else if (is_right_quote)
528 is_right_quote = false;
541 if (! ((backslash_escapes || elide_outer_quotes)
543 && quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))
548 if (elide_outer_quotes)
549 goto force_outer_quoting_style;
556 if (len == 0 && quoting_style == shell_always_quoting_style
557 && elide_outer_quotes)
558 goto force_outer_quoting_style;
560 if (quote_string && !elide_outer_quotes)
561 for (; *quote_string; quote_string++)
562 STORE (*quote_string);
564 if (len < buffersize)
568 force_outer_quoting_style:
569 /* Don't reuse quote_these_too, since the addition of outer quotes
570 sufficiently quotes the specified characters. */
571 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
573 flags & ~QA_ELIDE_OUTER_QUOTES, NULL);
576 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
577 argument ARG (of size ARGSIZE), using O to control quoting.
578 If O is null, use the default.
579 Terminate the output with a null character, and return the written
580 size of the output, not counting the terminating null.
581 If BUFFERSIZE is too small to store the output string, return the
582 value that would have been returned had BUFFERSIZE been large enough.
583 If ARGSIZE is SIZE_MAX, use the string length of the argument for
586 quotearg_buffer (char *buffer, size_t buffersize,
587 char const *arg, size_t argsize,
588 struct quoting_options const *o)
590 struct quoting_options const *p = o ? o : &default_quoting_options;
592 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
593 p->style, p->flags, p->quote_these_too);
598 /* Equivalent to quotearg_alloc_mem (ARG, ARGSIZE, NULL, O). */
600 quotearg_alloc (char const *arg, size_t argsize,
601 struct quoting_options const *o)
603 return quotearg_alloc_mem (arg, argsize, NULL, o);
606 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
607 allocated storage containing the quoted string, and store the
608 resulting size into *SIZE, if non-NULL. The result can contain
609 embedded null bytes only if ARGSIZE is not SIZE_MAX, SIZE is not
610 NULL, and set_quoting_flags has not set the null byte elision
613 quotearg_alloc_mem (char const *arg, size_t argsize, size_t *size,
614 struct quoting_options const *o)
616 struct quoting_options const *p = o ? o : &default_quoting_options;
618 /* Elide embedded null bytes if we can't return a size. */
619 int flags = p->flags | (size ? 0 : QA_ELIDE_NULL_BYTES);
620 size_t bufsize = quotearg_buffer_restyled (0, 0, arg, argsize, p->style,
621 flags, p->quote_these_too) + 1;
622 char *buf = xcharalloc (bufsize);
623 quotearg_buffer_restyled (buf, bufsize, arg, argsize, p->style, flags,
631 /* A storage slot with size and pointer to a value. */
638 /* Preallocate a slot 0 buffer, so that the caller can always quote
639 one small component of a "memory exhausted" message in slot 0. */
640 static char slot0[256];
641 static unsigned int nslots = 1;
642 static struct slotvec slotvec0 = {sizeof slot0, slot0};
643 static struct slotvec *slotvec = &slotvec0;
648 struct slotvec *sv = slotvec;
650 for (i = 1; i < nslots; i++)
652 if (sv[0].val != slot0)
655 slotvec0.size = sizeof slot0;
656 slotvec0.val = slot0;
666 /* Use storage slot N to return a quoted version of argument ARG.
667 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
668 null-terminated string.
669 OPTIONS specifies the quoting options.
670 The returned value points to static storage that can be
671 reused by the next call to this function with the same value of N.
672 N must be nonnegative. N is deliberately declared with type "int"
673 to allow for future extensions (using negative values). */
675 quotearg_n_options (int n, char const *arg, size_t argsize,
676 struct quoting_options const *options)
681 struct slotvec *sv = slotvec;
688 /* FIXME: technically, the type of n1 should be `unsigned int',
689 but that evokes an unsuppressible warning from gcc-4.0.1 and
690 older. If gcc ever provides an option to suppress that warning,
691 revert to the original type, so that the test in xalloc_oversized
692 is once again performed only at compile time. */
694 bool preallocated = (sv == &slotvec0);
696 if (xalloc_oversized (n1, sizeof *sv))
699 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
702 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
707 size_t size = sv[n].size;
708 char *val = sv[n].val;
709 /* Elide embedded null bytes since we don't return a size. */
710 int flags = options->flags | QA_ELIDE_NULL_BYTES;
711 size_t qsize = quotearg_buffer_restyled (val, size, arg, argsize,
712 options->style, flags,
713 options->quote_these_too);
717 sv[n].size = size = qsize + 1;
720 sv[n].val = val = xcharalloc (size);
721 quotearg_buffer_restyled (val, size, arg, argsize, options->style,
722 flags, options->quote_these_too);
731 quotearg_n (int n, char const *arg)
733 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
737 quotearg_n_mem (int n, char const *arg, size_t argsize)
739 return quotearg_n_options (n, arg, argsize, &default_quoting_options);
743 quotearg (char const *arg)
745 return quotearg_n (0, arg);
749 quotearg_mem (char const *arg, size_t argsize)
751 return quotearg_n_mem (0, arg, argsize);
755 quotearg_n_style (int n, enum quoting_style s, char const *arg)
757 struct quoting_options const o = quoting_options_from_style (s);
758 return quotearg_n_options (n, arg, SIZE_MAX, &o);
762 quotearg_n_style_mem (int n, enum quoting_style s,
763 char const *arg, size_t argsize)
765 struct quoting_options const o = quoting_options_from_style (s);
766 return quotearg_n_options (n, arg, argsize, &o);
770 quotearg_style (enum quoting_style s, char const *arg)
772 return quotearg_n_style (0, s, arg);
776 quotearg_style_mem (enum quoting_style s, char const *arg, size_t argsize)
778 return quotearg_n_style_mem (0, s, arg, argsize);
782 quotearg_char_mem (char const *arg, size_t argsize, char ch)
784 struct quoting_options options;
785 options = default_quoting_options;
786 set_char_quoting (&options, ch, 1);
787 return quotearg_n_options (0, arg, argsize, &options);
791 quotearg_char (char const *arg, char ch)
793 return quotearg_char_mem (arg, SIZE_MAX, ch);
797 quotearg_colon (char const *arg)
799 return quotearg_char (arg, ':');
803 quotearg_colon_mem (char const *arg, size_t argsize)
805 return quotearg_char_mem (arg, argsize, ':');