1 /* quotearg.c - quote arguments for output
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free
4 Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
20 /* Written by Paul Eggert <eggert@twinsun.com> */
/* Shorthand that translates MSGID by calling gettext.  */
36 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged.  It wraps the quotation-mark strings that
   are later handed to gettext_quote, which performs the actual lookup.  */
37 #define N_(msgid) msgid
41 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
49 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
50 other macros are defined only for documentation and to satisfy C syntax. */
/* Single-byte stand-in for mbrtowc: stores the byte *S into *PWC and
   evaluates to 1 when that byte is nonzero, 0 otherwise.  N and PS are
   ignored.  */
54 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
/* Stand-in for iswprint that defers to isprint on the value converted to
   unsigned char.  */
55 # define iswprint(wc) isprint ((unsigned char) (wc))
/* Fallback used when mbsinit is neither a macro nor reported by
   HAVE_MBSINIT: always evaluates to 1.  */
59 #if !defined mbsinit && !HAVE_MBSINIT
60 # define mbsinit(ps) 1
/* Fallback used when iswprint is neither a macro nor reported by
   HAVE_ISWPRINT: every wide character counts as printable.
   NOTE(review): the conditionals that choose between this definition and
   the isprint-based one, and the matching endif lines, are not visible in
   this listing.  */
67 # if !defined iswprint && !HAVE_ISWPRINT
68 # define iswprint(wc) 1
/* Largest value of type size_t.  NOTE(review): presumably guarded by a test
   that SIZE_MAX is not already defined; the guard is not visible in this
   listing.  */
73 # define SIZE_MAX ((size_t) -1)
/* Number of bits in an int.  This is the word width of the
   quote_these_too bit vector.  */
76 #define INT_BITS (sizeof (int) * CHAR_BIT)
/* Quoting configuration: a base style plus a per-byte override bitmap.
   NOTE(review): the braces of this definition are not visible in this
   listing.  */
78 struct quoting_options
80 /* Basic quoting style. */
81 enum quoting_style style;
83 /* Quote the characters indicated by this bit vector even if the
84 quoting style would not normally require them to be quoted. */
/* One bit per unsigned char value, packed INT_BITS bits to a word: the bit
   for byte C is bit C % INT_BITS of word C / INT_BITS.  */
85 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
88 /* Names of quoting styles. */
/* Table of style-name strings with external linkage.  NOTE(review): the
   initializer is not visible in this listing; the entries are expected to
   parallel quoting_style_vals -- confirm against the full file.  */
89 char const *const quoting_style_args[] =
101 /* Correspondences to quoting style names. */
/* Table of style constants with external linkage.  NOTE(review): only five
   initializers are visible in this listing; c_quoting_style and
   shell_quoting_style are used elsewhere in the file but do not appear
   here, so some entries and the braces were probably dropped.  */
102 enum quoting_style const quoting_style_vals[] =
104 literal_quoting_style,
106 shell_always_quoting_style,
108 escape_quoting_style,
109 locale_quoting_style,
110 clocale_quoting_style
113 /* The default quoting options. */
/* File-scope options object substituted whenever a caller passes a null
   options pointer.  Having static storage duration and no initializer, it
   starts out zero-filled.  */
114 static struct quoting_options default_quoting_options;
116 /* Allocate a new set of quoting options, with contents initially identical
117 to O if O is not null, or to the default if O is null.
118 It is the caller's responsibility to free the result. */
/* Implementation note: the copy is made by plain structure assignment into
   storage obtained from xmalloc.  NOTE(review): the braces and the return
   statement are not visible in this listing.  */
119 struct quoting_options *
120 clone_quoting_options (struct quoting_options *o)
/* Room for exactly one options structure.  */
123 struct quoting_options *p = xmalloc (sizeof *p);
/* Copy from O, or from the file-scope defaults when O is null.  */
124 *p = *(o ? o : &default_quoting_options);
129 /* Get the value of O's quoting style. If O is null, use the default. */
/* Reads the style member of O, substituting the file-scope defaults when O
   is null.  NOTE(review): the return-type line and the braces are not
   visible in this listing.  */
131 get_quoting_style (struct quoting_options *o)
133 return (o ? o : &default_quoting_options)->style;
136 /* In O (or in the default if O is null),
137 set the value of the quoting style to S. */
/* Overwrites the style member of O with S.  A null O redirects the store to
   the file-scope defaults, so it changes the style seen by every later
   caller that passes null options.  NOTE(review): the return-type line and
   the braces are not visible in this listing.  */
139 set_quoting_style (struct quoting_options *o, enum quoting_style s)
141 (o ? o : &default_quoting_options)->style = s;
144 /* In O (or in the default if O is null),
145 set the value of the quoting options for character C to I.
146 Return the old value. Currently, the only values defined for I are
147 0 (the default) and 1 (which means to quote the character even if
148 it would not otherwise be quoted). */
/* Sets the extra-quoting bit for byte C, in O or in the file-scope defaults
   when O is null, to the low bit of I.  R captures the previous bit value.
   NOTE(review): the return-type line, the braces, and the return statement
   are not visible in this listing.  */
150 set_char_quoting (struct quoting_options *o, char c, int i)
/* Convert through unsigned char so the bitmap index is never negative.  */
152 unsigned char uc = c;
/* Address of the bitmap word holding the bit for UC.  NOTE(review): the
   declaration that this expression initializes (the pointer later
   dereferenced as p) is not visible in this listing.  */
154 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
/* Position of the bit inside that word.  */
155 int shift = uc % INT_BITS;
/* Current value of the bit, 0 or 1.  */
156 int r = (*p >> shift) & 1;
/* XOR flips the stored bit only when it differs from the requested value.
   NOTE(review): the left operand of the shift has type int and the count
   can be INT_BITS - 1, which moves a 1 into the sign bit; an unsigned
   operand would avoid relying on that.  */
157 *p ^= ((i & 1) ^ r) << shift;
161 /* MSGID approximates a quotation mark. Return its translation if it
162 has one; otherwise, return either it or "\"", depending on S. */
/* Looks MSGID up through the gettext wrapper macro.  NOTE(review): the
   return-type line, the braces, the body of the if statement, and the
   return statement are not visible in this listing.  */
164 gettext_quote (char const *msgid, enum quoting_style s)
166 char const *translation = _(msgid);
/* Pointer equality, not string comparison: getting the very same pointer
   back is taken to mean that no translation exists.  Only the clocale
   style receives special treatment in that case.  */
167 if (translation == msgid && s == clocale_quoting_style)
172 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
173 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
174 non-quoting-style part of O to control quoting.
175 Terminate the output with a null character, and return the written
176 size of the output, not counting the terminating null.
177 If BUFFERSIZE is too small to store the output string, return the
178 value that would have been returned had BUFFERSIZE been large enough.
179 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
181 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
182 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
183 style specified by O, and O may not be null. */
/* Implementation overview.  A first switch on the style decides whether
   backslash escapes are used and which closing quote text applies.  The
   main loop then walks ARG one byte or one multibyte sequence at a time.
   When shell_quoting_style meets input that needs quoting, control jumps
   to the label at the end, which restarts the whole conversion with
   shell_always_quoting_style.
   NOTE(review): this listing omits many lines of the function (braces,
   several declarations and statements, and most of the STORE macro), so
   the text below is not compilable as shown.  */
186 quotearg_buffer_restyled (char *buffer, size_t buffersize,
187 char const *arg, size_t argsize,
188 enum quoting_style quoting_style,
189 struct quoting_options const *o)
/* Closing quote text and its length.  Both keep their null and zero
   initial values unless the style switch below assigns them.  */
193 char const *quote_string = 0;
194 size_t quote_string_len = 0;
/* Whether special characters are emitted as backslash escapes.  */
195 bool backslash_escapes = false;
/* True when the locale has at most one byte per character.  */
196 bool unibyte_locale = MB_CUR_MAX == 1;
/* NOTE(review): the next line is a fragment of the STORE macro, whose
   remaining lines are missing from this listing.  The visible test
   suggests that bytes are written only while len is below buffersize.  */
201 if (len < buffersize) \
207 switch (quoting_style)
209 case c_quoting_style:
211 backslash_escapes = true;
213 quote_string_len = 1;
216 case escape_quoting_style:
217 backslash_escapes = true;
220 case locale_quoting_style:
221 case clocale_quoting_style:
224 Get translations for open and closing quotation marks.
226 The message catalog should translate "`" to a left
227 quotation mark suitable for the locale, and similarly for
228 "'". If the catalog has no translation,
229 locale_quoting_style quotes `like this', and
230 clocale_quoting_style quotes "like this".
232 For example, an American English Unicode locale should
233 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
234 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
235 MARK). A British English Unicode locale should instead
236 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
237 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
239 If you don't know what to put here, please see
240 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
241 and use glyphs suitable for your language. */
243 char const *left = gettext_quote (N_("`"), quoting_style);
244 char const *right = gettext_quote (N_("'"), quoting_style);
245 for (quote_string = left; *quote_string; quote_string++)
246 STORE (*quote_string);
247 backslash_escapes = true;
248 quote_string = right;
249 quote_string_len = strlen (quote_string);
253 case shell_always_quoting_style:
256 quote_string_len = 1;
263 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
268 if (backslash_escapes
270 && i + quote_string_len <= argsize
271 && memcmp (arg + i, quote_string, quote_string_len) == 0)
278 if (backslash_escapes)
288 switch (quoting_style)
290 case shell_quoting_style:
291 goto use_shell_always_quoting_style;
293 case c_quoting_style:
294 if (i + 2 < argsize && arg[i + 1] == '?')
298 case '(': case ')': case '-': case '/':
299 case '<': case '=': case '>':
300 /* Escape the second '?' in what would otherwise be
319 case '\a': esc = 'a'; goto c_escape;
320 case '\b': esc = 'b'; goto c_escape;
321 case '\f': esc = 'f'; goto c_escape;
322 case '\n': esc = 'n'; goto c_and_shell_escape;
323 case '\r': esc = 'r'; goto c_and_shell_escape;
324 case '\t': esc = 't'; goto c_and_shell_escape;
325 case '\v': esc = 'v'; goto c_escape;
326 case '\\': esc = c; goto c_and_shell_escape;
329 if (quoting_style == shell_quoting_style)
330 goto use_shell_always_quoting_style;
332 if (backslash_escapes)
339 case '{': case '}': /* sometimes special if isolated */
340 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
348 case '!': /* special in bash */
349 case '"': case '$': case '&':
350 case '(': case ')': case '*': case ';':
352 case '=': /* sometimes special in 0th or (with "set -k") later args */
354 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
356 /* A shell special character. In theory, '$' and '`' could
357 be the first bytes of multibyte characters, which means
358 we should check them with mbrtowc, but in practice this
359 doesn't happen so it's not worth worrying about. */
360 if (quoting_style == shell_quoting_style)
361 goto use_shell_always_quoting_style;
365 switch (quoting_style)
367 case shell_quoting_style:
368 goto use_shell_always_quoting_style;
370 case shell_always_quoting_style:
381 case '%': case '+': case ',': case '-': case '.': case '/':
382 case '0': case '1': case '2': case '3': case '4': case '5':
383 case '6': case '7': case '8': case '9': case ':':
384 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
385 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
386 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
387 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
388 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
389 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
390 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
391 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
392 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
393 /* These characters don't cause problems, no matter what the
394 quoting style is. They cannot start multibyte sequences. */
398 /* If we have a multibyte sequence, copy it until we reach
399 its end, find an error, or come back to the initial shift
400 state. For C-like styles, if the sequence has
401 unprintable characters, escape the whole sequence, since
402 we can't easily escape single characters within it. */
404 /* Length of multibyte sequence found so far. */
412 printable = isprint (c) != 0;
417 memset (&mbstate, 0, sizeof mbstate);
421 if (argsize == SIZE_MAX)
422 argsize = strlen (arg);
427 size_t bytes = mbrtowc (&w, &arg[i + m],
428 argsize - (i + m), &mbstate);
431 else if (bytes == (size_t) -1)
436 else if (bytes == (size_t) -2)
439 while (i + m < argsize && arg[i + m])
445 /* Work around a bug with older shells that "see" a '\'
446 that is really the 2nd byte of a multibyte character.
447 In practice the problem is limited to ASCII
448 chars >= '@' that are shell special chars. */
449 if ('[' == 0x5b && quoting_style == shell_quoting_style)
452 for (j = 1; j < bytes; j++)
453 switch (arg[i + m + j])
455 case '[': case '\\': case '^':
457 goto use_shell_always_quoting_style;
469 while (! mbsinit (&mbstate));
472 if (1 < m || (backslash_escapes && ! printable))
474 /* Output a multibyte sequence, or an escaped
475 unprintable unibyte character. */
480 if (backslash_escapes && ! printable)
483 STORE ('0' + (c >> 6));
484 STORE ('0' + ((c >> 3) & 7));
498 if (! (backslash_escapes
499 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
509 if (i == 0 && quoting_style == shell_quoting_style)
510 goto use_shell_always_quoting_style;
513 for (; *quote_string; quote_string++)
514 STORE (*quote_string);
516 if (len < buffersize)
520 use_shell_always_quoting_style:
521 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
522 shell_always_quoting_style, o);
525 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
526 argument ARG (of size ARGSIZE), using O to control quoting.
527 If O is null, use the default.
528 Terminate the output with a null character, and return the written
529 size of the output, not counting the terminating null.
530 If BUFFERSIZE is too small to store the output string, return the
531 value that would have been returned had BUFFERSIZE been large enough.
532 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. */
/* Thin front end to quotearg_buffer_restyled.  NOTE(review): the
   return-type line, the braces, the tail of the call below, and the return
   statement are not visible in this listing.  */
535 quotearg_buffer (char *buffer, size_t buffersize,
536 char const *arg, size_t argsize,
537 struct quoting_options const *o)
/* Resolve a null O to the file-scope defaults once, up front.  */
539 struct quoting_options const *p = o ? o : &default_quoting_options;
/* Delegate the actual conversion; R receives the quoted length.  */
541 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
547 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
548 allocated storage containing the quoted string. */
/* Two-pass allocation.  NOTE(review): the return-type line, the braces, and
   the return statement are not visible in this listing.  */
550 quotearg_alloc (char const *arg, size_t argsize,
551 struct quoting_options const *o)
/* Pass 1: a null buffer of size 0 stores nothing but still yields the
   length the output needs; one more byte is reserved for the terminating
   null.  */
554 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
555 char *buf = xmalloc (bufsize);
/* Pass 2: produce the quoted text in the exactly sized buffer.  */
556 quotearg_buffer (buf, bufsize, arg, argsize, o);
561 /* Use storage slot N to return a quoted version of argument ARG.
562 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
563 null-terminated string.
564 OPTIONS specifies the quoting options.
565 The returned value points to static storage that can be
566 reused by the next call to this function with the same value of N.
567 N must be nonnegative. N is deliberately declared with type "int"
568 to allow for future extensions (using negative values). */
/* Implementation note: slots live in a growable vector of size and buffer
   pairs that starts out as one static entry.  NOTE(review): the
   return-type line, the braces, the struct slotvec declaration, several
   conditions, and the return statement are not visible in this listing.  */
570 quotearg_n_options (int n, char const *arg, size_t argsize,
571 struct quoting_options const *options)
575 /* Preallocate a slot 0 buffer, so that the caller can always quote
576 one small component of a "memory exhausted" message in slot 0. */
577 static char slot0[256];
/* Number of entries currently held by slotvec.  */
578 static unsigned int nslots = 1;
/* The vector initially consists of the single static entry slotvec0, which
   wraps the slot0 array.  */
585 static struct slotvec slotvec0 = {sizeof slot0, slot0};
586 static struct slotvec *slotvec = &slotvec0;
593 /* FIXME: technically, the type of n1 should be `unsigned int',
594 but that evokes an unsuppressible warning from gcc-4.0.1 and
595 older. If gcc ever provides an option to suppress that warning,
596 revert to the original type, so that the test in xalloc_oversized
597 is once again performed only at compile time. */
/* Presumably rejects an element count whose total byte size would
   overflow; the helper is defined outside this file.  */
600 if (xalloc_oversized (n1, sizeof *slotvec))
/* While slotvec still points at the static entry there is no heap block to
   hand to xrealloc, so a heap vector is started with xmalloc first.  */
603 if (slotvec == &slotvec0)
605 slotvec = xmalloc (sizeof *slotvec);
608 slotvec = xrealloc (slotvec, n1 * sizeof *slotvec);
/* Zero the newly added entries.  */
609 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
/* First attempt: quote into whatever buffer slot N already owns.  The call
   reports the length that is needed even when the buffer is too small.  */
614 size_t size = slotvec[n].size;
615 char *val = slotvec[n].val;
616 size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
/* Retry path: give the slot a buffer of exactly qsize + 1 bytes and quote
   again.  NOTE(review): the condition that selects this path and the
   handling of the previous buffer are not visible in this listing.  */
620 slotvec[n].size = size = qsize + 1;
623 slotvec[n].val = val = xmalloc (size);
624 quotearg_buffer (val, size, arg, argsize, options);
/* Quotes the null-terminated string ARG into slot N using the file-scope
   default options; SIZE_MAX tells the callee to take the length from the
   terminator.  NOTE(review): the return-type line and the braces are not
   visible in this listing.  */
633 quotearg_n (int n, char const *arg)
635 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
/* Convenience form of quotearg_n that always uses slot 0.  NOTE(review):
   the return-type line and the braces are not visible in this listing.  */
639 quotearg (char const *arg)
641 return quotearg_n (0, arg);
644 /* Return quoting options for STYLE, with no extra quoting. */
/* Builds an options value by value.  NOTE(review): the braces, the
   statement that stores STYLE into the result, and the return statement
   are not visible in this listing.  */
645 static struct quoting_options
646 quoting_options_from_style (enum quoting_style style)
648 struct quoting_options o;
/* Clear the whole override bitmap so that no extra characters are
   quoted.  */
650 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
/* Quotes the null-terminated string ARG into slot N with style S and no
   extra quoted characters.  NOTE(review): the return-type line and the
   braces are not visible in this listing.  */
655 quotearg_n_style (int n, enum quoting_style s, char const *arg)
/* Temporary options object; it is only needed for the duration of the call
   below.  */
657 struct quoting_options const o = quoting_options_from_style (s);
658 return quotearg_n_options (n, arg, SIZE_MAX, &o);
/* Same as quotearg_n_style, except that the length of ARG is passed
   explicitly as ARGSIZE instead of SIZE_MAX.  NOTE(review): the
   return-type line and the braces are not visible in this listing.  */
662 quotearg_n_style_mem (int n, enum quoting_style s,
663 char const *arg, size_t argsize)
665 struct quoting_options const o = quoting_options_from_style (s);
666 return quotearg_n_options (n, arg, argsize, &o);
/* Convenience form of quotearg_n_style that always uses slot 0.
   NOTE(review): the return-type line and the braces are not visible in
   this listing.  */
670 quotearg_style (enum quoting_style s, char const *arg)
672 return quotearg_n_style (0, s, arg);
/* Quotes the null-terminated string ARG into slot 0 using the default
   options with the extra-quoting bit for CH switched on.  NOTE(review):
   the return-type line and the braces are not visible in this listing.  */
676 quotearg_char (char const *arg, char ch)
/* Work on a private copy so that the file-scope defaults stay untouched.  */
678 struct quoting_options options;
679 options = default_quoting_options;
680 set_char_quoting (&options, ch, 1);
681 return quotearg_n_options (0, arg, SIZE_MAX, &options);
/* Convenience form of quotearg_char that requests extra quoting of the
   colon character.  NOTE(review): the return-type line and the braces are
   not visible in this listing.  */
685 quotearg_colon (char const *arg)
687 return quotearg_char (arg, ':');