1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
25 # include <stddef.h> /* For the definition of size_t on windows w/MSVC. */
27 #include <sys/types.h>
/* Portability macros used by the rest of this file.

   _(text)          Looks TEXT up through gettext.
   UCHAR_MAX        Largest unsigned char value, obtained by converting -1
                    to unsigned char.
   ALERT_CHAR       The alert character, spelled either '\a' or '\7'.
   mbrtowc, mbsinit, iswprint
                    Replacement definitions for the multibyte routines.
   INT_BITS         Number of bits in an int (sizeof (int) * CHAR_BIT).
   IN_CTYPE_DOMAIN  Either always 1 or isascii (c).
   ISPRINT          isprint (c), consulted only when IN_CTYPE_DOMAIN (c)
                    holds.

   NOTE(review): most of the #if/#else/#endif lines that choose between the
   alternative definitions below are not visible here, and the comment that
   starts with "Disable multibyte processing" has lost its closing lines.
   Confirm the conditional structure against the complete file before
   editing any of these definitions.  */
35 # define _(text) gettext (text)
48 # define UCHAR_MAX ((unsigned char) -1)
51 #if HAVE_C_BACKSLASH_A
52 # define ALERT_CHAR '\a'
54 # define ALERT_CHAR '\7'
72 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
73 # define mbsinit(ps) 1
76 /* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
77 other macros are defined only for documentation and to satisfy C
81 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
82 # define mbsinit(ps) 1
83 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
90 # if !defined iswprint && !HAVE_ISWPRINT
91 # define iswprint(wc) 1
95 #define INT_BITS (sizeof (int) * CHAR_BIT)
97 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
98 # define IN_CTYPE_DOMAIN(c) 1
100 # define IN_CTYPE_DOMAIN(c) isascii(c)
103 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
/* Because of the && below, isprint is never called with a value that
   IN_CTYPE_DOMAIN rejects.  */
105 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
107 struct quoting_options
109 /* Basic quoting style. */
110 enum quoting_style style;
112 /* Quote the characters indicated by this bit vector even if the
113 quoting style would not normally require them to be quoted. */
114 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
117 /* Names of quoting styles.  Both the array and the strings it points
   to are const.
   NOTE(review): the initializer list is not visible here; presumably its
   entries are in the same order as quoting_style_vals -- confirm.  */
118 char const *const quoting_style_args[] =
130 /* Correspondences to quoting style names: the quoting_style value
   that goes with each name in quoting_style_args.  */
131 enum quoting_style const quoting_style_vals[] =
133 literal_quoting_style,
135 shell_always_quoting_style,
137 escape_quoting_style,
138 locale_quoting_style,
139 clocale_quoting_style
142 /* The default quoting options.  The functions in this file that accept
   a possibly-null options pointer read or modify this object when the
   pointer is null.  It has static storage duration and no initializer,
   so it starts out zeroed: in particular no bit of quote_these_too is
   set.  */
143 static struct quoting_options default_quoting_options;
145 /* Allocate a new set of quoting options, with contents initially identical
146 to O if O is not null, or to the default if O is null.
147 It is the caller's responsibility to free the result. */
148 struct quoting_options *
149 clone_quoting_options (struct quoting_options *o)
151 struct quoting_options *p
152 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
153 *p = *(o ? o : &default_quoting_options);
157 /* Get the value of O's quoting style. If O is null, use the default. */
159 get_quoting_style (struct quoting_options *o)
161 return (o ? o : &default_quoting_options)->style;
164 /* In O (or in the default if O is null),
165 set the value of the quoting style to S. */
167 set_quoting_style (struct quoting_options *o, enum quoting_style s)
169 (o ? o : &default_quoting_options)->style = s;
172 /* In O (or in the default if O is null),
173 set the value of the quoting options for character C to I.
174 Return the old value. Currently, the only values defined for I are
175 0 (the default) and 1 (which means to quote the character even if
176 it would not otherwise be quoted). */
178 set_char_quoting (struct quoting_options *o, char c, int i)
180 unsigned char uc = c;
181 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
182 int shift = uc % INT_BITS;
183 int r = (*p >> shift) & 1;
184 *p ^= ((i & 1) ^ r) << shift;
188 /* MSGID approximates a quotation mark. Return its translation if it
189 has one; otherwise, return either it or "\"", depending on S. */
191 gettext_quote (char const *msgid, enum quoting_style s)
193 char const *translation = _(msgid);
194 if (translation == msgid && s == clocale_quoting_style)
199 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
200 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
201 non-quoting-style part of O to control quoting.
202 Terminate the output with a null character, and return the written
203 size of the output, not counting the terminating null.
204 If BUFFERSIZE is too small to store the output string, return the
205 value that would have been returned had BUFFERSIZE been large enough.
206 If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
208 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
209 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
210 style specified by O, and O may not be null. */
213 quotearg_buffer_restyled (char *buffer, size_t buffersize,
214 char const *arg, size_t argsize,
215 enum quoting_style quoting_style,
216 struct quoting_options const *o)
/* QUOTE_STRING and QUOTE_STRING_LEN describe the closing quote of the
   selected style.  BACKSLASH_ESCAPES is set by the C, escape, locale and
   clocale styles.  UNIBYTE_LOCALE records whether MB_CUR_MAX is 1.  */
220 char const *quote_string = 0;
221 size_t quote_string_len = 0;
222 int backslash_escapes = 0;
223 int unibyte_locale = MB_CUR_MAX == 1;
228 if (len < buffersize) \
234 switch (quoting_style)
236 case c_quoting_style:
238 backslash_escapes = 1;
240 quote_string_len = 1;
243 case escape_quoting_style:
244 backslash_escapes = 1;
247 case locale_quoting_style:
248 case clocale_quoting_style:
250 /* Get translations for open and closing quotation marks.
252 The message catalog should translate "`" to a left
253 quotation mark suitable for the locale, and similarly for
254 "'". If the catalog has no translation,
255 locale_quoting_style quotes `like this', and
256 clocale_quoting_style quotes "like this".
258 For example, an American English Unicode locale should
259 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
260 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
261 MARK). A British English Unicode locale should instead
262 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
263 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
265 char const *left = gettext_quote (N_("`"), quoting_style);
266 char const *right = gettext_quote (N_("'"), quoting_style);
/* Emit the opening quote right away.  The closing quote is kept in
   QUOTE_STRING and is emitted after the argument has been processed.  */
267 for (quote_string = left; *quote_string; quote_string++)
268 STORE (*quote_string);
269 backslash_escapes = 1;
270 quote_string = right;
271 quote_string_len = strlen (quote_string);
275 case shell_always_quoting_style:
278 quote_string_len = 1;
/* Walk ARG one byte at a time.  When ARGSIZE is (size_t) -1 the loop
   stops at the terminating null; otherwise it stops after ARGSIZE
   bytes.  */
285 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* With backslash escapes enabled, test whether the bytes starting at I
   spell the closing quote string.
   NOTE(review): the statement guarded by this test is not visible here;
   presumably it escapes the embedded quote -- confirm.  */
290 if (backslash_escapes
292 && i + quote_string_len <= argsize
293 && memcmp (arg + i, quote_string, quote_string_len) == 0)
300 switch (quoting_style)
302 case shell_quoting_style:
303 goto use_shell_always_quoting_style;
305 case c_quoting_style:
/* True when the next byte is a '?' and at least one more byte lies
   inside ARGSIZE.
   NOTE(review): this looks like protection against C trigraphs; the
   controlling expression of the following case list is not visible
   here -- confirm.  */
306 if (i + 2 < argsize && arg[i + 1] == '?')
310 case '(': case ')': case '-': case '/':
311 case '<': case '=': case '>':
312 /* Escape the second '?' in what would otherwise be
328 case ALERT_CHAR: esc = 'a'; goto c_escape;
329 case '\b': esc = 'b'; goto c_escape;
330 case '\f': esc = 'f'; goto c_escape;
331 case '\n': esc = 'n'; goto c_and_shell_escape;
332 case '\r': esc = 'r'; goto c_and_shell_escape;
333 case '\t': esc = 't'; goto c_and_shell_escape;
334 case '\v': esc = 'v'; goto c_escape;
335 case '\\': esc = c; goto c_and_shell_escape;
338 if (quoting_style == shell_quoting_style)
339 goto use_shell_always_quoting_style;
341 if (backslash_escapes)
353 case '!': /* special in bash */
354 case '"': case '$': case '&':
355 case '(': case ')': case '*': case ';':
356 case '<': case '>': case '[':
357 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
359 /* A shell special character. In theory, '$' and '`' could
360 be the first bytes of multibyte characters, which means
361 we should check them with mbrtowc, but in practice this
362 doesn't happen so it's not worth worrying about. */
363 if (quoting_style == shell_quoting_style)
364 goto use_shell_always_quoting_style;
368 switch (quoting_style)
370 case shell_quoting_style:
371 goto use_shell_always_quoting_style;
373 case shell_always_quoting_style:
384 case '%': case '+': case ',': case '-': case '.': case '/':
385 case '0': case '1': case '2': case '3': case '4': case '5':
386 case '6': case '7': case '8': case '9': case ':': case '=':
387 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
388 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
389 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
390 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
391 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
392 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
393 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
394 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
395 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
397 /* These characters don't cause problems, no matter what the
398 quoting style is. They cannot start multibyte sequences. */
402 /* If we have a multibyte sequence, copy it until we reach
403 its end, find an error, or come back to the initial shift
404 state. For C-like styles, if the sequence has
405 unprintable characters, escape the whole sequence, since
406 we can't easily escape single characters within it. */
408 /* Length of multibyte sequence found so far. */
416 printable = ISPRINT (c);
421 memset (&mbstate, 0, sizeof mbstate);
/* The mbrtowc call below is given a byte count, so an ARGSIZE of
   (size_t) -1 is replaced by the real string length first.  */
425 if (argsize == (size_t) -1)
426 argsize = strlen (arg);
431 size_t bytes = mbrtowc (&w, &arg[i + m],
432 argsize - (i + m), &mbstate);
/* Per the C standard, mbrtowc returns (size_t) -1 for an encoding error
   and (size_t) -2 for an incomplete multibyte character.  */
435 else if (bytes == (size_t) -1)
440 else if (bytes == (size_t) -2)
443 while (i + m < argsize && arg[i + m])
454 while (! mbsinit (&mbstate));
457 if (1 < m || (backslash_escapes && ! printable))
459 /* Output a multibyte sequence, or an escaped
460 unprintable unibyte character. */
465 if (backslash_escapes && ! printable)
/* The two visible stores write the two high-order octal digits of C.  */
468 STORE ('0' + (c >> 6));
469 STORE ('0' + ((c >> 3) & 7));
/* Test C's bit in the caller-supplied quote_these_too vector; the bit
   only matters when backslash escapes are enabled.
   NOTE(review): 1 << (c % INT_BITS) overflows int when the bit index is
   INT_BITS - 1; an unsigned constant (1u) would avoid that.  */
483 if (! (backslash_escapes
484 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* Append the closing quote, if the style has one.  */
495 for (; *quote_string; quote_string++)
496 STORE (*quote_string);
/* NOTE(review): the guarded statement is not visible here; presumably it
   stores the terminating null when the output fits -- confirm.  */
498 if (len < buffersize)
/* Reached from the shell_quoting_style branches above once ARG turns out
   to need quoting: redo the whole job in shell_always_quoting_style.  */
502 use_shell_always_quoting_style:
503 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
504 shell_always_quoting_style, o);
507 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
508 argument ARG (of size ARGSIZE), using O to control quoting.
509 If O is null, use the default.
510 Terminate the output with a null character, and return the written
511 size of the output, not counting the terminating null.
512 If BUFFERSIZE is too small to store the output string, return the
513 value that would have been returned had BUFFERSIZE been large enough.
514 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
516 quotearg_buffer (char *buffer, size_t buffersize,
517 char const *arg, size_t argsize,
518 struct quoting_options const *o)
520 struct quoting_options const *p = o ? o : &default_quoting_options;
521 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
525 /* Use storage slot N to return a quoted version of the string ARG.
526 OPTIONS specifies the quoting options.
527 The returned value points to static storage that can be
528 reused by the next call to this function with the same value of N.
529 N must be nonnegative. N is deliberately declared with type "int"
530 to allow for future extensions (using negative values). */
532 quotearg_n_options (int n, char const *arg,
533 struct quoting_options const *options)
535 /* Preallocate a slot 0 buffer, so that the caller can always quote
536 one small component of a "memory exhausted" message in slot 0. */
537 static char slot0[256];
/* Number of entries currently in SLOTVEC.  */
538 static unsigned int nslots = 1;
/* The initial one-entry vector, describing slot0.
   NOTE(review): the definition of struct slotvec is not visible here;
   the initializer presumably lists the size first and the buffer
   second -- confirm.  */
544 static struct slotvec slotvec0 = {sizeof slot0, slot0};
545 static struct slotvec *slotvec = &slotvec0;
/* S is the size in bytes of a vector of N1 entries.  The test that
   follows rejects a non-positive N1 and a multiplication that wrapped
   around.  */
550 size_t s = n1 * sizeof (struct slotvec);
551 if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
/* slotvec0 is a static object and cannot be handed to xrealloc, so the
   first growth switches to a vector obtained from xmalloc.  */
553 if (slotvec == &slotvec0)
555 slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
558 slotvec = (struct slotvec *) xrealloc (slotvec, s);
/* Zero the entries that were just added.  */
559 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
/* First attempt: quote into the slot's current buffer.  QSIZE is the
   length the complete result needs, whether or not it fit.  */
564 size_t size = slotvec[n].size;
565 char *val = slotvec[n].val;
566 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Grow the slot to QSIZE + 1 bytes and quote again.  slot0 is a static
   array, so a null pointer is passed to xrealloc in its place.
   NOTE(review): the condition guarding this retry is not visible
   here.  */
570 slotvec[n].size = size = qsize + 1;
571 slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
572 quotearg_buffer (val, size, arg, (size_t) -1, options);
580 quotearg_n (unsigned int n, char const *arg)
582 return quotearg_n_options (n, arg, &default_quoting_options);
/* Return a quoted version of ARG, using storage slot 0 and the default
   quoting options.  */
char *
quotearg (char const *arg)
{
  return quotearg_n (0, arg);
}
592 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
594 struct quoting_options o;
596 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
597 return quotearg_n_options (n, arg, &o);
601 quotearg_style (enum quoting_style s, char const *arg)
603 return quotearg_n_style (0, s, arg);
607 quotearg_char (char const *arg, char ch)
609 struct quoting_options options;
610 options = default_quoting_options;
611 set_char_quoting (&options, ch, 1);
612 return quotearg_n_options (0, arg, &options);
/* Return a quoted version of ARG in which ':' is quoted in addition to
   whatever the default quoting options require.  Uses storage slot 0.  */
char *
quotearg_colon (char const *arg)
{
  return quotearg_char (arg, ':');
}