1 /* quotearg.c - quote arguments for output
2 Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Eggert <eggert@twinsun.com> */
24 #include <sys/types.h>
/* Shorthand that passes a message through gettext.  */
32 # define _(text) gettext (text)
/* Largest unsigned char value: -1 converted to unsigned char.  */
45 # define UCHAR_MAX ((unsigned char) -1)
/* ALERT_CHAR is the alert (bell) character.  The '\a' spelling is
   selected by HAVE_C_BACKSLASH_A; the numeric '\7' spelling is
   presumably the fallback branch (the #else and #endif lines are not
   visible in this excerpt).  */
48 #if HAVE_C_BACKSLASH_A
49 # define ALERT_CHAR '\a'
51 # define ALERT_CHAR '\7'
/* Multibyte support.  When HAVE_MBSTATE_T_OBJECT is false the state
   argument PS is dropped and the real mbrtowc is called with a null
   state pointer instead.  */
66 #if HAVE_MBRTOWC && HAVE_WCHAR_H
68 # if !HAVE_MBSTATE_T_OBJECT
69 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
/* Single-byte stand-ins (presumably the branch used when mbrtowc is
   unavailable; the #else is not visible here).  mbrtowc stores the
   byte *S into *PWC and yields 1 for a nonzero byte and 0 for a null
   byte; mbsinit always reports the initial state; iswprint defers to
   ISPRINT on the value converted to unsigned char.  */
72 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
73 # define mbsinit(ps) 1
74 # define iswprint(wc) ISPRINT ((unsigned char) (wc))
/* If iswprint is neither a macro nor reported as available, treat
   every wide character as printable.  */
81 # if !defined iswprint && !HAVE_ISWPRINT
82 # define iswprint(wc) 1
/* Number of bits in an int; the quote_these_too bit vector is sized
   and indexed in units of this.  */
86 #define INT_BITS (sizeof (int) * CHAR_BIT)
88 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
89 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
93 # define ISASCII(c) isascii (c)
95 /* Undefine to protect against the definition in wctype.h of solaris2.6. */
/* ISPRINT (c) is true only when C passes both ISASCII and isprint;
   ISASCII is tested first, so isprint is evaluated only for values
   that ISASCII accepts.  */
97 #define ISPRINT(c) (ISASCII (c) && isprint (c))
/* Options that control how an argument is quoted: a base style plus a
   set of characters that must be quoted regardless of the style.  */
99 struct quoting_options
101 /* Basic quoting style. */
102 enum quoting_style style;
104 /* Quote the characters indicated by this bit vector even if the
   quoting style would not normally require them to be quoted.
   The array holds (UCHAR_MAX / INT_BITS) + 1 ints, i.e. at least one
   bit per unsigned char value; the bit for character C is bit
   C % INT_BITS of element C / INT_BITS.  */
106 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
109 /* Names of quoting styles.  The initializer is not visible in this
   excerpt; presumably entry I is the name of the style stored in
   quoting_style_vals[I] -- confirm against the full table.  */
110 char const *const quoting_style_args[] =
121 /* Correspondences to quoting style names: the enum quoting_style
   constants, presumably index-for-index with quoting_style_args
   (only part of the initializer is visible here -- confirm against
   the full table).  */
122 enum quoting_style const quoting_style_vals[] =
124 literal_quoting_style,
126 shell_always_quoting_style,
128 escape_quoting_style,
132 /* The default quoting options, used by the functions in this file
   whenever they are handed a null options pointer.  The object has
   static storage and no initializer, so it starts out all zero: the
   style whose enum value is 0 and an empty quote_these_too set.  */
133 static struct quoting_options default_quoting_options;
135 /* Allocate a new set of quoting options, with contents initially identical
   to O if O is not null, or to the default if O is null.
   The storage is obtained from xmalloc.
   It is the caller's responsibility to free the result. */
138 struct quoting_options *
139 clone_quoting_options (struct quoting_options *o)
141 struct quoting_options *p
142 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
/* Structure assignment copies both the style and the whole
   quote_these_too bit vector.  */
143 *p = *(o ? o : &default_quoting_options);
147 /* Get the value of O's quoting style. If O is null, use the default.
   Nothing is modified.  */
149 get_quoting_style (struct quoting_options *o)
/* A null O selects default_quoting_options.  */
151 return (o ? o : &default_quoting_options)->style;
154 /* In O (or in the default if O is null),
   set the value of the quoting style to S.
   Only the style member is written; the quote_these_too set is left
   as it was.  */
157 set_quoting_style (struct quoting_options *o, enum quoting_style s)
/* A null O means the shared default_quoting_options is changed.  */
159 (o ? o : &default_quoting_options)->style = s;
162 /* In O (or in the default if O is null),
163 set the value of the quoting options for character C to I.
164 Return the old value. Currently, the only values defined for I are
165 0 (the default) and 1 (which means to quote the character even if
166 it would not otherwise be quoted). */
168 set_char_quoting (struct quoting_options *o, char c, int i)
170 unsigned char uc = c;
171 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
172 int shift = uc % INT_BITS;
173 int r = (*p >> shift) & 1;
174 *p ^= ((i & 1) ^ r) << shift;
178 /* Return the translation of MSGID if there is one, and
   DEFAULT_TRANSLATION otherwise. */
181 gettext_default (char const *msgid, char const *default_translation)
183 char const *translation = _(msgid);
/* Getting back the very same pointer that was passed in is taken to
   mean that no translation exists for MSGID.  */
184 return translation == msgid ? default_translation : translation;
187 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
   non-quoting-style part of O to control quoting.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough.
   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.

   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
   style specified by O, and O may not be null. */
201 quotearg_buffer_restyled (char *buffer, size_t buffersize,
202 char const *arg, size_t argsize,
203 enum quoting_style quoting_style,
204 struct quoting_options const *o)
208 char const *quote_string = 0;
209 size_t quote_string_len = 0;
210 int backslash_escapes = 0;
/* Remnant of a multi-line macro (note the trailing backslash),
   presumably the STORE helper used below to append output bytes; the
   guard shows that a byte is written only while LEN is still inside
   the buffer.  */
215 if (len < buffersize) \
221 switch (quoting_style)
/* Per-style setup: choose the closing quote string and its length,
   and decide whether backslash escapes are used.  */
223 case c_quoting_style:
225 backslash_escapes = 1;
227 quote_string_len = 1;
230 case escape_quoting_style:
231 backslash_escapes = 1;
234 case locale_quoting_style:
236 /* Get translations for open and closing quotation marks.

   The message catalog should translate "{LEFT QUOTATION
   MARK}" to a left quotation mark suitable for the locale,
   and similarly for "{RIGHT QUOTATION MARK}". If the catalog
   has no translation, the code below uses a neutral
   (vertical) quotation mark instead, as it is the most
   appropriate for the C locale.

   For example, an American English Unicode locale should
   translate the string "{LEFT QUOTATION MARK}" to the
   character U+201C (LEFT DOUBLE QUOTATION MARK), and should
   translate the string "{RIGHT QUOTATION MARK}" to the
   character U+201D (RIGHT DOUBLE QUOTATION MARK). A British
   English Unicode locale should instead translate these to
   U+2018 (LEFT SINGLE QUOTATION MARK) and U+2019 (RIGHT
   SINGLE QUOTATION MARK), respectively. */
254 static char const quotation_mark[] = "\"";
255 char const *left = gettext_default (N_("{LEFT QUOTATION MARK}"),
257 char const *right = gettext_default (N_("{RIGHT QUOTATION MARK}"),
/* Emit the opening quote now.  The closing quote is remembered in
   quote_string (with its length) so that occurrences inside ARG can
   be detected and so that it can be appended after the main loop.  */
259 for (quote_string = left; *quote_string; quote_string++)
260 STORE (*quote_string);
261 backslash_escapes = 1;
262 quote_string = right;
263 quote_string_len = strlen (quote_string);
267 case shell_always_quoting_style:
270 quote_string_len = 1;
/* Walk ARG one byte at a time.  With ARGSIZE == (size_t) -1 the loop
   stops at the terminating null byte; otherwise it stops after
   exactly ARGSIZE bytes.  */
277 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++)
/* With backslash escapes in use, detect the closing quote string
   occurring at position I of ARG (without reading past ARGSIZE).  */
282 if (backslash_escapes
284 && i + quote_string_len <= argsize
285 && memcmp (arg + i, quote_string, quote_string_len) == 0)
292 switch (quoting_style)
294 case shell_quoting_style:
295 goto use_shell_always_quoting_style;
297 case c_quoting_style:
/* Two consecutive question marks followed by at least one more byte:
   the characters listed below are then examined (presumably trigraph
   avoidance -- confirm against the full source).  */
298 if (i + 2 < argsize && arg[i + 1] == '?')
302 case '(': case ')': case '-': case '/':
303 case '<': case '=': case '>':
304 /* Escape the second '?' in what would otherwise be
320 case ALERT_CHAR: esc = 'a'; goto c_escape;
321 case '\b': esc = 'b'; goto c_escape;
322 case '\f': esc = 'f'; goto c_escape;
323 case '\n': esc = 'n'; goto c_and_shell_escape;
324 case '\r': esc = 'r'; goto c_and_shell_escape;
325 case '\t': esc = 't'; goto c_and_shell_escape;
326 case '\v': esc = 'v'; goto c_escape;
327 case '\\': esc = c; goto c_and_shell_escape;
330 if (quoting_style == shell_quoting_style)
331 goto use_shell_always_quoting_style;
333 if (backslash_escapes)
345 case '!': /* special in bash */
346 case '"': case '$': case '&':
347 case '(': case ')': case '*': case ';':
348 case '<': case '>': case '[':
349 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
351 /* A shell special character. In theory, '$' and '`' could
   be the first bytes of multibyte characters, which means
   we should check them with mbrtowc, but in practice this
   doesn't happen so it's not worth worrying about. */
355 if (quoting_style == shell_quoting_style)
356 goto use_shell_always_quoting_style;
360 switch (quoting_style)
362 case shell_quoting_style:
363 goto use_shell_always_quoting_style;
365 case shell_always_quoting_style:
376 case '%': case '+': case ',': case '-': case '.': case '/':
377 case '0': case '1': case '2': case '3': case '4': case '5':
378 case '6': case '7': case '8': case '9': case ':': case '=':
379 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
380 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
381 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
382 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
383 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
384 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
385 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
386 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
387 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
389 /* These characters don't cause problems, no matter what the
   quoting style is. They cannot start multibyte sequences. */
394 /* If we have a multibyte sequence, copy it until we reach
   its end, find an error, or come back to the initial shift
   state. For C-like styles, if the sequence has
   unprintable characters, escape the whole sequence, since
   we can't easily escape single characters within it. */
400 /* Length of multibyte sequence found so far. */
/* Begin decoding from the initial conversion state.  */
405 memset (&mbstate, 0, sizeof mbstate);
/* Replace the "use the string length" sentinel by the real length so
   that the number of remaining bytes can be handed to mbrtowc.  */
407 if (argsize == (size_t) -1)
408 argsize = strlen (arg);
413 size_t bytes = mbrtowc (&w, &arg[i + m],
414 argsize - (i + m), &mbstate);
/* (size_t) -1 is mbrtowc's result for an invalid byte sequence.  */
417 else if (bytes == (size_t) -1)
/* (size_t) -2 is mbrtowc's result for an incomplete sequence; the
   loop below then advances M over the remaining non-null bytes.  */
422 else if (bytes == (size_t) -2)
425 while (i + m < argsize && arg[i + m])
436 while (! mbsinit (&mbstate));
440 /* Escape a unibyte character like a multibyte
   sequence if using backslash escapes, and if the
   character is not printable. */
443 m = backslash_escapes && ! ISPRINT (c);
449 /* Output a multibyte sequence, or an escaped
   unprintable unibyte character. */
451 size_t imax = i + m - 1;
455 if (backslash_escapes && ! printable)
/* High and middle octal digits of the byte (bits 6 and up, then bits
   3 to 5).  */
458 STORE ('0' + (c >> 6));
459 STORE ('0' + ((c >> 3) & 7));
/* Test C's bit in O's quote_these_too vector (only relevant when
   backslash escapes are in use).
   NOTE(review): 1 << (c % INT_BITS) is a signed shift; when
   c % INT_BITS equals INT_BITS - 1 it shifts into the sign bit, which
   ISO C leaves undefined -- consider an unsigned operand.  */
473 if (! (backslash_escapes
474 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
/* Append whatever remains of the closing quote string.  */
485 for (; *quote_string; quote_string++)
486 STORE (*quote_string);
488 if (len < buffersize)
/* Reached from the gotos above when plain shell style meets input it
   cannot leave unquoted: redo the whole conversion from the start in
   shell_always style and return that result.  */
492 use_shell_always_quoting_style:
493 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
494 shell_always_quoting_style, o);
497 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
   argument ARG (of size ARGSIZE), using O to control quoting.
   If O is null, use the default.
   Terminate the output with a null character, and return the written
   size of the output, not counting the terminating null.
   If BUFFERSIZE is too small to store the output string, return the
   value that would have been returned had BUFFERSIZE been large enough.
   If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
506 quotearg_buffer (char *buffer, size_t buffersize,
507 char const *arg, size_t argsize,
508 struct quoting_options const *o)
/* Resolve a null O to the defaults here, because
   quotearg_buffer_restyled documents that its options pointer may not
   be null.  */
510 struct quoting_options const *p = o ? o : &default_quoting_options;
511 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
515 /* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative. N is deliberately declared with type "int"
   to allow for future extensions (using negative values). */
522 quotearg_n_options (int n, char const *arg,
523 struct quoting_options const *options)
/* The slot vector and its length persist across calls.  */
525 static unsigned int nslots;
526 static struct slotvec
/* Byte size of the enlarged slot vector for N1 slots (N1 is declared
   on a line not visible here).  The test that follows rejects
   N1 <= 0 and any N1 for which the multiplication wrapped around,
   i.e. dividing S back does not reproduce N1.  */
535 size_t s = n1 * sizeof (struct slotvec);
536 if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
538 slotvec = (struct slotvec *) xrealloc (slotvec, s);
/* Zero the newly added slots so that their size and val members start
   out as zero.  */
539 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
/* First attempt: quote into the slot's current buffer.
   quotearg_buffer returns the length the output needs whether or not
   it fitted.  */
544 size_t size = slotvec[n].size;
545 char *val = slotvec[n].val;
546 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Resize the slot to QSIZE + 1 bytes (room for the terminating null)
   and quote again.  Presumably this is reached only when the first
   attempt did not fit; the guarding test is not visible here.  */
550 slotvec[n].size = size = qsize + 1;
551 slotvec[n].val = val = xrealloc (val, size);
552 quotearg_buffer (val, size, arg, (size_t) -1, options);
/* Quote ARG into storage slot N using the default quoting options;
   a thin wrapper around quotearg_n_options.  */
560 quotearg_n (unsigned int n, char const *arg)
562 return quotearg_n_options (n, arg, &default_quoting_options);
/* Quote ARG into storage slot 0; equivalent to quotearg_n (0, ARG).  */
566 quotearg (char const *arg)
568 return quotearg_n (0, arg);
/* Quote ARG into storage slot N using a temporary options object that
   quotes no additional characters.  Presumably the object's style is
   set to S on a line that is not visible in this excerpt.  */
572 quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
574 struct quoting_options o;
/* Start with an empty set of extra characters to quote.  */
576 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
577 return quotearg_n_options (n, arg, &o);
/* Quote ARG into storage slot 0 with style S; equivalent to
   quotearg_n_style (0, S, ARG).  */
581 quotearg_style (enum quoting_style s, char const *arg)
583 return quotearg_n_style (0, s, arg);
/* Quote ARG into storage slot 0 using the default options, with
   character CH additionally marked as one that must be quoted.  */
587 quotearg_char (char const *arg, char ch)
/* Work on a local copy so that the shared defaults are not modified.  */
589 struct quoting_options options;
590 options = default_quoting_options;
591 set_char_quoting (&options, ch, 1);
592 return quotearg_n_options (0, arg, &options);
/* Quote ARG like quotearg_char, with ':' as the additional character
   to be quoted.  */
596 quotearg_colon (char const *arg)
598 return quotearg_char (arg, ':');