dbus-shell.c

00001 /* -*- mode: C; c-file-style: "gnu" -*- */
00002 /* dbus-shell.c Shell command line utility functions.
00003  *
00004  * Copyright (C) 2002, 2003  Red Hat, Inc.
00005  * Copyright (C) 2003 CodeFactory AB
00006  *
00007  * Licensed under the Academic Free License version 2.1
00008  *
00009  * This program is free software; you can redistribute it and/or modify
00010  * it under the terms of the GNU General Public License as published by
00011  * the Free Software Foundation; either version 2 of the License, or
00012  * (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00022  *
00023  */
00024 
00025 #include <string.h>
00026 #include "dbus-internals.h"
00027 #include "dbus-list.h"
00028 #include "dbus-memory.h"
00029 #include "dbus-protocol.h"
00030 #include "dbus-shell.h"
00031 #include "dbus-string.h"
00032 
00033 /* Single quotes preserve the literal string exactly. Escape
00034  * sequences are not allowed, not even \' - if you want a '
00035  * in the quoted text, you have to do something like 'foo'\''bar'
00036  *
00037  * Double quotes allow $ ` " \ and newline to be escaped with backslash.
00038  * Otherwise double quotes preserve things literally.
00039  */
00040 
00041 static dbus_bool_t
00042 unquote_string_inplace (char* str, char** end)
00043 {
00044   char* dest;
00045   char* s;
00046   char quote_char;
00047   
00048   dest = s = str;
00049 
00050   quote_char = *s;
00051   
00052   if (!(*s == '"' || *s == '\''))
00053     {
00054       *end = str;
00055       return FALSE;
00056     }
00057 
00058   /* Skip the initial quote mark */
00059   ++s;
00060 
00061   if (quote_char == '"')
00062     {
00063       while (*s)
00064         {
00065           _dbus_assert(s > dest); /* loop invariant */
00066       
00067           switch (*s)
00068             {
00069             case '"':
00070               /* End of the string, return now */
00071               *dest = '\0';
00072               ++s;
00073               *end = s;
00074               return TRUE;
00075 
00076             case '\\':
00077               /* Possible escaped quote or \ */
00078               ++s;
00079               switch (*s)
00080                 {
00081                 case '"':
00082                 case '\\':
00083                 case '`':
00084                 case '$':
00085                 case '\n':
00086                   *dest = *s;
00087                   ++s;
00088                   ++dest;
00089                   break;
00090 
00091                 default:
00092                   /* not an escaped char */
00093                   *dest = '\\';
00094                   ++dest;
00095                   /* ++s already done. */
00096                   break;
00097                 }
00098               break;
00099 
00100             default:
00101               *dest = *s;
00102               ++dest;
00103               ++s;
00104               break;
00105             }
00106 
00107           _dbus_assert(s > dest); /* loop invariant */
00108         }
00109     }
00110   else
00111     {
00112       while (*s)
00113         {
00114           _dbus_assert(s > dest); /* loop invariant */
00115           
00116           if (*s == '\'')
00117             {
00118               /* End of the string, return now */
00119               *dest = '\0';
00120               ++s;
00121               *end = s;
00122               return TRUE;
00123             }
00124           else
00125             {
00126               *dest = *s;
00127               ++dest;
00128               ++s;
00129             }
00130 
00131           _dbus_assert(s > dest); /* loop invariant */
00132         }
00133     }
00134   
00135   /* If we reach here this means the close quote was never encountered */
00136 
00137   *dest = '\0';
00138   
00139   *end = s;
00140   return FALSE;
00141 }
00142 
00153 char*
00154 _dbus_shell_quote (const char *unquoted_string)
00155 {
00156   /* We always use single quotes, because the algorithm is cheesier.
00157    * We could use double if we felt like it, that might be more
00158    * human-readable.
00159    */
00160 
00161   const char *p;
00162   char *ret;
00163   DBusString dest;
00164   
00165   _dbus_string_init (&dest);
00166 
00167   p = unquoted_string;
00168 
00169   /* could speed this up a lot by appending chunks of text at a
00170    * time.
00171    */
00172   while (*p)
00173     {
00174       /* Replace literal ' with a close ', a \', and a open ' */
00175       if (*p == '\'')
00176         {
00177           if (!_dbus_string_append (&dest, "'\\''"))
00178             {
00179               _dbus_string_free (&dest);
00180               return NULL;
00181             }
00182         }
00183       else
00184         {
00185           if (!_dbus_string_append_byte (&dest, *p))
00186             {
00187               _dbus_string_free (&dest);
00188               return NULL;
00189             }
00190         }
00191 
00192       ++p;
00193     }
00194 
00195   /* close the quote */
00196   if (_dbus_string_append_byte (&dest, '\''))
00197     {
00198       ret = _dbus_strdup (_dbus_string_get_data (&dest));
00199       _dbus_string_free (&dest);
00200 
00201       return ret;
00202     }
00203 
00204   _dbus_string_free (&dest);
00205 
00206   return NULL;
00207 }
00208 
00233 char*
00234 _dbus_shell_unquote (const char *quoted_string)
00235 {
00236   char *unquoted;
00237   char *end;
00238   char *start;
00239   char *ret;
00240   DBusString retval;
00241 
00242   unquoted = _dbus_strdup (quoted_string);
00243   if (unquoted == NULL)
00244     return NULL;
00245 
00246   start = unquoted;
00247   end = unquoted;
00248   if (!_dbus_string_init (&retval))
00249     {
00250       dbus_free (unquoted);
00251       return NULL;
00252     }
00253 
00254   /* The loop allows cases such as
00255    * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo'
00256    */
00257   while (*start)
00258     {
00259       /* Append all non-quoted chars, honoring backslash escape
00260        */
00261       
00262       while (*start && !(*start == '"' || *start == '\''))
00263         {
00264           if (*start == '\\')
00265             {
00266               /* all characters can get escaped by backslash,
00267                * except newline, which is removed if it follows
00268                * a backslash outside of quotes
00269                */
00270               
00271               ++start;
00272               if (*start)
00273                 {
00274                   if (*start != '\n')
00275                     {
00276                       if (!_dbus_string_append_byte (&retval, *start))
00277                         goto error;
00278                     }
00279                   ++start;
00280                 }
00281             }
00282           else
00283             {
00284               if (!_dbus_string_append_byte (&retval, *start))
00285                 goto error;
00286               ++start;
00287             }
00288         }
00289 
00290       if (*start)
00291         {
00292           if (!unquote_string_inplace (start, &end))
00293             goto error;
00294           else
00295             {
00296               if (!_dbus_string_append (&retval, start))
00297                 goto error;
00298               start = end;
00299             }
00300         }
00301     }
00302 
00303   ret = _dbus_strdup (_dbus_string_get_data (&retval));
00304   if (!ret)
00305     goto error;
00306 
00307   dbus_free (unquoted);
00308   _dbus_string_free (&retval);
00309   
00310   return ret;
00311   
00312  error:
00313   dbus_free (unquoted);
00314   _dbus_string_free (&retval);
00315   return NULL;
00316 }
00317 
00318 /* _dbus_shell_parse_argv() does a semi-arbitrary weird subset of the way
00319  * the shell parses a command line. We don't do variable expansion,
00320  * don't understand that operators are tokens, don't do tilde expansion,
00321  * don't do command substitution, no arithmetic expansion, IFS gets ignored,
00322  * don't do filename globs, don't remove redirection stuff, etc.
00323  *
00324  * READ THE UNIX98 SPEC on "Shell Command Language" before changing
00325  * the behavior of this code.
00326  *
00327  * Steps to parsing the argv string:
00328  *
00329  *  - tokenize the string (but since we ignore operators,
00330  *    our tokenization may diverge from what the shell would do)
00331  *    note that tokenization ignores the internals of a quoted
00332  *    word and it always splits on spaces, not on IFS even
00333  *    if we used IFS. We also ignore "end of input indicator"
00334  *    (I guess this is control-D?)
00335  *
00336  *    Tokenization steps, from UNIX98 with operator stuff removed,
00337  *    are:
00338  * 
00339  *    1) "If the current character is backslash, single-quote or
00340  *        double-quote (\, ' or ") and it is not quoted, it will affect
00341  *        quoting for subsequent characters up to the end of the quoted
00342  *        text. The rules for quoting are as described in Quoting
00343  *        . During token recognition no substitutions will be actually
00344  *        performed, and the result token will contain exactly the
00345  *        characters that appear in the input (except for newline
00346  *        character joining), unmodified, including any embedded or
00347  *        enclosing quotes or substitution operators, between the quote
00348  *        mark and the end of the quoted text. The token will not be
00349  *        delimited by the end of the quoted field."
00350  *
00351  *    2) "If the current character is an unquoted newline character,
00352  *        the current token will be delimited."
00353  *
00354  *    3) "If the current character is an unquoted blank character, any
00355  *        token containing the previous character is delimited and the
00356  *        current character will be discarded."
00357  *
00358  *    4) "If the previous character was part of a word, the current
00359  *        character will be appended to that word."
00360  *
00361  *    5) "If the current character is a "#", it and all subsequent
00362  *        characters up to, but excluding, the next newline character
00363  *        will be discarded as a comment. The newline character that
00364  *        ends the line is not considered part of the comment. The
00365  *        "#" starts a comment only when it is at the beginning of a
00366  *        token. Since the search for the end-of-comment does not
00367  *        consider an escaped newline character specially, a comment
00368  *        cannot be continued to the next line."
00369  *
00370  *    6) "The current character will be used as the start of a new word."
00371  *
00372  *
00373  *  - for each token (word), perform portions of word expansion, namely
00374  *    field splitting (using default whitespace IFS) and quote
00375  *    removal.  Field splitting may increase the number of words.
00376  *    Quote removal does not increase the number of words.
00377  *
00378  *   "If the complete expansion appropriate for a word results in an
00379  *   empty field, that empty field will be deleted from the list of
00380  *   fields that form the completely expanded command, unless the
00381  *   original word contained single-quote or double-quote characters."
00382  *    - UNIX98 spec
00383  *
00384  *
00385  */
00386 
00387 static dbus_bool_t
00388 delimit_token (DBusString *token,
00389                DBusList **retval,
00390                DBusError *error)
00391 {
00392   char *str;
00393 
00394   str = _dbus_strdup (_dbus_string_get_data (token));
00395   if (!str)
00396     {
00397       _DBUS_SET_OOM (error);
00398       return FALSE;
00399     }
00400 
00401   if (!_dbus_list_append (retval, str))
00402     {
00403       dbus_free (str);
00404       _DBUS_SET_OOM (error);
00405       return FALSE;
00406     }
00407 
00408   return TRUE;
00409 }
00410 
00411 static DBusList*
00412 tokenize_command_line (const char *command_line, DBusError *error)
00413 {
00414   char current_quote;
00415   const char *p;
00416   DBusString current_token;
00417   DBusList *retval = NULL;
00418   dbus_bool_t quoted;;
00419 
00420   current_quote = '\0';
00421   quoted = FALSE;
00422   p = command_line;
00423 
00424   if (!_dbus_string_init (&current_token))
00425     {
00426       _DBUS_SET_OOM (error);
00427       return NULL;
00428     }
00429 
00430   while (*p)
00431     {
00432       if (current_quote == '\\')
00433         {
00434           if (*p == '\n')
00435             {
00436               /* we append nothing; backslash-newline become nothing */
00437             }
00438           else
00439             {
00440               if (!_dbus_string_append_byte (&current_token, '\\') || 
00441                   !_dbus_string_append_byte (&current_token, *p))
00442                 {
00443                   _DBUS_SET_OOM (error);
00444                   goto error;
00445                 }
00446             }
00447 
00448           current_quote = '\0';
00449         }
00450       else if (current_quote == '#')
00451         {
00452           /* Discard up to and including next newline */
00453           while (*p && *p != '\n')
00454             ++p;
00455 
00456           current_quote = '\0';
00457           
00458           if (*p == '\0')
00459             break;
00460         }
00461       else if (current_quote)
00462         {
00463           if (*p == current_quote &&
00464               /* check that it isn't an escaped double quote */
00465               !(current_quote == '"' && quoted))
00466             {
00467               /* close the quote */
00468               current_quote = '\0';
00469             }
00470 
00471           /* Everything inside quotes, and the close quote,
00472            * gets appended literally.
00473            */
00474 
00475           if (!_dbus_string_append_byte (&current_token, *p))
00476             {
00477               _DBUS_SET_OOM (error);
00478               goto error;
00479             }
00480         }
00481       else
00482         {
00483           switch (*p)
00484             {
00485             case '\n':
00486               if (!delimit_token (&current_token, &retval, error))
00487                 goto error;
00488 
00489                 _dbus_string_free (&current_token);
00490 
00491                 if (!_dbus_string_init (&current_token))
00492                   {
00493                     _DBUS_SET_OOM (error);
00494                     goto init_error;
00495                   }
00496 
00497               break;
00498 
00499             case ' ':
00500             case '\t':
00501               /* If the current token contains the previous char, delimit
00502                * the current token. A nonzero length
00503                * token should always contain the previous char.
00504                */
00505               if (_dbus_string_get_length (&current_token) > 0)
00506                 {
00507                   if (!delimit_token (&current_token, &retval, error))
00508                     goto error;
00509 
00510                   _dbus_string_free (&current_token);
00511 
00512                   if (!_dbus_string_init (&current_token))
00513                     {
00514                       _DBUS_SET_OOM (error);
00515                       goto init_error;
00516                     }
00517 
00518                 }
00519               
00520               /* discard all unquoted blanks (don't add them to a token) */
00521               break;
00522 
00523 
00524               /* single/double quotes are appended to the token,
00525                * escapes are maybe appended next time through the loop,
00526                * comment chars are never appended.
00527                */
00528               
00529             case '\'':
00530             case '"':
00531               if (!_dbus_string_append_byte (&current_token, *p))
00532                 {
00533                   _DBUS_SET_OOM (error);
00534                   goto error;
00535                 }
00536 
00537               /* FALL THRU */
00538               
00539             case '#':
00540             case '\\':
00541               current_quote = *p;
00542               break;
00543 
00544             default:
00545               /* Combines rules 4) and 6) - if we have a token, append to it,
00546                * otherwise create a new token.
00547                */
00548               if (!_dbus_string_append_byte (&current_token, *p))
00549                 {
00550                   _DBUS_SET_OOM (error);
00551                   goto error;
00552                 }
00553               break;
00554             }
00555         }
00556 
00557       /* We need to count consecutive backslashes mod 2, 
00558        * to detect escaped doublequotes.
00559        */
00560       if (*p != '\\')
00561         quoted = FALSE;
00562       else
00563         quoted = !quoted;
00564 
00565       ++p;
00566     }
00567 
00568   if (!delimit_token (&current_token, &retval, error))
00569     goto error;
00570 
00571   if (current_quote)
00572     {
00573       dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "Unclosed quotes in command line");
00574       goto error;
00575     }
00576 
00577   if (retval == NULL)
00578     {
00579       dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "No tokens found in command line");
00580       goto error;
00581     }
00582  
00583   _dbus_string_free (&current_token);
00584  
00585   return retval;
00586 
00587  error:
00588   _dbus_string_free (&current_token);
00589 
00590  init_error:
00591   if (retval)
00592     {
00593       _dbus_list_foreach (&retval, (DBusForeachFunction) dbus_free, NULL);
00594       _dbus_list_clear (&retval);
00595     }
00596 
00597   return NULL;
00598 }
00599 
00617 dbus_bool_t
00618 _dbus_shell_parse_argv (const char *command_line,
00619                         int        *argcp,
00620                         char     ***argvp,
00621                         DBusError  *error)
00622 {
00623   /* Code based on poptParseArgvString() from libpopt */
00624   int argc = 0;
00625   char **argv = NULL;
00626   DBusList *tokens = NULL;
00627   int i;
00628   DBusList *tmp_list;
00629   
00630   if (!command_line)
00631     {
00632       _dbus_verbose ("Command line is NULL\n");
00633       return FALSE;
00634     }
00635 
00636   tokens = tokenize_command_line (command_line, error);
00637   if (tokens == NULL)
00638     {
00639       _dbus_verbose ("No tokens for command line '%s'\n", command_line);
00640       return FALSE;
00641     }
00642 
00643   /* Because we can't have introduced any new blank space into the
00644    * tokens (we didn't do any new expansions), we don't need to
00645    * perform field splitting. If we were going to honor IFS or do any
00646    * expansions, we would have to do field splitting on each word
00647    * here. Also, if we were going to do any expansion we would need to
00648    * remove any zero-length words that didn't contain quotes
00649    * originally; but since there's no expansion we know all words have
00650    * nonzero length, unless they contain quotes.
00651    * 
00652    * So, we simply remove quotes, and don't do any field splitting or
00653    * empty word removal, since we know there was no way to introduce
00654    * such things.
00655    */
00656 
00657   argc = _dbus_list_get_length (&tokens);
00658   argv = dbus_new (char *, argc + 1);
00659   if (!argv)
00660     {
00661       _DBUS_SET_OOM (error);
00662       goto error;
00663     }
00664 
00665   i = 0;
00666   tmp_list = tokens;
00667   while (tmp_list)
00668     {
00669       argv[i] = _dbus_shell_unquote (tmp_list->data);
00670 
00671       if (!argv[i])
00672         {
00673           int j;
00674           for (j = 0; j < i; j++)
00675             dbus_free(argv[j]);
00676 
00677           dbus_free (argv);
00678           _DBUS_SET_OOM (error);
00679           goto error;
00680         }
00681 
00682       tmp_list = _dbus_list_get_next_link (&tokens, tmp_list);
00683       ++i;
00684     }
00685   argv[argc] = NULL;
00686   
00687   _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL);
00688   _dbus_list_clear (&tokens);
00689   
00690   if (argcp)
00691     *argcp = argc;
00692 
00693   if (argvp)
00694     *argvp = argv;
00695   else
00696     dbus_free_string_array (argv);
00697 
00698   return TRUE;
00699 
00700  error:
00701   _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL);
00702   _dbus_list_clear (&tokens);
00703 
00704   return FALSE;
00705 
00706 }

Generated on Fri Sep 21 18:12:13 2007 for D-Bus by  doxygen 1.5.1