From ce84ba7260b476c73c212bab850724196d723d13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Wed, 29 May 2024 21:00:52 +0200 Subject: [PATCH 01/11] Add exec/2: posix_spawn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an initial MVP with quite a few things still missing (such as: better error messages, documentation, tests). Despite this, it is already feature-complete on POSIX platforms (on Windows it currently reports an "unsupported on this platform" error). The signature is `anything | exec(path; [args…])`. `path` and all `args` must be strings. `anything` will be converted to a string if it isn't a string in memory, then piped into the process' stdin. The output is all stdout of the process. The exit code is not reported. Technically, "path" can be a simple name and `$PATH` will be searched. This is because the underlying function is `posix_spawnp`. This can be changed easily. The process does not have access to environment variables. This can be changed as well. Piping between programs works. Here's an example to try it out: `tostring | exec("seq"; [.]) | exec("wc"; ["-l"])` Expected output when inputting numbers is that number, but it notably goes through seq, then line-counting. 
--- src/builtin.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/src/builtin.c b/src/builtin.c index 7d21bfb111..72b0f2610d 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -34,6 +34,10 @@ void *alloca (size_t); #include #ifdef WIN32 #include +#else +#include +#include +#include #endif #include "builtin.h" #include "compile.h" @@ -1750,6 +1754,161 @@ static jv f_have_decnum(jq_state *jq, jv a) { #endif } +#ifdef WIN32 +static jv f_exec(jq_state *jq, jv input, jv path, jv args) { + jv_free(input), jv_free(path), jv_free(args); + return jv_invalid_with_msg(jv_string("exec not supported on this platform")); +} +#else +static jv f_exec(jq_state *jq, jv input, jv path, jv args) { + int ret = 0; + + /* argument validation */ + if (jv_get_kind(path) != JV_KIND_STRING) { + jv_free(input), jv_free(path), jv_free(args); + return type_error(path, "exec/2: path must be string"); + } + + // extract args into const char ** on the stack + if (jv_get_kind(args) != JV_KIND_ARRAY) { + jv_free(input), jv_free(path), jv_free(args); + return type_error(args, "exec/2: args must be array"); + } + + // validate args array before using it to avoid having to clean up + // a partially populated argv + jv_array_foreach(args, i, s) { + if (jv_get_kind(s) != JV_KIND_STRING) ret++; + jv_free(s); + } + if (ret) { + jv_free(input), jv_free(path), jv_free(args); + return type_error(args, "exec/2: args must only contain strings"); + } + + const size_t argc = jv_array_length(jv_copy(args)) + 1; + // this can't be a * const because of how we initialize it + char * argv[argc + 1]; + jv_array_foreach(args, i, s) { + argv[i + 1] = strdup(jv_string_value(s)); + jv_free(s); + } + argv[0] = strdup(jv_string_value(path)); + argv[argc] = 0; + jv_free(path); + + /* setting up pipes */ + int fin[2] = {0, 0}, fout[2] = {0, 0}; + posix_spawn_file_actions_t fda; + if ((ret = posix_spawn_file_actions_init(&fda))) { + jv_free(args), jv_free(input); + return 
jv_invalid_with_msg(jv_string("exec/2: could not initialize fd actions")); + } + + // TODO: better error reporting + if ((ret = pipe(fin))) { + jv_free(args), jv_free(input); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + if ((ret = posix_spawn_file_actions_addclose(&fda, fin[1]))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + if ((ret = posix_spawn_file_actions_adddup2(&fda, fin[0], 0))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + if ((ret = posix_spawn_file_actions_addclose(&fda, fin[0]))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + + // TODO: better error reporting + if ((ret = pipe(fout))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + if ((ret = posix_spawn_file_actions_addclose(&fda, fout[0]))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + if ((ret = posix_spawn_file_actions_adddup2(&fda, fout[1], 1))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + if ((ret = posix_spawn_file_actions_addclose(&fda, fout[1]))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + + /* execute */ + pid_t pid; + // NOTE: the warning on argv should be fine, posix_spawnp doesn't mutate those to my knowledge + if (posix_spawnp(&pid, argv[0], &fda, + NULL, argv, NULL)) { + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + 
jv_free(input); + return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + } + for (size_t i = 0; i < argc; i++) { + free(argv[i]); + } + close(fin[0]), close(fout[1]); + jv_free(args); + if ((ret = posix_spawn_file_actions_destroy(&fda))) { + // TODO: what should we do here? this is technically harmless + } + + /* send and receive data */ + // TODO: error checking on the writes + switch (jv_get_kind(input)) { + case JV_KIND_INVALID: + case JV_KIND_NULL: + close(fin[1]); + jv_free(input); + break; // do not pipe invalid / null + case JV_KIND_STRING: + write(fin[1], jv_string_value(input), jv_string_length_bytes(jv_copy(input))); + close(fin[1]); + jv_free(input); + break; + default: { + jv s = jv_dump_string(input, 0); + write(fin[1], jv_string_value(s), jv_string_length_bytes(jv_copy(s))); + close(fin[1]); + jv_free(s); + break; + } + } + + jv output = jv_string_empty(0); + char *buf = malloc(1024); + ssize_t bytes; + while ((bytes = read(fout[0], buf, 1024)) > 0) { + output = jv_string_append_buf(output, buf, bytes); + } + close(fout[0]); + free(buf); + + // TODO: parse output into json? probably not. 
+ + // TODO: check waitpid output + waitpid(pid, &ret, 0); + return output; +} +#endif + #define LIBM_DD(name) \ {f_ ## name, #name, 1}, #define LIBM_DD_NO(name) LIBM_DD(name) @@ -1829,6 +1988,7 @@ BINOPS {f_current_line, "input_line_number", 1}, {f_have_decnum, "have_decnum", 1}, {f_have_decnum, "have_literal_numbers", 1}, + {f_exec, "exec", 3}, }; #undef LIBM_DDDD_NO #undef LIBM_DDD_NO From a4c197a40aa93d676f280ff9944e8b726a6327b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 12:32:05 +0200 Subject: [PATCH 02/11] exec/2: add error checking --- src/builtin.c | 118 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 97 insertions(+), 21 deletions(-) diff --git a/src/builtin.c b/src/builtin.c index 72b0f2610d..dd07a94986 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -35,6 +35,7 @@ void *alloca (size_t); #ifdef WIN32 #include #else +#include #include #include #include @@ -1766,13 +1767,13 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { /* argument validation */ if (jv_get_kind(path) != JV_KIND_STRING) { jv_free(input), jv_free(path), jv_free(args); - return type_error(path, "exec/2: path must be string"); + return type_error(path, "exec/2 requires a string path"); } // extract args into const char ** on the stack if (jv_get_kind(args) != JV_KIND_ARRAY) { jv_free(input), jv_free(path), jv_free(args); - return type_error(args, "exec/2: args must be array"); + return type_error(args, "exec/2 requires an array of arguments"); } // validate args array before using it to avoid having to clean up @@ -1783,11 +1784,10 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { } if (ret) { jv_free(input), jv_free(path), jv_free(args); - return type_error(args, "exec/2: args must only contain strings"); + return type_error(args, "exec/2 only supports string arguments"); } const size_t argc = jv_array_length(jv_copy(args)) + 1; - // this can't be a * const because of how we initialize it char * 
argv[argc + 1]; jv_array_foreach(args, i, s) { argv[i + 1] = strdup(jv_string_value(s)); @@ -1802,54 +1802,123 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { posix_spawn_file_actions_t fda; if ((ret = posix_spawn_file_actions_init(&fda))) { jv_free(args), jv_free(input); - return jv_invalid_with_msg(jv_string("exec/2: could not initialize fd actions")); + return jv_invalid_with_msg(jv_string("exec/2 could not initialize fd actions")); } - // TODO: better error reporting + /** stdin **/ if ((ret = pipe(fin))) { jv_free(args), jv_free(input); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EMFILE: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a per-process limit")); + case ENFILE: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a system-wide limit")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe()")); + } } if ((ret = posix_spawn_file_actions_addclose(&fda, fin[1]))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an invalid file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); + } } if ((ret = posix_spawn_file_actions_adddup2(&fda, fin[0], 0))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 an invalid 
file descriptor")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to dup2 a file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 a file descriptor")); + } } if ((ret = posix_spawn_file_actions_addclose(&fda, fin[0]))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an invalid file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); + } } - // TODO: better error reporting + /** stdout **/ if ((ret = pipe(fout))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EMFILE: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a per-process limit")); + case ENFILE: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a system-wide limit")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe()")); + } } if ((ret = posix_spawn_file_actions_addclose(&fda, fout[0]))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); close(fout[0]), close(fout[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an 
invalid file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); + } } if ((ret = posix_spawn_file_actions_adddup2(&fda, fout[1], 1))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); close(fout[0]), close(fout[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 an invalid file descriptor")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to dup2 a file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 a file descriptor")); + } } if ((ret = posix_spawn_file_actions_addclose(&fda, fout[1]))) { jv_free(args), jv_free(input); close(fin[0]), close(fin[1]); close(fout[0]), close(fout[1]); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an invalid file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); + } } + // TODO: also set up an stderr pipe and pipe it into null, so it can be used 
later /* execute */ pid_t pid; @@ -1859,7 +1928,12 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { close(fin[0]), close(fin[1]); close(fout[0]), close(fout[1]); jv_free(input); - return jv_invalid_with_msg(jv_string("exec/2: PLACEHOLDER")); + switch (errno) { + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 failed to run posix_spawn due to an invalid file_actions object")); + default: + return jv_invalid_with_msg(jv_string("exec/2 failed to run posix_spawn")); + } } for (size_t i = 0; i < argc; i++) { free(argv[i]); @@ -1867,11 +1941,11 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { close(fin[0]), close(fout[1]); jv_free(args); if ((ret = posix_spawn_file_actions_destroy(&fda))) { - // TODO: what should we do here? this is technically harmless + // NOTE: posix_spawn_file_actions_destroy isn't checked for errors + // because it's non-fatal } /* send and receive data */ - // TODO: error checking on the writes switch (jv_get_kind(input)) { case JV_KIND_INVALID: case JV_KIND_NULL: @@ -1879,12 +1953,14 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { jv_free(input); break; // do not pipe invalid / null case JV_KIND_STRING: + // NOTE: write isn't checked for errors because it's non-fatal write(fin[1], jv_string_value(input), jv_string_length_bytes(jv_copy(input))); close(fin[1]); jv_free(input); break; default: { jv s = jv_dump_string(input, 0); + // NOTE: write isn't checked for errors because it's non-fatal write(fin[1], jv_string_value(s), jv_string_length_bytes(jv_copy(s))); close(fin[1]); jv_free(s); @@ -1898,12 +1974,12 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { while ((bytes = read(fout[0], buf, 1024)) > 0) { output = jv_string_append_buf(output, buf, bytes); } + // NOTE: if we want to check the read for failures, it'd be done here close(fout[0]); free(buf); - // TODO: parse output into json? probably not. 
- - // TODO: check waitpid output + // NOTE: if we want to check waitpid for failures, be careful since + // it may be short-lived waitpid(pid, &ret, 0); return output; } From 0a3848a66ce11deaa061941d0f4a4cdd30716657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 12:46:10 +0200 Subject: [PATCH 03/11] exec/2: use guarded jv_mem functions --- src/builtin.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/builtin.c b/src/builtin.c index dd07a94986..038e0e5116 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1790,10 +1790,10 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { const size_t argc = jv_array_length(jv_copy(args)) + 1; char * argv[argc + 1]; jv_array_foreach(args, i, s) { - argv[i + 1] = strdup(jv_string_value(s)); + argv[i + 1] = jv_mem_strdup(jv_string_value(s)); jv_free(s); } - argv[0] = strdup(jv_string_value(path)); + argv[0] = jv_mem_strdup(jv_string_value(path)); argv[argc] = 0; jv_free(path); @@ -1936,7 +1936,7 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { } } for (size_t i = 0; i < argc; i++) { - free(argv[i]); + jv_mem_free(argv[i]); } close(fin[0]), close(fout[1]); jv_free(args); @@ -1969,14 +1969,14 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { } jv output = jv_string_empty(0); - char *buf = malloc(1024); + char *buf = jv_mem_alloc(1024); ssize_t bytes; while ((bytes = read(fout[0], buf, 1024)) > 0) { output = jv_string_append_buf(output, buf, bytes); } // NOTE: if we want to check the read for failures, it'd be done here close(fout[0]); - free(buf); + jv_mem_free(buf); // NOTE: if we want to check waitpid for failures, be careful since // it may be short-lived From 648e772bf120039546b20b6d6e0d4f0323e9cfe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 13:17:18 +0200 Subject: [PATCH 04/11] Add tests for exec/2 --- tests/shtest | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff 
--git a/tests/shtest b/tests/shtest index 03fbf665e5..fd1de68b88 100755 --- a/tests/shtest +++ b/tests/shtest @@ -689,4 +689,19 @@ $VALGRIND $Q $JQ . <<\NUM -10E-1000000001 NUM +# exec is tested by executing jq +if ! $msys && ! $mingw; then + now=$(date +%s) + if ! r=$($VALGRIND $Q $JQ -rn 'exec("'"$JQ_NO_B"'"; ["-rn", "'"$now"'"]) | trim') || + [ "$r" != "$now" ] ; then + echo "exec didn't pipe stdout correctly: expected $now but got $r" + exit 1 + fi + if ! r=$(echo "$now" | $VALGRIND $Q $JQ -r 'exec("'"$JQ_NO_B"'"; ["-r", "."]) | trim') || + [ "$r" != "$now" ]; then + echo "exec didn't pipe input correctly: expected $now but got $r" + exit 1 + fi +fi + exit 0 From 7d8c020d6a6f7e9b401ab8f99134ba96a6687453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 13:51:50 +0200 Subject: [PATCH 05/11] Add documentation for exec/2 --- docs/content/manual/manual.yml | 15 +++++++++++++++ jq.1.prebuilt | 6 ++++++ 2 files changed, 21 insertions(+) diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index d6746bc56a..643e50154d 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -3271,6 +3271,12 @@ sections: builtin outputs its input in raw mode to stder with no additional decoration, not even a newline. + Additionally, jq has support for executing "external filters" provided by + other executables. This functionality is provided by `exec`, but may not + be available on all platforms, and does not necessarily integrate well + with other jq features. It is intended to perform smaller processing that + is otherwise impossible to perform in jq itself. + Most jq builtins are referentially transparent, and yield constant and repeatable value streams when applied to constant inputs. This is not true of I/O builtins. @@ -3344,6 +3350,15 @@ sections: Returns the line number of the input currently being filtered. 
+ - title: "`exec(path; [args…])`" + body: | + + Spawns a new process of path with arguments args. Pipes its input + converted to a string as the stdin of the process. The output of the + process is coalesced and outputted as a string. Note that this output + will include any terminating newlines, so you may want to add an + rtrim after this filter, or use a wrapper. + - title: 'Streaming' body: | diff --git a/jq.1.prebuilt b/jq.1.prebuilt index 7239e87d16..cb86db635a 100644 --- a/jq.1.prebuilt +++ b/jq.1.prebuilt @@ -3652,6 +3652,9 @@ At this time jq has minimal support for I/O, mostly in the form of control over Two builtins provide minimal output capabilities, \fBdebug\fR, and \fBstderr\fR\. (Recall that a jq program\'s output values are always output as JSON texts on \fBstdout\fR\.) The \fBdebug\fR builtin can have application\-specific behavior, such as for executables that use the libjq C API but aren\'t the jq executable itself\. The \fBstderr\fR builtin outputs its input in raw mode to stder with no additional decoration, not even a newline\. . .P +Additionally, jq has support for executing "external filters" provided by other executables\. This functionality is provided by \fBexec\fR, but may not be available on all platforms, and does not necessarily integrate well with other jq features\. It is intended to perform smaller processing that is otherwise impossible to perform in jq itself\. +. +.P Most jq builtins are referentially transparent, and yield constant and repeatable value streams when applied to constant inputs\. This is not true of I/O builtins\. . .SS "input" @@ -3744,6 +3747,9 @@ Returns the name of the file whose input is currently being filtered\. Note that .SS "input_line_number" Returns the line number of the input currently being filtered\. . +.SS "exec(path; [args…])" +Spawns a new process of path with arguments args\. Pipes its input converted to a string as the stdin of the process\. 
The output of the process is coalesced and outputted as a string\. Note that this output will include any terminating newlines, so you may want to add an rtrim after this filter, or use a wrapper\. +. .SH "STREAMING" With the \fB\-\-stream\fR option jq can parse input texts in a streaming fashion, allowing jq programs to start processing large JSON texts immediately rather than after the parse completes\. If you have a single JSON text that is 1GB in size, streaming it will allow you to process it much more quickly\. . From 000f24fa69de56d8cb3750b4750da66c833373b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 15:19:26 +0200 Subject: [PATCH 06/11] Add exec/1: exec/2 with empty args array --- docs/content/manual/manual.yml | 2 +- jq.1.prebuilt | 2 +- src/builtin.jq | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index 643e50154d..7d86358325 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -3350,7 +3350,7 @@ sections: Returns the line number of the input currently being filtered. - - title: "`exec(path; [args…])`" + - title: "`exec(path)`, `exec(path; [args…])`" body: | Spawns a new process of path with arguments args. Pipes its input diff --git a/jq.1.prebuilt b/jq.1.prebuilt index cb86db635a..c44cf6c4b5 100644 --- a/jq.1.prebuilt +++ b/jq.1.prebuilt @@ -3747,7 +3747,7 @@ Returns the name of the file whose input is currently being filtered\. Note that .SS "input_line_number" Returns the line number of the input currently being filtered\. . -.SS "exec(path; [args…])" +.SS "exec(path), exec(path; [args…])" Spawns a new process of path with arguments args\. Pipes its input converted to a string as the stdin of the process\. The output of the process is coalesced and outputted as a string\. 
Note that this output will include any terminating newlines, so you may want to add an rtrim after this filter, or use a wrapper\. . .SH "STREAMING" diff --git a/src/builtin.jq b/src/builtin.jq index 802595bafd..072d42622f 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -278,3 +278,5 @@ def JOIN($idx; stream; idx_expr; join_expr): stream | [., $idx[idx_expr]] | join_expr; def IN(s): any(s == .; .); def IN(src; s): any(src == s; .); + +def exec(path): exec(path; []); From 5d3f3cd14884419713fa6b64419c36899fa320b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 19:22:55 +0200 Subject: [PATCH 07/11] exec/2: capture stderr, output object --- src/builtin.c | 123 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 105 insertions(+), 18 deletions(-) diff --git a/src/builtin.c b/src/builtin.c index 038e0e5116..c7856d68ed 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -1798,7 +1798,7 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { jv_free(path); /* setting up pipes */ - int fin[2] = {0, 0}, fout[2] = {0, 0}; + int fin[2] = {0, 0}, fout[2] = {0, 0}, ferr[2] = {0, 0}; posix_spawn_file_actions_t fda; if ((ret = posix_spawn_file_actions_init(&fda))) { jv_free(args), jv_free(input); @@ -1918,7 +1918,69 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); } } - // TODO: also set up an stderr pipe and pipe it into null, so it can be used later + + /** stderr **/ + if ((ret = pipe(ferr))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + switch (errno) { + case EMFILE: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a per-process limit")); + case ENFILE: + return jv_invalid_with_msg(jv_string("exec/2 couldn't pipe() because of a system-wide limit")); + default: + return jv_invalid_with_msg(jv_string("exec/2 
couldn't pipe()")); + } + } + if ((ret = posix_spawn_file_actions_addclose(&fda, ferr[0]))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + close(ferr[0]), close(ferr[1]); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an invalid file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); + } + } + if ((ret = posix_spawn_file_actions_adddup2(&fda, ferr[1], 2))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + close(ferr[0]), close(ferr[1]); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 an invalid file descriptor")); + case ENOMEM: + return jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to dup2 a file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to dup2 a file descriptor")); + } + } + if ((ret = posix_spawn_file_actions_addclose(&fda, ferr[1]))) { + jv_free(args), jv_free(input); + close(fin[0]), close(fin[1]); + close(fout[0]), close(fout[1]); + close(ferr[0]), close(ferr[1]); + switch (errno) { + case EBADF: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close an invalid file descriptor")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 tried to operate on an invalid file_actions object")); + case ENOMEM: + return 
jv_invalid_with_msg(jv_string("exec/2 ran out of memory while instructing the process to close a file descriptor")); + default: + return jv_invalid_with_msg(jv_string("exec/2 couldn't instruct the process to close a file descriptor")); + } + } /* execute */ pid_t pid; @@ -1927,6 +1989,7 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { NULL, argv, NULL)) { close(fin[0]), close(fin[1]); close(fout[0]), close(fout[1]); + close(ferr[0]), close(ferr[1]); jv_free(input); switch (errno) { case EINVAL: @@ -1938,7 +2001,7 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { for (size_t i = 0; i < argc; i++) { jv_mem_free(argv[i]); } - close(fin[0]), close(fout[1]); + close(fin[0]), close(fout[1]), close(ferr[1]); jv_free(args); if ((ret = posix_spawn_file_actions_destroy(&fda))) { // NOTE: posix_spawn_file_actions_destroy isn't checked for errors @@ -1949,39 +2012,63 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { switch (jv_get_kind(input)) { case JV_KIND_INVALID: case JV_KIND_NULL: - close(fin[1]); - jv_free(input); break; // do not pipe invalid / null case JV_KIND_STRING: // NOTE: write isn't checked for errors because it's non-fatal write(fin[1], jv_string_value(input), jv_string_length_bytes(jv_copy(input))); - close(fin[1]); - jv_free(input); break; default: { - jv s = jv_dump_string(input, 0); + jv s = jv_dump_string(jv_copy(input), 0); // NOTE: write isn't checked for errors because it's non-fatal write(fin[1], jv_string_value(s), jv_string_length_bytes(jv_copy(s))); - close(fin[1]); jv_free(s); break; } } + close(fin[1]); + jv_free(input); - jv output = jv_string_empty(0); - char *buf = jv_mem_alloc(1024); + static const size_t bufsize = 1024; + jv sout = jv_string_empty(0), + serr = jv_string_empty(0); + char *buf = jv_mem_alloc(bufsize); ssize_t bytes; - while ((bytes = read(fout[0], buf, 1024)) > 0) { - output = jv_string_append_buf(output, buf, bytes); + while ((bytes = read(fout[0], buf, bufsize)) > 0) { + sout = 
jv_string_append_buf(sout, buf, bytes); + } + // NOTE: if we want to check the read for failures, it'd be done here + while ((bytes = read(ferr[0], buf, bufsize)) > 0) { + serr = jv_string_append_buf(serr, buf, bytes); } // NOTE: if we want to check the read for failures, it'd be done here - close(fout[0]); + close(fout[0]), close(ferr[0]); jv_mem_free(buf); - // NOTE: if we want to check waitpid for failures, be careful since - // it may be short-lived - waitpid(pid, &ret, 0); - return output; + if (waitpid(pid, &ret, 0) == -1) { + jv_free(sout), jv_free(serr); + switch (errno) { + case EINTR: + return jv_invalid_with_msg(jv_string("exec/2 was interrupted by a signal while waiting on the child process")); + case EINVAL: + return jv_invalid_with_msg(jv_string("exec/2 passed invalid options to waitpid")); + // we do not expect ECHILD here, so it's a generic failure + case ECHILD: + default: + return jv_invalid_with_msg(jv_string("exec/2 failed in waitpid")); + } + } + + jv obj = jv_object(); + if (WIFEXITED(ret)) { + obj = jv_object_set(obj, jv_string("status"), jv_number(WEXITSTATUS(ret))); + } else { + // POSIX guarantees that this is WIFSIGNALED(ret), so the signal is read with WTERMSIG (WSTOPSIG is only valid for stopped children) + obj = jv_object_set(obj, jv_string("status"), jv_number(-1)); + obj = jv_object_set(obj, jv_string("signal"), jv_number(WTERMSIG(ret))); + } + obj = jv_object_set(obj, jv_string("out"), sout); + obj = jv_object_set(obj, jv_string("err"), serr); + return obj; } #endif From a359afeabc2f9c1f8f766d52eece9ae0070c7a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 19:23:39 +0200 Subject: [PATCH 08/11] Add system/2 as an alias to exec/2 --- src/builtin.jq | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/builtin.jq b/src/builtin.jq index 072d42622f..904dfcd326 100644 --- a/src/builtin.jq +++ b/src/builtin.jq @@ -280,3 +280,5 @@ def IN(s): any(s == .; .); def IN(src; s): any(src == s; .); def exec(path): exec(path; []); +def system(path; args): exec(path; args) | .out | rtrim ; 
+def system(path): system(path; []); From 51a620c5c7a7e02a0ac0664362b51df80c890640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 19:29:26 +0200 Subject: [PATCH 09/11] Change tests to test both exec/2 and system/2 --- tests/shtest | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/shtest b/tests/shtest index fd1de68b88..5b5a7b7df2 100755 --- a/tests/shtest +++ b/tests/shtest @@ -692,12 +692,12 @@ NUM # exec is tested by executing jq if ! $msys && ! $mingw; then now=$(date +%s) - if ! r=$($VALGRIND $Q $JQ -rn 'exec("'"$JQ_NO_B"'"; ["-rn", "'"$now"'"]) | trim') || + if ! r=$($VALGRIND $Q $JQ -rn 'exec("'"$JQ_NO_B"'"; ["-rn", "'"$now"'"]) | .out | rtrim') || [ "$r" != "$now" ] ; then echo "exec didn't pipe stdout correctly: expected $now but got $r" exit 1 fi - if ! r=$(echo "$now" | $VALGRIND $Q $JQ -r 'exec("'"$JQ_NO_B"'"; ["-r", "."]) | trim') || + if ! r=$(echo "$now" | $VALGRIND $Q $JQ -r 'system("'"$JQ_NO_B"'"; ["-r", "."])') || [ "$r" != "$now" ]; then echo "exec didn't pipe input correctly: expected $now but got $r" exit 1 From cabd52db478d17f0fd4f4a5015c10c7354f664b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 19:29:54 +0200 Subject: [PATCH 10/11] Document changes to exec/2 and system/2 --- docs/content/manual/manual.yml | 25 +++++++++++++++++-------- jq.1.prebuilt | 7 +++++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/docs/content/manual/manual.yml b/docs/content/manual/manual.yml index 7d86358325..9cdbbfe6ea 100644 --- a/docs/content/manual/manual.yml +++ b/docs/content/manual/manual.yml @@ -3272,10 +3272,11 @@ sections: decoration, not even a newline. Additionally, jq has support for executing "external filters" provided by - other executables. This functionality is provided by `exec`, but may not - be available on all platforms, and does not necessarily integrate well - with other jq features. 
It is intended to perform smaller processing that - is otherwise impossible to perform in jq itself. + other executables. This functionality is provided by `exec` and related + filters viz. `system`, but may not be available on all platforms, and does + not necessarily integrate well with other jq features. It is intended to + perform smaller processing that is otherwise impossible to perform in jq + itself. Most jq builtins are referentially transparent, and yield constant and repeatable value streams when applied to constant inputs. @@ -3354,10 +3355,18 @@ sections: body: | Spawns a new process of path with arguments args. Pipes its input - converted to a string as the stdin of the process. The output of the - process is coalesced and outputted as a string. Note that this output - will include any terminating newlines, so you may want to add an - rtrim after this filter, or use a wrapper. + converted to a string as the stdin of the process. Outputs an object + like `{"out": "", "err": "", "status": 0}`, containing the stdout, + stderr, and exit code of the process respectively. If the process + exits abnormally due to an unhandled signal, it will have a status of + "-1" and a "signal" parameter giving the numeric value of the signal + that caused the process to terminate. + + - title: "`system(path)`, `system(path; [args…])`" + body: | + + Exactly equivalent to an `exec` call with the same arguments, followed + by `| .out | rtrim`. - title: 'Streaming' body: | diff --git a/jq.1.prebuilt b/jq.1.prebuilt index c44cf6c4b5..eab5355cd4 100644 --- a/jq.1.prebuilt +++ b/jq.1.prebuilt @@ -3652,7 +3652,7 @@ At this time jq has minimal support for I/O, mostly in the form of control over Two builtins provide minimal output capabilities, \fBdebug\fR, and \fBstderr\fR\. (Recall that a jq program\'s output values are always output as JSON texts on \fBstdout\fR\.)
The \fBdebug\fR builtin can have application\-specific behavior, such as for executables that use the libjq C API but aren\'t the jq executable itself\. The \fBstderr\fR builtin outputs its input in raw mode to stder with no additional decoration, not even a newline\. . .P -Additionally, jq has support for executing "external filters" provided by other executables\. This functionality is provided by \fBexec\fR, but may not be available on all platforms, and does not necessarily integrate well with other jq features\. It is intended to perform smaller processing that is otherwise impossible to perform in jq itself\. +Additionally, jq has support for executing "external filters" provided by other executables\. This functionality is provided by \fBexec\fR and related filters viz\. \fBsystem\fR, but may not be available on all platforms, and does not necessarily integrate well with other jq features\. It is intended to perform smaller processing that is otherwise impossible to perform in jq itself\. . .P Most jq builtins are referentially transparent, and yield constant and repeatable value streams when applied to constant inputs\. This is not true of I/O builtins\. @@ -3748,7 +3748,10 @@ Returns the name of the file whose input is currently being filtered\. Note that Returns the line number of the input currently being filtered\. . .SS "exec(path), exec(path; [args…])" -Spawns a new process of path with arguments args\. Pipes its input converted to a string as the stdin of the process\. The output of the process is coalesced and outputted as a string\. Note that this output will include any terminating newlines, so you may want to add an rtrim after this filter, or use a wrapper\. +Spawns a new process of path with arguments args\. Pipes its input converted to a string as the stdin of the process\. Outputs an object like \fB{"out": "", "err": "", "status": 0}\fR, containing the stdout, stderr, and exit code of the process respectively\.
If the process exits abnormally due to an unhandled signal, it will have a status of "\-1" and a "signal" parameter giving the numeric value of the signal that caused the process to terminate\. +. +.SS "system(path), system(path; [args…])" +Exactly equivalent to an \fBexec\fR call with the same arguments, followed by \fB| \.out | rtrim\fR\. . .SH "STREAMING" With the \fB\-\-stream\fR option jq can parse input texts in a streaming fashion, allowing jq programs to start processing large JSON texts immediately rather than after the parse completes\. If you have a single JSON text that is 1GB in size, streaming it will allow you to process it much more quickly\. From 17a219826be14aa8c2890194e64e79d3bcfebd00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Chlo=C3=A9=20Vulquin?= Date: Fri, 31 May 2024 19:34:05 +0200 Subject: [PATCH 11/11] exec/2: correct typo: STOPSIG is not TERMSIG --- src/builtin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/builtin.c b/src/builtin.c index c7856d68ed..ec75e22858 100644 --- a/src/builtin.c +++ b/src/builtin.c @@ -2064,7 +2064,7 @@ static jv f_exec(jq_state *jq, jv input, jv path, jv args) { } else { // POSIX guarantees that this is WIFSIGNALED(ret) obj = jv_object_set(obj, jv_string("status"), jv_number(-1)); - obj = jv_object_set(obj, jv_string("signal"), jv_number(WSTOPSIG(ret))); + obj = jv_object_set(obj, jv_string("signal"), jv_number(WTERMSIG(ret))); } obj = jv_object_set(obj, jv_string("out"), sout); obj = jv_object_set(obj, jv_string("err"), serr);