From df6b856eb1f0bf5c638da345814e1591a0bdbf8a Mon Sep 17 00:00:00 2001 From: ge Date: Thu, 1 Jan 2026 04:48:18 +0300 Subject: [PATCH] init --- .editorconfig | 8 ++ .gitattributes | 8 ++ .gitignore | 24 +++++ LICENSE | 18 ++++ README.md | 1 + shellish.v | 272 ++++++++++++++++++++++++++++++++++++++++++++++++ shellish_test.v | 110 ++++++++++++++++++++ v.mod | 7 ++ 8 files changed, 448 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 shellish.v create mode 100644 shellish_test.v create mode 100644 v.mod diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..01072ca --- /dev/null +++ b/.editorconfig @@ -0,0 +1,8 @@ +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.v] +indent_style = tab diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9a98968 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,8 @@ +* text=auto eol=lf +*.bat eol=crlf + +*.v linguist-language=V +*.vv linguist-language=V +*.vsh linguist-language=V +v.mod linguist-language=V +.vdocignore linguist-language=ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c3a84b --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +# Binaries for programs and plugins +main +shellish +*.exe +*.exe~ +*.so +*.dylib +*.dll + +# Ignore binary output folders +bin/ + +# Ignore common editor/system specific metadata +.DS_Store +.idea/ +.vscode/ +*.iml + +# ENV +.env + +# vweb and database +*.db +*.js diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..188de5f --- /dev/null +++ b/LICENSE @@ -0,0 +1,18 @@ +MIT License + +Copyright (c) 2026 gechandesu + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +associated documentation files (the "Software"), to deal in the Software without restriction, including 
+without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the
+following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
+LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
+EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..eae8a86
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# Shell-like Syntax Parser and Text Processor
diff --git a/shellish.v b/shellish.v
new file mode 100644
index 0000000..05a19a1
--- /dev/null
+++ b/shellish.v
@@ -0,0 +1,272 @@
+module shellish
+
+import strings
+
+// safe_chars contains ASCII characters that can be used in shell without any escaping.
+pub const safe_chars = '%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
+
+// special_chars contains ASCII characters that must be escaped in shell:
+// space, `$`, `"`, `'`, backtick, `\` and `!`.
+// It lists the same characters, in the same order, as `special_chars_runes`.
+pub const special_chars = ' \$"\'`\\!'
+
+// special_chars_runes is the character-literal form of `special_chars`.
+// Keep both constants in sync when adding or removing a character.
+const special_chars_runes = [` `, `$`, `"`, `'`, `\``, `\\`, `!`]
+
+// quote returns a quoted version of string `s`.
+//
+// Note: String will be quoted with single quotes (`'`). If you expect `"`, use `double_quote()` instead.
+//
+// Example:
+// ```
+// assert shellish.quote('Hello, world!') == "'Hello, world!'"
+// assert shellish.quote("John's notebook") == '\'John\'"\'"\'s notebook\''
+// ```
+pub fn quote(s string) string {
+	if s.len == 0 {
+		// An empty string is returned as an explicit pair of single quotes.
+		return "''"
+	}
+	if !s.contains_only(safe_chars) {
+		// Every `'` in `s` becomes `'"'"'`: close the quoted span, emit the
+		// quote inside double quotes, then reopen the single-quoted span.
+		return "'" + s.replace("'", '\'"\'"\'') + "'"
+	}
+	// Only safe characters: nothing to quote.
+	return s
+}
+
+// double_quote wraps string `s` in double quotes (`"`) and prefixes every
+// double quote found inside `s` with a backslash (`\`).
+// An empty string yields `""`; a string made only of `safe_chars` is returned unchanged.
+//
+// Note: Shell still interprets special characters inside double quotes.
+// To prevent this use `quote()` instead or use `double_quote()` in conjunction with `escape()`.
+// Example:
+// ```
+// assert shellish.double_quote('NAME="Arch Linux"') == r'"NAME=\"Arch Linux\""'
+// assert shellish.double_quote(r'Hello, ${NAME}!') == r'"Hello, ${NAME}!"'
+// ```
+pub fn double_quote(s string) string {
+	if s.len == 0 {
+		return '""'
+	}
+	if !s.contains_only(safe_chars) {
+		return '"' + s.replace('"', r'\"') + '"'
+	}
+	return s
+}
+
+// unquote drops a quote character (`"` or `'`) found at the first and/or at the
+// last position of the string. Both positions are checked independently, so the
+// two quotes do not have to match and a quote on one side only is removed as well.
+pub fn unquote(s string) string {
+	mut out := strings.new_builder(s.len)
+	for idx, ch in s {
+		if ch in [`"`, `'`] && (idx == 0 || idx == s.len - 1) {
+			continue
+		}
+		out.write_byte(ch)
+	}
+	return out.str()
+}
+
+// escape returns a copy of string `s` in which every shell special character
+// is preceded by a backslash. No quotes are added.
+// Example: assert shellish.escape('Hello, World!') == r'Hello,\ World\!'
+pub fn escape(s string) string {
+	mut out := strings.new_builder(s.len)
+	for ch in s {
+		// A special character gets a backslash in front of it; the character
+		// itself is always copied unchanged.
+		if ch in special_chars_runes {
+			out.write_byte(`\\`)
+		}
+		out.write_byte(ch)
+	}
+	return out.str()
+}
+
+// unescape unescapes the escaped string by removing backslash escapes.
+// Example: assert shellish.unescape(r'line\ with\ spaces\\n') == r'line with spaces\n'
+pub fn unescape(s string) string {
+	mut ret := strings.new_builder(s.len)
+	for i := 0; i < s.len; i++ {
+		if s[i] == `\\` && i + 1 < s.len {
+			// Drop the backslash, copy the escaped byte and skip over it.
+			ret.write_byte(s[i + 1])
+			i++
+		} else {
+			// Ordinary byte, or a backslash that is the very last byte of `s`
+			// (nothing follows it, so it is kept as is).
+			ret.write_byte(s[i])
+		}
+	}
+	return ret.str()
+}
+
+// strip_non_printable strips non-printable ASCII characters (from `0x00` to `0x1f` and `0x7f`) from string.
+// Note that this range includes tab (`0x09`) and newline (`0x0a`).
+pub fn strip_non_printable(s string) string {
+	mut ret := strings.new_builder(s.len)
+	for c in s {
+		// Keep a byte only if it is above the control range AND is not DEL.
+		// (With `||` the condition is true for every byte and nothing is stripped.)
+		if c > 0x1f && c != 0x7f {
+			ret.write_byte(c)
+		}
+	}
+	return ret.str()
+}
+
+// strip_ansi_escape_codes strips ANSI escape sequences starting with `ESC [` from string.
+// A sequence ends at the first byte in the range `0x40`..`0x7e` (a further `[` does not end it).
+// An ESC byte that is not followed by `[` is dropped; the bytes after it are kept.
+pub fn strip_ansi_escape_codes(s string) string {
+	mut ret := strings.new_builder(s.len)
+	mut esc := false // an ESC byte has been seen and not yet resolved
+	mut lsbr := false // the `[` after ESC has been seen: inside a sequence
+	for c in s {
+		if c == 0x1b { // ESC
+			esc = true
+			continue
+		}
+		if esc && c == `[` { // ESC [
+			lsbr = true
+			continue
+		}
+		if esc && lsbr {
+			if c >= 0x40 && c <= 0x7e { // end of sequence
+				esc = false
+				lsbr = false
+			}
+			continue
+		}
+		// Getting here with `esc` still set means ESC was followed by something
+		// other than `[`. Forget the ESC, otherwise an unrelated `[` later in the
+		// text would be taken for the start of a sequence and text would be lost.
+		esc = false
+		ret.write_byte(c)
+	}
+
+	return ret.str()
+}
+
+// split splits the `s` string into tokens using shell-like syntax in POSIX manner.
+// An error is returned if `s` is blank or if `s` cannot be parsed (see `parse`).
+// Example: assert shellish.split('echo "Hello, World!"') == ['echo', 'Hello, World!']
+pub fn split(s string) ![]string {
+	if s.is_blank() {
+		return error('non-blank string expected')
+	}
+	return parse(s)!
+}
+
+// join joins `a` array members into a shell command.
+// Example: assert shellish.join(['sh', '-c', 'hostname -f']) == "sh -c 'hostname -f'"
+pub fn join(a []string) string {
+	// Every member goes through quote(), so members consisting only of safe
+	// characters are emitted as is and all others are single-quoted.
+	return a.map(quote(it)).join(' ')
+}
+
+// Mode tells whether the token currently being collected by `parse` has
+// received any content yet.
+enum Mode {
+	no     // nothing collected since the last token was emitted
+	normal // at least one character was appended to the token
+	quoted // a quoted span was closed; makes `''` and `""` yield an empty token
+}
+
+// parse splits `line` into tokens.
+//
+// Rules implemented here:
+// - unquoted whitespace separates tokens;
+// - single and double quotes group text and are removed from the token;
+// - backtick spans and `$(...)` spans group text and are kept in the token verbatim;
+// - a backslash escapes the next character, except inside single quotes.
+//
+// An error is returned if the line ends inside an escape, a quoted span,
+// a backtick span or a `$(...)` span.
+//
+// Limitation: `$(...)` spans are not nesting-aware, the first unquoted `)` closes the span.
+fn parse(line string) ![]string {
+	mut tokens := []string{}
+	mut buf := []u8{}
+
+	mut escaped := false
+	mut single_quoted := false
+	mut double_quoted := false
+	mut back_quoted := false
+	mut dollar_quoted := false
+
+	mut got := Mode.no
+
+	for i, c in line {
+		mut r := c
+
+		if escaped {
+			// NOTE(review): `\t` and `\n` are turned into TAB and LF here. A POSIX shell
+			// does not do that, and inside double quotes the backslash has already
+			// been appended as well. Confirm that this is intended.
+			if r == `t` {
+				r = `\t`
+			}
+			if r == `n` {
+				r = `\n`
+			}
+			buf << r
+			escaped = false
+			got = .normal
+			continue
+		}
+
+		if r == `\\` {
+			if single_quoted {
+				// Inside single quotes a backslash is an ordinary character.
+				buf << r
+			} else {
+				// `i + 1 < line.len` guards the look-ahead below. If the backslash is
+				// the last byte of the line there is nothing to look at, `escaped`
+				// stays set and the error is reported after the loop.
+				if double_quoted && i + 1 < line.len {
+					// POSIX-compliant shells remove the backslash only if it is followed
+					// by one of $, `, " or \. Otherwise the backslash is kept literally.
+					if line[i + 1] !in [`$`, `\``, `"`, `\\`] {
+						buf << r
+					}
+				}
+				escaped = true
+			}
+			continue
+		}
+
+		if r.is_space() {
+			if single_quoted || double_quoted || back_quoted || dollar_quoted {
+				// Whitespace inside any grouping span belongs to the token.
+				buf << r
+			} else if got != .no {
+				tokens << buf.bytestr()
+				buf = []u8{}
+				got = .no
+			}
+			continue
+		}
+
+		match r {
+			`\`` {
+				if !single_quoted && !double_quoted && !dollar_quoted {
+					back_quoted = !back_quoted
+				}
+			}
+			`(` {
+				// `$(` outside of any other span opens a dollar expression.
+				if !single_quoted && !double_quoted && !dollar_quoted {
+					if buf.len > 0 && buf[buf.len - 1] == `$` {
+						dollar_quoted = true
+						buf << r
+						continue
+					}
+				}
+			}
+			`)` {
+				// Closes a dollar expression, if one is open. The check must not
+				// require `!dollar_quoted`, otherwise the span could never be closed.
+				if !single_quoted && !double_quoted {
+					dollar_quoted = false
+				}
+			}
+			`"` {
+				if !single_quoted && !dollar_quoted {
+					if double_quoted {
+						got = .quoted
+					}
+					double_quoted = !double_quoted
+					continue
+				}
+			}
+			`'` {
+				if !double_quoted && !dollar_quoted {
+					if single_quoted {
+						got = .quoted
+					}
+					single_quoted = !single_quoted
+					continue
+				}
+			}
+			else {}
+		}
+
+		// Everything that was not consumed above is part of the token, including
+		// the backticks and parentheses handled in the match.
+		got = .normal
+		buf << r
+	}
+
+	if got != .no {
+		tokens << buf.bytestr()
+	}
+
+	// Report the first unfinished construct, in this fixed order of precedence.
+	match true {
+		escaped { return error('invalid escape in string') }
+		single_quoted { return error('non-terminated quote in string') }
+		double_quoted { return error('non-terminated double quote in string') }
+		back_quoted { return error('non-terminated backtick in string') }
+		dollar_quoted { return error('non-terminated dollar expression in string') }
+		else {}
+	}
+
+	return tokens
+}
diff --git a/shellish_test.v b/shellish_test.v
new file mode 100644
index 0000000..5bf052b
--- /dev/null
+++ b/shellish_test.v
@@ -0,0 +1,110 @@
+import shellish
+
+fn test_quote() {
+	assert shellish.quote('Hello, world!') == "'Hello, world!'"
+	assert shellish.quote("John's notebook") == '\'John\'"\'"\'s notebook\''
+	assert shellish.quote("Jack O'Neill") == '\'Jack O\'"\'"\'Neill\''
+}
+
+fn test_double_quote() {
+	assert shellish.double_quote('NAME="Arch Linux"') == r'"NAME=\"Arch Linux\""'
+	assert shellish.double_quote(r'Hello, ${NAME}!') == r'"Hello, ${NAME}!"'
+}
+
+fn test_unquote() {
+	assert shellish.unquote('"hello"') == 'hello'
+	assert shellish.unquote("'world'") == 'world'
+}
+
+fn test_escape() {
+	assert shellish.escape('Hello, World!') == r'Hello,\ World\!'
+}
+
+fn test_unescape() {
+	assert shellish.unescape(r'\\\\\\') == r'\\\'
+	assert shellish.unescape(r'Line\ with\ spaces\\n') == r'Line with spaces\n'
+}
+
+fn test_strip_ansi_escape_codes() {
+	assert shellish.strip_ansi_escape_codes('\033[32mhello\033[0m') == 'hello'
+}
+
+// An unquoted `$(...)` must be closed by `)` and kept as a single token.
+fn test_split_dollar_expression() {
+	assert shellish.split(r'foo $(bar)')! == ['foo', r'$(bar)']
+	assert shellish.split(r'foo $(bar baz) qux')! == ['foo', r'$(bar baz)', 'qux']
+}
+
+// A backslash that ends the line inside double quotes must produce an error
+// instead of reading past the end of the input.
+fn test_split_trailing_backslash_in_double_quotes() {
+	tokens := shellish.split(r'"foo\') or {
+		assert err.msg() == 'invalid escape in string'
+		return
+	}
+	assert false, 'expected an error, got ${tokens}'
+}
+
+// Original test data taken from https://github.com/python/cpython/blob/3.14/Lib/test/test_shlex.py
+// FIXME: All test-cases must pass.
+// vfmt off
+// test_data maps an input line to the list of tokens that `shellish.split`
+// is expected to return for it. Entries that are commented out and marked
+// `failing` are not checked by test_split.
+const test_data = {
+	'foo': ['foo']
+	'foo bar': ['foo', 'bar']
+	' foo bar': ['foo', 'bar']
+	' foo bar ': ['foo', 'bar']
+	'foo bar baz': ['foo', 'bar', 'baz']
+	r'\foo bar': ['foo', 'bar']
+	r'\ foo bar': [' foo', 'bar']
+	r'\ foo': [' foo']
+	r'foo\ bar': ['foo bar']
+	r'foo \bar baz': ['foo', 'bar', 'baz']
+	r'foo \ bar baz': ['foo', ' bar', 'baz']
+	r'foo \ bar': ['foo', ' bar']
+	'"foo" bar baz': ['foo', 'bar', 'baz']
+	'foo "bar" baz': ['foo', 'bar', 'baz']
+	'foo bar "baz"': ['foo', 'bar', 'baz']
+	"'foo' bar baz": ['foo', 'bar', 'baz']
+	"foo 'bar' baz": ['foo', 'bar', 'baz']
+	"foo bar 'baz'": ['foo', 'bar', 'baz']
+	'"foo" "bar" "baz"': ['foo', 'bar', 'baz']
+	"'foo' 'bar' 'baz'": ['foo', 'bar', 'baz']
+	'foo bar"bla"bla"bar" baz': ['foo', 'barblablabar', 'baz']
+	"foo bar'bla'bla'bar' baz": ['foo', 'barblablabar', 'baz']
+	'""': ['']
+	"''": ['']
+	'"" "" ""': ['', '', '']
+	'foo "" bar': ['foo', '', 'bar']
+	"foo '' bar": ['foo', '', 'bar']
+	'foo "" "" "" bar': ['foo', '', '', '', 'bar']
+	"foo '' '' '' bar": ['foo', '', '', '', 'bar']
+	'foo "bar baz"': ['foo', 'bar baz']
+	r"\'": ["'"]
+	r'\"': ['"']
+	r'"\""': ['"']
+	r'"foo\ bar"': [r'foo\ bar']
+	r'"foo\\ bar"': [r'foo\ bar']
+	r'"foo\\ bar\""': [r'foo\ bar"']
+	r'"foo\\" bar\"': [r'foo\', 'bar"']
+	r'"foo\\ bar\" abcde"': [r'foo\ bar" abcde']
+	r'"foo\\\ bar\" abcde"': [r'foo\\ bar" abcde']
+	r'"foo\\\x bar\" abcde"': [r'foo\\x bar" abcde']
+	r'"foo\x bar\" abcde"': [r'foo\x bar" abcde']
+	r"'foo\ bar'": [r'foo\ bar']
+	r"'foo\\ bar'": [r'foo\\ bar']
+	r'\"foo': ['"foo']
+	r'\"foo\x': ['"foox']
+	r'"foo\x"': [r'foo\x']
+	r'"foo\ "': [r'foo\ ']
+	r'foo\ xx': ['foo xx']
+	r'foo\ x\x': ['foo xx']
+	r'foo\ x\x\"': ['foo xx"']
+	r'"foo\ x\x"': [r'foo\ x\x']
+	r'"foo\ x\x\\"': [r'foo\ x\x\']
+	r'"foo\ x\x\\""foobar"': [r'foo\ x\x\foobar']
+
+	'":-) ;-)"': [':-) ;-)']
+	'foo `bar baz`': ['foo', '`bar baz`']
+	r'foo "$(bar baz)"': ['foo', r'$(bar baz)']
+	// r'foo $(bar)': ['foo', r'$(bar)'] // failing
+	// r'foo $(bar baz)': ['foo', r'$(bar baz)'] // failing
+	// r'foo#bar\nbaz': ['foo', 'baz'] // failing
+	// 'foo;bar': ['foo', ';', 'bar'] // failing
+
+	'"foo\\\\\\x bar\\" df\'a\\ \'df"': ['foo\\\\x bar" df\'a\\ \'df']
+	'"foo\\ x\\x\\\\"\\\'"foobar"': [r"foo\ x\x\'foobar"]
+	'"foo\\ x\\x\\\\"\\\'"fo\'obar"': [r"foo\ x\x\'fo'obar"]
+	'"foo\\ x\\x\\\\"\\\'"fo\'obar" \'don\'\\\'\'t\'': [r"foo\ x\x\'fo'obar", "don't"]
+	'"foo\\ x\\x\\\\"\\\'"fo\'obar" \'don\'\\\'\'t\' \\\\': [r"foo\ x\x\'fo'obar", r"don't", r'\']
+}
+// vfmt on
+
+// test_split runs `shellish.split` on every key of `test_data` and requires the
+// result to equal the mapped token list; the failing input is named in the message.
+fn test_split() {
+	for input, output in test_data {
+		assert shellish.split(input)! == output, 'failed on input: ${input}'
+	}
+}
diff --git a/v.mod b/v.mod
new file mode 100644
index 0000000..623b0af
--- /dev/null
+++ b/v.mod
@@ -0,0 +1,7 @@
+Module {
+	name: 'shellish'
+	description: 'Shell-like syntax parser and text processor'
+	version: '0.1.0'
+	license: 'MIT'
+	dependencies: []
+}