commit a5f7006065d2dfda92dc99c62209422604a97284
Author: ge
Date:   Sat Jul 12 03:07:50 2025 +0300

    init

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..01072ca
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,8 @@
+[*]
+charset = utf-8
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[*.v]
+indent_style = tab
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9a98968
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,8 @@
+* text=auto eol=lf
+*.bat eol=crlf
+
+*.v linguist-language=V
+*.vv linguist-language=V
+*.vsh linguist-language=V
+v.mod linguist-language=V
+.vdocignore linguist-language=ignore
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..88c0fdd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Binaries for programs and plugins
+main
+code
+*.exe
+*.exe~
+*.so
+*.dylib
+*.dll
+
+# Ignore binary output folders
+bin/
+
+# Ignore common editor/system specific metadata
+.DS_Store
+.idea/
+.vscode/
+*.iml
+
+# ENV
+.env
+
+# vweb and database
+*.db
+*.js
diff --git a/shell.v b/shell.v
new file mode 100644
index 0000000..6f9c881
--- /dev/null
+++ b/shell.v
@@ -0,0 +1,329 @@
+module shell
+
+import strings.textscanner
+
+pub const safe_chars = '%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
+pub const word_chars = 'abcdfeghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
+pub const unicode_word_chars = 'ßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ'
+pub const extra_word_chars = '~-./*?='
+pub const punct_chars = '();<>|&'
+
+// quote returns a shell-escaped version of string `s`.
+// Example:
+// ```
+// assert shell.quote("d'arc") == '\'d\'"\'"\'arc\''
+// ```
+pub fn quote(s string) string {
+	if s == '' {
+		return "''"
+	}
+	if s.contains_only(safe_chars) {
+		return s
+	}
+	return "'" + s.replace("'", '\'"\'"\'') + "'"
+}
+
+// join joins the members of array `s` into a single shell-escaped string.
+// Example:
+// ```
+// assert shell.join(['sh', '-c', 'hostname -f']) == "sh -c 'hostname -f'"
+// ```
+pub fn join(s []string) string {
+	mut quoted_args := []string{}
+	for arg in s {
+		quoted_args << quote(arg)
+	}
+	return quoted_args.join(' ')
+}
+
+@[params]
+pub struct SplitParams {
+pub:
+	posix    bool = true
+	comments bool
+}
+
+// split splits the string `s` into an array using shell-like syntax.
+// Example:
+// ```
+// assert shell.split("sh -c 'hostname -f'") == ['sh', '-c', 'hostname -f']
+// ```
+pub fn split(s string, params SplitParams) []string {
+	mut parts := []string{}
+	mut lexer := new(s,
+		posix: params.posix
+		comments: params.comments
+	)
+	for token in lexer {
+		parts << token
+	}
+	return parts
+}
+
+@[params]
+pub struct LexerParams {
+pub:
+	posix       bool
+	comments    bool = true
+	punctuation bool
+}
+
+// new creates a new Lexer instance. See the LexerParams docstrings for details.
+// Instantiate Lexer directly if you need a more customized lexer setup.
+pub fn new(input string, params LexerParams) Lexer {
+	mut words := word_chars
+	if params.posix {
+		words += unicode_word_chars
+	}
+	if params.punctuation {
+		words += extra_word_chars
+	}
+	return Lexer{
+		scanner: textscanner.new(input)
+		word_chars: words
+		punct_chars: if params.punctuation { punct_chars } else { '' }
+		comment_chars: if params.comments { '#' } else { '' }
+		// `whitespace_split` must be true if `punctuation` is false, otherwise
+		// the lexer gets stuck in an infinite loop!
+		whitespace_split: if params.punctuation { false } else { true }
+		posix_mode: params.posix
+	}
+}
+
+pub struct Lexer {
+pub mut:
+	scanner textscanner.TextScanner
+pub:
+	word_chars     string = word_chars
+	punct_chars    string = punct_chars
+	comment_chars  string = '#'
+	quotes         string = '\'"'
+	escape         string = '\\'
+	escaped_quotes string = '"'
+	posix_mode     bool
+	// whitespace_split must be true if punct_chars is empty to prevent
+	// the lexer from getting stuck in an infinite loop! The parser sucks, I know.
+	// Use the `new` function to create a Lexer instance to ensure the
+	// correct whitespace_split value.
+	whitespace_split bool
+mut:
+	// These fields are used internally to store the current parser state.
+	lineno          int    = 1
+	state           string = ' '
+	token           string
+	push_back       []string
+	push_back_chars []string
+}
+
+// next returns parsed tokens until the end of the input string is reached.
+pub fn (mut x Lexer) next() ?string {
+	if x.push_back.len != 0 {
+		token := x.push_back.first()
+		x.push_back.drop(1)
+		return token
+	}
+	if x.scanner.pos != x.scanner.ilen {
+		return x.token()
+	}
+	return none
+}
+
+// token parses and returns one token from the input string according to the current scanner state.
+fn (mut x Lexer) token() ?string {
+	// TODO: this function must be fixed and completely rewritten
+	mut quoted := false
+	mut escaped_state := ' '
+	for {
+		mut nextchar := x.scanner.peek_u8()
+		if x.punct_chars != '' && x.push_back_chars.len != 0 {
+			nextchar = x.push_back_chars[x.push_back_chars.len - 1..][0].u8()
+		}
+		print_dbg('state=<${x.state}> I see character <${nextchar.ascii_str()}>')
+		if nextchar == `\n` {
+			x.lineno++
+		}
+		match true {
+			x.state == '' {
+				x.token = ''
+				break
+			}
+			x.state == ' ' {
+				match true {
+					nextchar == 0 {
+						// nextchar == 0 means EOF was reached (the `if not nextchar` check in Python's shlex)
+						x.state = ''
+						break
+					}
+					nextchar.is_space() {
+						print_dbg('I see whitespace in whitespace state')
+						if x.token != '' || (x.posix_mode && quoted) {
+							break
+						}
+						x.scanner.skip()
+						continue
+					}
+					x.comment_chars.contains_u8(nextchar) {
+						// Skip the commented characters. Note: goto_end() skips to the end of the input, not just to `\n`.
+						x.scanner.goto_end()
+						x.lineno++
+					}
+					x.posix_mode && x.escape.contains_u8(nextchar) {
+						escaped_state = 'a'
+						x.state = nextchar.ascii_str()
+					}
+					x.word_chars.contains_u8(nextchar) {
+						x.token = nextchar.ascii_str()
+						x.state = 'a'
+					}
+					x.punct_chars.contains_u8(nextchar) {
+						x.token = nextchar.ascii_str()
+						x.state = 'c'
+					}
+					x.quotes.contains_u8(nextchar) {
+						if !x.posix_mode {
+							x.token = nextchar.ascii_str()
+						}
+						x.state = nextchar.ascii_str()
+					}
+					x.whitespace_split == true {
+						x.token = nextchar.ascii_str()
+						x.state = 'a'
+					}
+					else {
+						x.token = nextchar.ascii_str()
+						if x.token != '' || (x.posix_mode && quoted) {
+							break
+						} else {
+							continue
+						}
+					}
+				}
+			}
+			x.quotes.contains(x.state) {
+				quoted = true
+				if nextchar == 0 {
+					// nextchar == 0 means EOF was reached (the `if not nextchar` check in Python's shlex)
+					panic('found non-terminated quote')
+				}
+				match true {
+					nextchar.ascii_str() == x.state {
+						if !x.posix_mode {
+							x.token += nextchar.ascii_str()
+							x.state = ' '
+						} else {
+							x.state = 'a'
+						}
+					}
+					x.posix_mode && x.escape.contains_u8(nextchar)
+						&& x.escaped_quotes.contains(x.state) {
+						escaped_state = x.state
+						x.state = nextchar.ascii_str()
+					}
+					else {
+						x.token += nextchar.ascii_str()
+					}
+				}
+			}
+			x.escape.contains(x.state) {
+				if nextchar == 0 {
+					// nextchar == 0 means EOF was reached (the `if not nextchar` check in Python's shlex)
+					panic('no escaped character found')
+				}
+				if x.quotes.contains(escaped_state) && nextchar.ascii_str() != x.state
+					&& nextchar.ascii_str() != escaped_state {
+					x.token += x.state
+				}
+				x.token += nextchar.ascii_str()
+				x.state = escaped_state
+			}
+			x.state in ['a', 'c'] {
+				match true {
+					nextchar == 0 {
+						x.state = '' // self.state = None
+						break
+					}
+					nextchar.is_space() {
+						print_dbg('I see whitespace in word state')
+						x.state = ' '
+						if x.token != '' || (x.posix_mode && quoted) {
+							break
+						} else {
+							continue
+						}
+					}
+					x.comment_chars.contains_u8(nextchar) {
+						// We need to skip all commented characters until `\n` is found.
+						// for {
+						// 	if x.scanner.peek_u8() == `\n` {
+						// 		x.scanner.skip()
+						// 		break
+						// 	}
+						// 	x.scanner.skip()
+						// }
+						// x.lineno++
+						if x.posix_mode {
+							x.state = ' '
+							if x.token != '' || (x.posix_mode && quoted) {
+								break
+							} else {
+								continue
+							}
+						}
+					}
+					x.state == 'c' {
+						if x.punct_chars.contains_u8(nextchar) {
+							x.token += nextchar.ascii_str()
+						} else {
+							if !nextchar.is_space() {
+								x.push_back_chars << nextchar.ascii_str()
+							}
+							x.state = ' '
+							break
+						}
+					}
+					x.posix_mode && x.quotes.contains_u8(nextchar) {
+						x.state = nextchar.ascii_str()
+					}
+					x.posix_mode && x.escape.contains_u8(nextchar) {
+						escaped_state = 'a'
+						x.state = nextchar.ascii_str()
+					}
+					(x.word_chars.contains_u8(nextchar)
+						|| x.quotes.contains_u8(nextchar))
+						|| (x.whitespace_split && x.punct_chars.contains_u8(nextchar)) {
+						x.token += nextchar.ascii_str()
+					}
+					else {
+						if x.punct_chars != '' {
+							x.push_back_chars << nextchar.ascii_str()
+						} else {
+							x.push_back.prepend(nextchar.ascii_str())
+						}
+						print_dbg('I see punctuation char in word state')
+						x.state = ' '
+						if x.token != '' || (x.posix_mode && quoted) {
+							break
+						} else {
+							continue
+						}
+					}
+				}
+			}
+			else {}
+		}
+		x.scanner.next()
+	}
+	result := x.token
+	x.token = ''
+	if x.posix_mode && !quoted && result == '' {
+		return none
+	}
+	print_dbg('I got token <${result}>')
+	return result
+}
+
+fn print_dbg(s string) {
+	$if trace_shell_lexer ? {
+		eprintln('shell lexer: ' + s)
+	}
+}
diff --git a/shell_test.v b/shell_test.v
new file mode 100644
index 0000000..f84b9c8
--- /dev/null
+++ b/shell_test.v
@@ -0,0 +1,27 @@
+import shell
+
+// TODO: pass all tests from https://github.com/python/cpython/blob/main/Lib/test/test_shlex.py
+
+fn test_quote() {
+	assert shell.quote("janna d'arc") == '\'janna d\'"\'"\'arc\''
+}
+
+fn test_join() {
+	assert shell.join(['sh', '-c', 'hostname -f']) == "sh -c 'hostname -f'"
+}
+
+fn test_split() {
+	assert shell.split("sh -c 'hostname -f'") == ['sh', '-c', 'hostname -f']
+	assert shell.split('sh -c hostname') == ['sh', '-c', 'hostname']
+	assert shell.split('hostname -f # some comment') == ['hostname', '-f', '#', 'some', 'comment']
+	assert shell.split('hostname -f # some comment', comments: true) == ['hostname', '-f']
+	assert shell.split('grep -rn "#"') == ['grep', '-rn', '#']
+	assert shell.split('grep -rn "#"', comments: true) == ['grep', '-rn', '#']
+
+	// FIXME: both assertions fail
+	// s := 'grep -rn hello # search hello
+	// awk --help
+	// '.trim_indent()
+	// assert shell.split(s) == ['grep', '-rn', 'hello', '#', 'search', 'hello', 'awk', '--help']
+	// assert shell.split(s, comments: true) == ['grep', '-rn', 'hello', 'awk', '--help']
+}
diff --git a/v.mod b/v.mod
new file mode 100644
index 0000000..eb6fc2e
--- /dev/null
+++ b/v.mod
@@ -0,0 +1,7 @@
+Module {
+	name: 'shell'
+	description: 'Shell lexer'
+	version: '0.0.0'
+	license: 'MIT'
+	dependencies: []
+}
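
Usage sketch (not part of the commit): a minimal example assuming the module is importable as `shell`, exercising only the public functions introduced above; the expected values mirror the assertions in shell_test.v.

	import shell

	fn main() {
		// split breaks a command line into argv-style parts using shell-like rules
		args := shell.split("sh -c 'hostname -f'")
		assert args == ['sh', '-c', 'hostname -f']
		// quote escapes a single argument so the shell treats it as one word
		println(shell.quote("janna d'arc")) // 'janna d'"'"'arc'
		// join quotes each element and concatenates them with spaces
		println(shell.join(['sh', '-c', 'hostname -f'])) // sh -c 'hostname -f'
	}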