This commit is contained in:
ge
2022-05-14 00:20:02 +03:00
commit efd493bc3f
8 changed files with 1409 additions and 0 deletions

128
src/lib/parse_uri.sh Normal file
View File

@ -0,0 +1,128 @@
#! /usr/bin/env bash
# parse_uri.sh - URI parser function (mostly compatible with RFC3986).
# Copyright (c) 2022 ge <https://nixhacks.net/>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
parse_uri() {
    # Universal URI parser built on the generic regular expression from
    # RFC 3986, Appendix B. Uses bash builtins only (no grep/sed/cut/xargs),
    # so URI contents are never interpreted as a regex or a sed script.
    # Refer RFC3986: https://datatracker.ietf.org/doc/html/rfc3986
    # URL-encoded passwords supported.
    #
    # Usage: parse_uri URI
    # Return variables (assigned in the caller's scope; every one of them is
    # reset on each call, components that are absent are set to ''):
    #   scheme    - text before the first ':' ('' if URI has no scheme)
    #   username  - userinfo up to the first ':'
    #   password  - userinfo after the first ':' (may itself contain ':'),
    #               with '+' turned into a space and %XX sequences decoded
    #   hostname  - host without port; IPv6 literals keep their brackets
    #   port      - text after the host's ':' ('' if absent)
    #   path      - path without query and fragment
    #   query     - text between the first '?' and the fragment
    #   fragment  - text after the first '#'
    # Returns: 0 on success, 1 if URI does not match the generic pattern.
    local uri="${1-}"
    local authority userinfo host
    local encoded decoded byte
    # Groups: 2 = scheme, 4 = authority, 5 = path, 7 = query, 9 = fragment.
    local uri_re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$'
    # Groups: 1 = bracketed IPv6 literal, 3 = port.
    local ipv6_re='^(\[.*\])(:(.*))?$'
    # Greedy prefix, so the match always picks the LAST %XX in the string.
    local pct_re='^(.*)%([0-9A-Fa-f][0-9A-Fa-f])(.*)$'
    # Kept in a variable so that '~' is never subject to tilde expansion.
    local home_suffix=':~'

    scheme=
    username=
    password=
    hostname=
    port=
    path=
    query=
    fragment=

    [[ "$uri" =~ $uri_re ]] || return 1
    scheme="${BASH_REMATCH[2]}"
    authority="${BASH_REMATCH[4]}"
    path="${BASH_REMATCH[5]}"
    query="${BASH_REMATCH[7]}"
    fragment="${BASH_REMATCH[9]}"

    # * Get host and userinfo components.
    # A host cannot contain '@', so the LAST '@' is the separator.
    if [[ "$authority" == *@* ]]; then
        userinfo="${authority%@*}"
        host="${authority##*@}"
    else
        userinfo=
        host="$authority"
    fi

    # * Hack for 'schema://host:~/path' (home-relative remote path):
    # the trailing ':~' is not a port, it belongs to the path.
    if [[ "$host" == *"$home_suffix" ]]; then
        host="${host%"$home_suffix"}"
        if [[ "$path" == /* ]]; then
            path="~${path}"
        fi
    fi

    # * Get hostname and port
    if [[ "$host" =~ $ipv6_re ]]; then
        hostname="${BASH_REMATCH[1]}"
        port="${BASH_REMATCH[3]}"
    elif [[ "$host" == *:* ]]; then
        hostname="${host%%:*}"
        # Second ':'-separated field only.
        port="${host#*:}"
        port="${port%%:*}"
    else
        hostname="$host"
        port=
    fi

    # * Get username and password
    # Only the first ':' separates them; the password may contain more.
    if [[ "$userinfo" == *:* ]]; then
        username="${userinfo%%:*}"
        password="${userinfo#*:}"
    else
        username="$userinfo"
        password=
    fi

    # * Decode URL-encoded password
    # Walk the string right to left: each pass decodes the last %XX and moves
    # it, together with the plain text after it, into $decoded. Already
    # decoded bytes are never rescanned, so '%2541' yields '%41', not 'A'.
    # A '%' that is not followed by two hex digits is kept verbatim.
    if [[ -n "$password" ]]; then
        encoded="${password//+/ }"
        decoded=
        while [[ "$encoded" =~ $pct_re ]]; do
            printf -v byte '%b' "\\x${BASH_REMATCH[2]}"
            decoded="${byte}${BASH_REMATCH[3]}${decoded}"
            encoded="${BASH_REMATCH[1]}"
        done
        password="${encoded}${decoded}"
    fi

    return 0
}