From 7d81684acbe3cb066ad02b2331a80314b565bfa2 Mon Sep 17 00:00:00 2001 From: ge Date: Tue, 24 Dec 2024 08:09:46 +0300 Subject: [PATCH] Add missing exa-prefixed units, from_string(), to_string(), update example --- README.md | 2 +- cmd/dataunit.v | 54 ++---------- src/dataunits.v | 182 +++++++++++++++++++++++++++++++++++++++-- tests/convert_test.v | 12 --- tests/dataunits_test.v | 31 +++++++ 5 files changed, 215 insertions(+), 66 deletions(-) delete mode 100644 tests/convert_test.v create mode 100644 tests/dataunits_test.v diff --git a/README.md b/README.md index 1111cda..17b3141 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Data units converter +## The data size units converter Example: diff --git a/cmd/dataunit.v b/cmd/dataunit.v index 7c8387e..8770f3a 100644 --- a/cmd/dataunit.v +++ b/cmd/dataunit.v @@ -33,48 +33,6 @@ import os import flag import dataunits -const units = { - 'bit': dataunits.bit - 'nibble': dataunits.nibble - 'bytes': dataunits.bytes - 'kb': dataunits.kb - 'mb': dataunits.mb - 'gb': dataunits.gb - 'tb': dataunits.tb - 'pb': dataunits.pb - 'zb': dataunits.zb - 'yb': dataunits.yb - 'kib': dataunits.kib - 'mib': dataunits.mib - 'gib': dataunits.gib - 'tib': dataunits.tib - 'pib': dataunits.pib - 'zib': dataunits.zib - 'yib': dataunits.yib - 'kbit': dataunits.kbit - 'mbit': dataunits.mbit - 'gbit': dataunits.gbit - 'tbit': dataunits.tbit - 'pbit': dataunits.pbit - 'zbit': dataunits.zbit - 'ybit': dataunits.ybit - 'kibit': dataunits.kibit - 'mibit': dataunits.mibit - 'gibit': dataunits.gibit - 'tibit': dataunits.tibit - 'pibit': dataunits.pibit - 'zibit': dataunits.zibit - 'yibit': dataunits.yibit -} - -fn units_str() string { - mut str := []string{} - for key, _ in units { - str << key - } - return str.join(', ') -} - @[name: 'dataunit'] struct FlagConfig { help bool @@ -96,7 +54,7 @@ fn main() { flags.value = no_matches[0].f64() } if flags.help { - println('convert the value between *from* and *to* units.') + println('convert the value 
between data size units.') println('usage: dataunit -f -t ') println('options:') println(' -help print this help message and exit') @@ -112,15 +70,15 @@ fn main() { eprintln('no value passed, see -help for info') exit(2) } - unit_from := units[flags.from.to_lower()] or { - eprintln('invalid unit ${flags.from}, valid ones: ${units_str()}') + src := dataunits.from_string(flags.from) or { + eprintln('invalid source unit: ${err}') exit(1) } - unit_to := units[flags.to.to_lower()] or { - eprintln('invalid unit ${flags.to}, valid ones: ${units_str()}') + dst := dataunits.from_string(flags.to) or { + eprintln('invalid destination unit: ${err}') exit(1) } - result := '${dataunits.convert(flags.value, unit_from, unit_to):.20f}' + result := '${dataunits.convert(flags.value, src, dst):.20f}' splitted := result.split('.') if splitted[1].contains_only('0') { println(splitted[0]) diff --git a/src/dataunits.v b/src/dataunits.v index ddee423..ec7c804 100644 --- a/src/dataunits.v +++ b/src/dataunits.v @@ -25,9 +25,11 @@ module dataunits +import maps + pub type DataSize = f64 -pub fn (d DataSize) bits() f64 { +pub fn (d DataSize) bit() f64 { return f64(d) } @@ -59,6 +61,10 @@ pub fn (d DataSize) pb() f64 { return f64(d / pb) } +pub fn (d DataSize) eb() f64 { + return f64(d / eb) +} + pub fn (d DataSize) zb() f64 { return f64(d / zb) } @@ -87,6 +93,10 @@ pub fn (d DataSize) pib() f64 { return f64(d / pib) } +pub fn (d DataSize) eib() f64 { + return f64(d / eib) +} + pub fn (d DataSize) zib() f64 { return f64(d / zib) } @@ -115,6 +125,10 @@ pub fn (d DataSize) pbit() f64 { return f64(d / pbit) } +pub fn (d DataSize) ebit() f64 { + return f64(d / ebit) +} + pub fn (d DataSize) zbit() f64 { return f64(d / zbit) } @@ -143,6 +157,10 @@ pub fn (d DataSize) pibit() f64 { return f64(d / pibit) } +pub fn (d DataSize) eibit() f64 { + return f64(d / eibit) +} + pub fn (d DataSize) zibit() f64 { return f64(d / zibit) } @@ -160,7 +178,8 @@ pub const mb = kb * 1000 pub const gb = mb * 1000 pub 
const tb = gb * 1000 pub const pb = tb * 1000 -pub const zb = tb * 1000 +pub const eb = pb * 1000 +pub const zb = eb * 1000 pub const yb = zb * 1000 pub const kib = bytes * 1024 @@ -168,7 +187,8 @@ pub const mib = kib * 1024 pub const gib = mib * 1024 pub const tib = gib * 1024 pub const pib = tib * 1024 -pub const zib = tib * 1024 +pub const eib = pib * 1024 +pub const zib = eib * 1024 pub const yib = zib * 1024 pub const kbit = bit * 1000 @@ -176,7 +196,8 @@ pub const mbit = kbit * 1000 pub const gbit = mbit * 1000 pub const tbit = gbit * 1000 pub const pbit = tbit * 1000 -pub const zbit = pbit * 1000 +pub const ebit = pbit * 1000 +pub const zbit = ebit * 1000 pub const ybit = zbit * 1000 pub const kibit = bit * 1024 @@ -184,9 +205,69 @@ pub const mibit = kibit * 1024 pub const gibit = mibit * 1024 pub const tibit = gibit * 1024 pub const pibit = tibit * 1024 -pub const zibit = tibit * 1024 +pub const eibit = pibit * 1024 +pub const zibit = eibit * 1024 pub const yibit = zibit * 1024 +const units = { + 'bit': bit + 'nibble': nibble + 'bytes': bytes + 'kB': kb + 'MB': mb + 'GB': gb + 'TB': tb + 'PB': pb + 'EB': eb + 'ZB': zb + 'YB': yb + 'KiB': kib + 'MiB': mib + 'GiB': gib + 'TiB': tib + 'PiB': pib + 'EiB': eib + 'ZiB': zib + 'YiB': yib + 'kbit': kbit + 'Mbit': mbit + 'Gbit': gbit + 'Tbit': tbit + 'Pbit': pbit + 'Ebit': ebit + 'Zbit': zbit + 'Ybit': ybit + 'Kibit': kibit + 'Mibit': mibit + 'Gibit': gibit + 'Tibit': tibit + 'Pibit': pibit + 'Eibit': eibit + 'Zibit': zibit + 'Yibit': yibit +} + +pub const prefixes = { + // Metric size (1000^N) + 'kilo': 'k' + 'mega': 'M' + 'giga': 'G' + 'tera': 'T' + 'peta': 'P' + 'exa': 'E' + 'zetta': 'Z' + 'yotta': 'Y' + // Binary size (1024^N) + 'kibi': 'Ki' + 'mebi': 'Mi' + 'gibi': 'Gi' + 'tebi': 'Ti' + 'pebi': 'Pi' + 'exbi': 'Ei' + 'zebi': 'Zi' + 'yobi': 'Yi' +} + // convert returns the value converted between the *from* and *to* units. 
// Example: // ``` @@ -198,3 +279,94 @@ pub fn convert(value f64, from DataSize, to DataSize) f64 { } return f64(value * from / to) } + +// from_string parses input and returns the actual DataSize. +// Note: Case insensitivity makes unit abbreviations such as `Mb` (megabit) and `MB` (megabyte) +// ambiguous. Use `bit` suffix for bit units. The `b` suffix will be accepted as byte unit. +// Example: +// ``` +// assert dataunits.from_string('GiB')! == dataunits.gib +// assert dataunits.from_string('M')! == dataunits.mib +// assert dataunits.from_string('M', bits: true, metric: true)! == dataunits.mbit +// assert dataunits.from_string('ZeTtAbYtEs', ci: true)! == dataunits.zb +// ``` +pub fn from_string(input string, params ParseParams) !DataSize { + if !input.is_pure_ascii() { + return error('${input} non-ASCII characters is not allowed in data size unit') + } + unit := parse_unit_str(input, params) + if params.ci { + for key, value in units { + if key.to_lower_ascii() == unit { + return value + } + } + } + return units[unit] or { error('${input} is not a valid data size unit') } +} + +fn parse_unit_str(input string, params ParseParams) string { + mut unit := '' + match true { + input.to_lower_ascii() in ['byte', 'bytes'] { + return 'bytes' + } + input.to_lower_ascii() in ['bit', 'bits'] { + return 'bit' + } + input.len == 1 { + if params.metric { + unit = input + } else { + unit = input.to_upper_ascii() + 'i' + } + if params.bits { + unit += 'bit' + } else { + unit += 'B' + } + return unit + } + input.len == 2 && input[1] == u8(`b`) && params.ci == false { + if input[0] != u8(`k`) { + return input[..1] + 'bit' + } + return input[..1].to_upper_ascii() + 'bit' + } + else { + unit = input + } + } + if params.ci { + unit = unit.to_lower_ascii() + } + if unit.len == 5 && unit.ends_with('ibit') { + // prevent Gibit --> Git transform + return unit + } + unit = unit.replace_each(maps.flat_map[string, string, string](prefixes, |k, v| [ + k, + v, + ])) + unit = 
unit.replace_each(['bytes', 'B', 'byte', 'B']).replace_once('bits', 'bit') + return unit +} + +@[params] +pub struct ParseParams { +pub: + ci bool // if true parse string in case insensitive mode + bits bool // if true interpret single letter abbreviations as bit, otherwise as byte + metric bool // if true apply single letter as metric prefix (power of ten), otherwise as binary +} + +// to_string returns a string representation of data size unit in short form +// e.g. kB, Mbit, GiB, etc. +pub fn to_string(input DataSize) !string { + for key, value in units { + if value == input { + return key + } + } + return error('invalid input data size unit') +} diff --git a/tests/convert_test.v b/tests/convert_test.v deleted file mode 100644 index 0f666e1..0000000 --- a/tests/convert_test.v +++ /dev/null @@ -1,12 +0,0 @@ -import dataunits - -fn test_convert() { - assert (dataunits.nibble * 4).bytes() == 2 - assert (dataunits.bit * 8).bits() == 8 - assert (dataunits.bit * 8).bytes() == 1 - assert (dataunits.gib * 10).mib() == 10240 - assert (dataunits.gib * 5000).bytes() == i64(5368709120000) - assert (dataunits.mbit * 500).kb() == 62500 - assert dataunits.convert(500, dataunits.mbit, dataunits.kb) == 62500 - assert dataunits.DataSize(4000 * dataunits.gib).bytes() == f64(4294967296000) -} diff --git a/tests/dataunits_test.v b/tests/dataunits_test.v new file mode 100644 index 0000000..8d18710 --- /dev/null +++ b/tests/dataunits_test.v @@ -0,0 +1,31 @@ +import dataunits + +fn test_convert() { + assert (dataunits.nibble * 4).bytes() == 2 + assert (dataunits.bit * 8).bit() == 8 + assert (dataunits.bit * 8).bytes() == 1 + assert (dataunits.gib * 10).mib() == 10240 + assert (dataunits.gib * 5000).bytes() == i64(5368709120000) + assert (dataunits.mbit * 500).kb() == 62500 + assert dataunits.convert(500, dataunits.mbit, dataunits.kb) == 62500 + assert dataunits.DataSize(4000 * dataunits.gib).bytes() == f64(4294967296000) +} + +fn test_from_string() { + assert 
dataunits.from_string('GiB')! == dataunits.gib + assert dataunits.from_string('M')! == dataunits.mib + assert dataunits.from_string('m', ci: true)! == dataunits.mib + assert dataunits.from_string('M', bits: true, metric: true)! == dataunits.mbit + assert dataunits.from_string('ZeTtAbYtEs', ci: true)! == dataunits.zb + assert dataunits.from_string('bytes')! == dataunits.bytes + assert dataunits.from_string('byte')! == dataunits.bytes + assert dataunits.from_string('megabytes')! == dataunits.mb + assert dataunits.from_string('megabyte')! == dataunits.mb + assert dataunits.from_string('exbibit')! == dataunits.eibit + assert dataunits.from_string('Mb')! == dataunits.mbit + assert dataunits.from_string('MB')! == dataunits.mb + assert dataunits.from_string('Mb', ci: true)! == dataunits.mb + assert dataunits.from_string('Gibit')! == dataunits.gibit + assert dataunits.from_string('gibit', ci: true)! == dataunits.gibit + assert dataunits.from_string('Gib', ci: true)! == dataunits.gib +}