summaryrefslogtreecommitdiff
path: root/segment-file.sh
diff options
context:
space:
mode:
authorAaron Ball <nullspoon@oper.io>2018-11-14 10:51:50 -0700
committerAaron Ball <nullspoon@oper.io>2018-11-14 11:58:49 -0700
commit0c3363d807f591c9054f5bdabe501bf6bf67ace1 (patch)
treed24b980b25d4a4055345eb451b2f30c13e001b6c /segment-file.sh
downloadsegment-file-master.tar.gz
segment-file-master.tar.xz
Initial commitHEADmaster
This includes the readme, license, and the bash implementation of the file segmenter.
Diffstat (limited to 'segment-file.sh')
-rwxr-xr-xsegment-file.sh92
1 files changed, 92 insertions, 0 deletions
diff --git a/segment-file.sh b/segment-file.sh
new file mode 100755
index 0000000..50f70b8
--- /dev/null
+++ b/segment-file.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+
+# Bash substring parse cheatsheet
+# -------------------------------
+# %.* filename.blah.foo.txt -> filename.blah.foo
+# %%.* filename.blah.foo.txt -> filename
+# #*. filename.blah.foo.txt -> blah.foo.txt
+# ##*. filename.blah.foo.txt -> txt
+
+
+
+# split:
+# Splits the specified file into the specified number of segments. Output file
+# is created within the 'out' directory.
+#
+# @file Source file to split into multiple segments
+# @splitcount Number of segments to split @file into.
+split() {
+ local file="${1}"
+ local splitcount="${2}"
+
+ local lc="$(wc -l ${file} | cut -d ' ' -f 1)"
+ local lps=$(( lc / splitcount + 1 )) # Calculate lines per segment
+ local segment=0 # Current file segment int
+ local segline=0 # Current line in the current segment
+ local outfile='' # Current output file segment path
+
+ # Filename without extension (filename.txt -> filename)
+ local filebase="${file%.*}"
+ # File extension (filename.txt -> txt)
+ local fileext=".${file##*.}"
+
+ # If file extension is the same as the file base (with a preceeding dot), the
+ # source file has no extention. Set this variable to empty so the extention
+ # will be the segment number
+ [ "${fileext}" = ".${filebase}" ] && fileext=''
+
+ # Create output directory to keep things a bit more organized
+ [ ! -d out ] && mkdir out
+
+ # Ensure IFS is only separating on newlines
+ local oldifs="${IFS}"
+ export IFS=$'\n'
+
+
+ # Update the outfile path
+ outfile="out/${filebase}.${segment}${fileext}"
+ # Truncate the current output segment file. If we skip this and the file
+ # already exists, we will append to an existing file, corrupting output.
+ > "${outfile}"
+ printf "Writing to '%s' segment\n" "${outfile}"
+
+
+ # Iterrate over the source file, line by line.
+ for line in $(cat ${file}); do
+ if [ "${segline}" -eq "${lps}" ]; then
+ # Increment the file segment counter
+ segment=$(( segment + 1 ))
+ # Reset the segment line counter
+ segline=0
+
+ # Update the outfile path
+ outfile="out/${filebase}.${segment}${fileext}"
+ > "${outfile}"
+ printf "Writing to '%s' segment\n" "${outfile}"
+ fi
+
+ # Append line to segment file
+ printf -- "%s\n" "${line}" >> "${outfile}"
+
+ # Increment the segment line counter so we can ensure we don't write past
+ # the 'lines per segement' (lps) var.
+ segline=$(( segline + 1 ))
+ done
+
+ export IFS="${oldifs}"
+}
+
+
+# main: Validates the command-line arguments and splits @file.
+# @file Source file to split; @count Number of segments to create.
+main() {
+  local file="${1}"
+  local count="${2}"
+
+  # Ensure file and segment count are specified; diagnostics go to stderr
+  [ -z "${file}" ] && printf "Filename required\n" >&2 && return 1
+  [ -z "${count}" ] && printf "Segment count required\n" >&2 && return 1
+  split "${file}" "${count}"
+}
+
+main "${@}"

Generated by cgit