diff options
author | Aaron Ball <nullspoon@oper.io> | 2018-11-14 10:51:50 -0700 |
---|---|---|
committer | Aaron Ball <nullspoon@oper.io> | 2018-11-14 11:58:49 -0700 |
commit | 0c3363d807f591c9054f5bdabe501bf6bf67ace1 (patch) | |
tree | d24b980b25d4a4055345eb451b2f30c13e001b6c /segment-file.sh | |
download | segment-file-master.tar.gz segment-file-master.tar.xz |
This includes the readme, license, and the bash implementation of the
file segmenter.
Diffstat (limited to 'segment-file.sh')
-rwxr-xr-x | segment-file.sh | 92 |
1 files changed, 92 insertions, 0 deletions
#!/usr/bin/env bash
#
# segment-file.sh: split a text file into a number of line-based segments.
#
# Usage: segment-file.sh <file> <segment-count>
#
# Segments are written to the 'out' directory beneath the current working
# directory.

# Bash substring parse cheatsheet
# -------------------------------
# %.*   filename.blah.foo.txt -> filename.blah.foo
# %%.*  filename.blah.foo.txt -> filename
# #*.   filename.blah.foo.txt -> blah.foo.txt
# ##*.  filename.blah.foo.txt -> txt


# split:
# Splits the specified file into the specified number of segments. Output
# files are created within the 'out' directory and are named
# '<name>.<segment><.ext>', where <name> and <.ext> come from the source
# file's basename (any leading directories are dropped) and <segment> counts
# up from 0. If the source file has no extension, the segment number becomes
# the extension.
#
# Lines are distributed as evenly as possible: every segment receives
# (lines / splitcount) lines and the first (lines % splitcount) segments
# receive one extra. If the file has fewer lines than @splitcount, one
# single-line segment is written per line. Segment 0 is always created, even
# for an empty source file.
#
# Every line is written with a trailing newline, so an unterminated final
# line in the source gains one in the output.
#
# @file        Source file to split into multiple segments
# @splitcount  Number of segments to split @file into (positive integer)
#
# Returns 0 on success, 1 on invalid arguments or on an I/O failure.
split() {
  local file="${1}"
  local splitcount="${2}"

  # Reject anything that is not a positive decimal integer. This also guards
  # the division below against a division by zero.
  if [[ ! "${splitcount}" =~ ^[0-9]+$ ]] || (( 10#${splitcount} < 1 )); then
    printf "Segment count must be a positive integer\n" >&2
    return 1
  fi
  # Force base 10 so a value such as '08' is not parsed as (invalid) octal
  splitcount=$(( 10#${splitcount} ))

  if [[ ! -f "${file}" || ! -r "${file}" ]]; then
    printf "Cannot read file '%s'\n" "${file}" >&2
    return 1
  fi

  # Count lines the same way the read loop below sees them: awk's NR includes
  # a final line that lacks a trailing newline, which 'wc -l' would miss.
  # Reading from stdin keeps awk from interpreting an odd filename.
  local lc
  lc="$(awk 'END { print NR }' < "${file}")" || return 1

  local base_lines=$(( lc / splitcount ))  # Lines every segment receives
  local extra=$(( lc % splitcount ))       # Leading segments that get one more
  local segment=0                          # Current file segment int
  local segline=0                          # Current line in the current segment
  local seglimit=$(( base_lines + (extra > 0 ? 1 : 0) ))  # Lines in this segment
  local outfile=''                         # Current output file segment path
  local line=''

  # Work from the basename so a source path containing directories still
  # lands directly inside 'out'.
  local name="${file##*/}"
  # Filename without extension (filename.txt -> filename)
  local filebase="${name}"
  # File extension with its leading dot (filename.txt -> .txt)
  local fileext=''

  # Only treat the text after the last dot as an extension when something
  # precedes that dot. Names without a dot, and dotfiles such as '.bashrc',
  # have no extension, so the segment number becomes the extension.
  if [[ "${name}" == ?*.* ]]; then
    filebase="${name%.*}"
    fileext=".${name##*.}"
  fi

  # Create output directory to keep things a bit more organized
  mkdir -p out || return 1

  # Update the outfile path
  outfile="out/${filebase}.${segment}${fileext}"
  # Truncate the current output segment file. If we skip this and the file
  # already exists, we will append to an existing file, corrupting output.
  : > "${outfile}" || return 1
  printf "Writing to '%s' segment\n" "${outfile}"

  # Iterate over the source file, line by line. 'IFS= read -r' preserves blank
  # lines, leading/trailing whitespace and backslashes, and never glob-expands
  # the content. The '|| [[ -n ... ]]' clause picks up an unterminated final
  # line.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    if (( segline >= seglimit )); then
      # Increment the file segment counter
      segment=$(( segment + 1 ))
      # Reset the segment line counter
      segline=0
      # Only the first 'extra' segments carry the additional line
      seglimit=$(( base_lines + (segment < extra ? 1 : 0) ))

      # Update the outfile path
      outfile="out/${filebase}.${segment}${fileext}"
      : > "${outfile}" || return 1
      printf "Writing to '%s' segment\n" "${outfile}"
    fi

    # Append line to segment file
    printf -- '%s\n' "${line}" >> "${outfile}" || return 1

    # Increment the segment line counter so we can ensure we don't write past
    # the line limit of the current segment.
    segline=$(( segline + 1 ))
  done < "${file}"
}


# main:
# Validates the command line arguments and hands them to split.
#
# @file   Path of the file to split
# @count  Number of segments to create
#
# Returns 1 (with a message on stderr) when an argument is missing, otherwise
# the return status of split.
main() {
  local file="${1:-}"
  local count="${2:-}"

  # Ensure file and segment count are specified
  if [[ -z "${file}" ]]; then
    printf "Filename required\n" >&2
    return 1
  fi
  if [[ -z "${count}" ]]; then
    printf "Segment count required\n" >&2
    return 1
  fi

  # Split (Croatia)!
  split "${file}" "${count}"
}

# Quoted so arguments containing whitespace reach main intact
main "$@"