diff options
author | Aaron Ball <nullspoon@oper.io> | 2018-11-14 10:51:50 -0700 |
---|---|---|
committer | Aaron Ball <nullspoon@oper.io> | 2018-11-14 11:58:49 -0700 |
commit | 0c3363d807f591c9054f5bdabe501bf6bf67ace1 (patch) | |
tree | d24b980b25d4a4055345eb451b2f30c13e001b6c | |
download | segment-file-0c3363d807f591c9054f5bdabe501bf6bf67ace1.tar.gz segment-file-0c3363d807f591c9054f5bdabe501bf6bf67ace1.tar.xz |
This includes the readme, license, and the bash implementation of the
file segmenter.
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | LICENSE | 26 | ||||
-rw-r--r-- | README.adoc | 25 | ||||
-rwxr-xr-x | segment-file.sh | 92 |
4 files changed, 144 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1fcb152 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +out @@ -0,0 +1,26 @@ +Copyright 2018 Aaron Ball <nullspoon@oper.io> + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.adoc b/README.adoc new file mode 100644 index 0000000..ceaec6b --- /dev/null +++ b/README.adoc @@ -0,0 +1,25 @@ +README +====== + +Various implementations of a file segmenter. Note that all of these preserve +the original file after segmentation. + + +segment-file.sh +--------------- + +Bash implementation. 
Takes two arguments: **filename** and **segment count**. + +Usage +~~~~~ + + segment-file.sh <filename.txt> <segment_count> + + +Example +~~~~~~~ + + segment-file.sh /var/log/messages 8 + +This example will create an **out** directory containing 8 segments of the file +/var/log/messages. diff --git a/segment-file.sh b/segment-file.sh new file mode 100755 index 0000000..50f70b8 --- /dev/null +++ b/segment-file.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +# Bash substring parse cheatsheet +# ------------------------------- +# %.* filename.blah.foo.txt -> filename.blah.foo +# %%.* filename.blah.foo.txt -> filename +# #*. filename.blah.foo.txt -> blah.foo.txt +# ##*. filename.blah.foo.txt -> txt + + + +# split: +# Splits the specified file into the specified number of segments. Output files +# are created within the 'out' directory. +# +# @file Source file to split into multiple segments +# @splitcount Number of segments to split @file into. +split() { + local file="${1}" + local splitcount="${2}" + + local lc="$(wc -l ${file} | cut -d ' ' -f 1)" + local lps=$(( lc / splitcount + 1 )) # Calculate lines per segment + local segment=0 # Current file segment int + local segline=0 # Current line in the current segment + local outfile='' # Current output file segment path + + # Filename without extension (filename.txt -> filename) + local filebase="${file%.*}" + # File extension with leading dot (filename.txt -> .txt) + local fileext=".${file##*.}" + + # If file extension is the same as the file base (with a preceding dot), the + # source file has no extension. Set this variable to empty so the extension + # will be the segment number + [ "${fileext}" = ".${filebase}" ] && fileext='' + + # Create output directory to keep things a bit more organized + [ ! -d out ] && mkdir out + + # Ensure IFS is only separating on newlines + local oldifs="${IFS}" + export IFS=$'\n' + + + # Update the outfile path + outfile="out/${filebase}.${segment}${fileext}" + # Truncate the current output segment file. 
If we skip this and the file + # already exists, we will append to an existing file, corrupting output. + > "${outfile}" + printf "Writing to '%s' segment\n" "${outfile}" + + + # Iterate over the source file, line by line. + for line in $(cat ${file}); do + if [ "${segline}" -eq "${lps}" ]; then + # Increment the file segment counter + segment=$(( segment + 1 )) + # Reset the segment line counter + segline=0 + + # Update the outfile path + outfile="out/${filebase}.${segment}${fileext}" + > "${outfile}" + printf "Writing to '%s' segment\n" "${outfile}" + fi + + # Append line to segment file + printf -- "%s\n" "${line}" >> "${outfile}" + + # Increment the segment line counter so we can ensure we don't write past + # the 'lines per segment' (lps) var. + segline=$(( segline + 1 )) + done + + export IFS="${oldifs}" +} + + +main() { + local file="${1}" + local count="${2}" + + # Ensure file and segment count are specified + [ -z "${file}" ] && printf "Filename required\n" && return 1 + [ -z "${count}" ] && printf "Segment count required\n" && return 1 + + # Split (Croatia)! + split "${file}" "${count}" +} + +main ${@} |