summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Ball <nullspoon@oper.io>2018-11-14 10:51:50 -0700
committerAaron Ball <nullspoon@oper.io>2018-11-14 11:58:49 -0700
commit0c3363d807f591c9054f5bdabe501bf6bf67ace1 (patch)
treed24b980b25d4a4055345eb451b2f30c13e001b6c
downloadsegment-file-0c3363d807f591c9054f5bdabe501bf6bf67ace1.tar.gz
segment-file-0c3363d807f591c9054f5bdabe501bf6bf67ace1.tar.xz
Initial commitHEADmaster
This includes the readme, license, and the bash implementation of the file segmenter.
-rw-r--r--.gitignore1
-rw-r--r--LICENSE26
-rw-r--r--README.adoc25
-rwxr-xr-xsegment-file.sh92
4 files changed, 144 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1fcb152
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+out
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5875fc6
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,26 @@
+Copyright 2018 Aaron Ball <nullspoon@oper.io>
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.adoc b/README.adoc
new file mode 100644
index 0000000..ceaec6b
--- /dev/null
+++ b/README.adoc
@@ -0,0 +1,25 @@
+README
+======
+
+Various implementations of a file segmenter. Note that all of these preserve
+the original file after segmentation.
+
+
+segment-file.sh
+---------------
+
+Bash implementation. Takes two arguments: **filename** and **segment count**.
+
+Usage
+~~~~~
+
+ segment-file.sh <filename.txt> <segment_count>
+
+
+Example
+~~~~~~~
+
+ segment-file.sh /var/log/messages 8
+
+This example will create an **out** directory containing 8 segments of the file
+/var/log/messages.
diff --git a/segment-file.sh b/segment-file.sh
new file mode 100755
index 0000000..50f70b8
--- /dev/null
+++ b/segment-file.sh
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+
+# Bash substring parse cheatsheet
+# -------------------------------
+# %.* filename.blah.foo.txt -> filename.blah.foo
+# %%.* filename.blah.foo.txt -> filename
+# #*. filename.blah.foo.txt -> blah.foo.txt
+# ##*. filename.blah.foo.txt -> txt
+
+
+
+# split:
+# Splits the specified file into the specified number of segments. Output file
+# is created within the 'out' directory.
+#
+# @file Source file to split into multiple segments
+# @splitcount Number of segments to split @file into.
+split() {
+ local file="${1}"
+ local splitcount="${2}"
+
+ local lc="$(wc -l ${file} | cut -d ' ' -f 1)"
+ local lps=$(( lc / splitcount + 1 )) # Calculate lines per segment
+ local segment=0 # Current file segment int
+ local segline=0 # Current line in the current segment
+ local outfile='' # Current output file segment path
+
+ # Filename without extension (filename.txt -> filename)
+ local filebase="${file%.*}"
+ # File extension (filename.txt -> txt)
+ local fileext=".${file##*.}"
+
+ # If file extension is the same as the file base (with a preceeding dot), the
+ # source file has no extention. Set this variable to empty so the extention
+ # will be the segment number
+ [ "${fileext}" = ".${filebase}" ] && fileext=''
+
+ # Create output directory to keep things a bit more organized
+ [ ! -d out ] && mkdir out
+
+ # Ensure IFS is only separating on newlines
+ local oldifs="${IFS}"
+ export IFS=$'\n'
+
+
+ # Update the outfile path
+ outfile="out/${filebase}.${segment}${fileext}"
+ # Truncate the current output segment file. If we skip this and the file
+ # already exists, we will append to an existing file, corrupting output.
+ > "${outfile}"
+ printf "Writing to '%s' segment\n" "${outfile}"
+
+
+ # Iterrate over the source file, line by line.
+ for line in $(cat ${file}); do
+ if [ "${segline}" -eq "${lps}" ]; then
+ # Increment the file segment counter
+ segment=$(( segment + 1 ))
+ # Reset the segment line counter
+ segline=0
+
+ # Update the outfile path
+ outfile="out/${filebase}.${segment}${fileext}"
+ > "${outfile}"
+ printf "Writing to '%s' segment\n" "${outfile}"
+ fi
+
+ # Append line to segment file
+ printf -- "%s\n" "${line}" >> "${outfile}"
+
+ # Increment the segment line counter so we can ensure we don't write past
+ # the 'lines per segement' (lps) var.
+ segline=$(( segline + 1 ))
+ done
+
+ export IFS="${oldifs}"
+}
+
+
+main() {
+ local file="${1}"
+ local count="${2}"
+
+ # Ensure file and segment count are specified
+ [ -z "${file}" ] && printf "Filename required\n" && return 1
+ [ -z "${count}" ] && printf "Segment count required\n" && return 1
+
+ # Split (Croatia)!
+ split "${file}" "${count}"
+}
+
+main ${@}

Generated by cgit