summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Ball <nullspoon@oper.io>2019-03-09 20:27:01 -0700
committerAaron Ball <nullspoon@oper.io>2019-03-09 20:27:01 -0700
commit67e1f43a0fd98d27c6d32c7def9a15ccc223aad4 (patch)
tree61569757b69602923b7376b3e9274a3af773d871
parent93a26f6394546f53b9531cead1713311de0dda78 (diff)
parentc7033290ac21023c74f59c6f02a918cf6b2b36a8 (diff)
downloadupwgen-67e1f43a0fd98d27c6d32c7def9a15ccc223aad4.tar.gz
upwgen-67e1f43a0fd98d27c6d32c7def9a15ccc223aad4.tar.xz
Merge branch 'i18n-breakout'
-rw-r--r--Makefile4
-rw-r--r--src/i18n_cat.c282
-rw-r--r--src/i18n_cat.h47
-rw-r--r--src/main.c134
4 files changed, 369 insertions, 98 deletions
diff --git a/Makefile b/Makefile
index 5ff1e25..731b5b0 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,9 @@ out = upwgen
PREFIX = /usr/bin
all:
- $(CC) $(CCOPTS) src/main.c -o $(out)
+ @if [ ! -d obj ]; then mkdir obj; fi
+ $(CC) $(CCOPTS) -c src/i18n_cat.c -o obj/i18n_cat.o
+ $(CC) $(CCOPTS) src/main.c obj/*.o -o $(out)
install:
mkdir -p $(DESTDIR)/$(PREFIX)
diff --git a/src/i18n_cat.c b/src/i18n_cat.c
new file mode 100644
index 0000000..18712ac
--- /dev/null
+++ b/src/i18n_cat.c
@@ -0,0 +1,282 @@
+/**
+ * upwgen generates random internationalized passwords
+ * Copyright (C) 2019 Aaron Ball <nullspoon@oper.io>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "i18n_cat.h"
+
+
+/**
+ * intrcat:
+ * Integer range concatenate. Appends every integer of the inclusive range
+ * [rstart, rend] to a zero-terminated unsigned int array, then re-terminates
+ * the array with a 0.
+ *
+ * The append position is found by scanning for the first 0 element, so the
+ * array must already be zero-terminated (an all-zero array counts as empty).
+ * No bounds checking is performed: the caller must provide room for the
+ * existing contents, the new range, and the terminating 0. Because 0 is the
+ * terminator, a range containing 0 cannot be stored meaningfully.
+ *
+ * @arr Zero-terminated array to cat range of ints to
+ * @rstart Range start integer (inclusive)
+ * @rend Range end integer (inclusive)
+ *
+ * @return Number of integers appended to array (0 when rstart > rend)
+ */
+unsigned int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
+  size_t pos = 0;            // Index of the array's terminator
+  unsigned int appended = 0; // Elements actually written
+
+  // Seek to the current end of the array
+  while(arr[pos] != 0)
+    pos++;
+
+  // An inverted range appends nothing. The exit test sits before the
+  // increment so that rend == UINT_MAX cannot wrap around and loop forever.
+  if(rstart <= rend) {
+    unsigned int value = rstart;
+    for(;;) {
+      arr[pos++] = value;
+      appended++;
+      if(value == rend)
+        break;
+      value++;
+    }
+  }
+
+  arr[pos] = 0;
+  return appended;
+}
+
+
+/**
+ * i18n_cat_ascii_upper:
+ * Appends the ASCII [English] upper case letters (A through Z) to the
+ * destination array.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Count reported by intrcat for the appended range
+ */
+unsigned int i18n_cat_ascii_upper(unsigned int* dest) {
+  const unsigned int first = 0x41; // 'A' (dec 65)
+  const unsigned int last = 0x5A;  // 'Z' (dec 90)
+
+  return intrcat(dest, first, last);
+}
+
+
+/**
+ * i18n_cat_ascii_lower:
+ * Appends the ASCII [English] lower case letters (a through z) to the
+ * destination array.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Count reported by intrcat for the appended range
+ */
+unsigned int i18n_cat_ascii_lower(unsigned int* dest) {
+  const unsigned int first = 0x61; // 'a' (dec 97)
+  const unsigned int last = 0x7A;  // 'z' (dec 122)
+
+  return intrcat(dest, first, last);
+}
+
+
+/**
+ * i18n_cat_ascii_numerals:
+ * Appends the ASCII numerals (0 through 9) to the destination array.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Count reported by intrcat for the appended range
+ */
+unsigned int i18n_cat_ascii_numerals(unsigned int* dest) {
+  const unsigned int first = 0x30; // '0' (dec 48)
+  const unsigned int last = 0x39;  // '9' (dec 57)
+
+  return intrcat(dest, first, last);
+}
+
+
+/**
+ * i18n_cat_ascii_symbols:
+ * Appends the four runs of ASCII punctuation/symbol characters that sit
+ * between the numerals and letters to the destination array.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Sum of the counts reported by intrcat for each appended range
+ */
+unsigned int i18n_cat_ascii_symbols(unsigned int* dest) {
+  // Inclusive {first, last} pairs, appended in this order
+  static const unsigned int spans[][2] = {
+    {33, 47},   // English symbols ! - /
+    {58, 64},   // English symbols : - @
+    {91, 96},   // English symbols [ - `
+    {123, 126}, // English symbols { - ~
+  };
+  unsigned int total = 0;
+
+  for(size_t idx = 0; idx < sizeof spans / sizeof spans[0]; idx++)
+    total += intrcat(dest, spans[idx][0], spans[idx][1]);
+
+  return total;
+}
+
+
+/**
+ * i18n_cat_ascii:
+ * Appends the whole printable ASCII range to the destination array, leaving
+ * out the space character at dec 32. The range covers the English numerals,
+ * upper case letters, lower case letters, and symbols.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Count reported by intrcat for the appended range
+ */
+unsigned int i18n_cat_ascii(unsigned int* dest) {
+  const unsigned int first_printable = 0x0021; // '!'
+  const unsigned int last_printable = 0x007E;  // '~'
+
+  return intrcat(dest, first_printable, last_printable);
+}
+
+
+/**
+ * i18n_cat_one:
+ * Appends the first group of unicode characters. This group covers some of
+ * the most common languages in the world, namely those written in the Latin
+ * script, and also includes the IPA extension characters.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Sum of the counts reported by intrcat for each appended range
+ */
+unsigned int i18n_cat_one(unsigned int* dest) {
+  // Inclusive {first, last} code point pairs, appended in this order
+  static const unsigned int blocks[][2] = {
+    {0x00A1, 0x00FF}, // Latin-1 Supplement
+    {0x0100, 0x017F}, // Latin extended A
+    {0x0180, 0x024F}, // Latin extended B
+    {0x0250, 0x02AF}, // IPA Extensions
+  };
+  unsigned int total = 0;
+
+  for(size_t idx = 0; idx < sizeof blocks / sizeof blocks[0]; idx++)
+    total += intrcat(dest, blocks[idx][0], blocks[idx][1]);
+
+  return total;
+}
+
+
+/**
+ * i18n_cat_two:
+ * Appends the second most popular group of unicode characters. This group
+ * attempts to cover scripts used by the second most common languages in the
+ * world. In this case, this includes the Devanagari (Hindi, Sanskrit), Hebrew,
+ * Arabic, and Cyrillic blocks.
+ *
+ * Combining vowel marks are left out of the Devanagari, Hebrew, and Arabic
+ * ranges: they modify the previous character, which does not work when the
+ * previous character comes from a different script.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Sum of the counts reported by intrcat for each appended range
+ */
+unsigned int i18n_cat_two(unsigned int* dest) {
+  // Inclusive {first, last} code point pairs, appended in this order
+  static const unsigned int blocks[][2] = {
+    // Devanagari (Sanskrit, Hindi, Marathi, Sindhi, Nepali, etc), without
+    // the vowel signs
+    {0x0904, 0x0939},
+    {0x0958, 0x0961},
+    {0x0964, 0x096F},
+    {0x0972, 0x097F},
+
+    // Hebrew consonants only
+    {0x05D0, 0x05EA},
+    {0x05F0, 0x05F4},
+
+    // Arabic letters, digits, and punctuation. Split into several ranges to
+    // skip the combining marks at 0x064B-0x065F and 0x0670 and the Quranic
+    // annotation signs at 0x06D6-0x06ED.
+    {0x061E, 0x064A},
+    {0x0660, 0x066F},
+    {0x0671, 0x06D5},
+    {0x06EE, 0x06FF},
+
+    // Cyrillic and Cyrillic supplement
+    // NOTE(review): the first range stops at 0x04F0 rather than the end of
+    // the Cyrillic block (0x04FF) and spans the combining marks at
+    // 0x0483-0x0489 - confirm whether that is intended.
+    {0x0400, 0x04F0},
+    {0x0500, 0x052F},
+  };
+  unsigned int total = 0;
+
+  for(size_t idx = 0; idx < sizeof blocks / sizeof blocks[0]; idx++)
+    total += intrcat(dest, blocks[idx][0], blocks[idx][1]);
+
+  return total;
+}
+
+
+/**
+ * i18n_cat_three:
+ * Appends the third most popular group of unicode characters. This group
+ * attempts to cover scripts used by the third most common languages in the
+ * world. In this case, this includes the Armenian, Bengali, Greek, and Coptic
+ * blocks.
+ *
+ * Each range is listed exactly once so that no character is over-represented
+ * in the resulting array.
+ *
+ * NOTE: Using this function will likely introduce characters for which your
+ * font does not have glyphs.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Sum of the counts reported by intrcat for each appended range
+ */
+unsigned int i18n_cat_three(unsigned int* dest) {
+  // Inclusive {first, last} code point pairs, appended in this order
+  static const unsigned int blocks[][2] = {
+    // Armenian
+    // Armenian has a gap at 0x0557, 0x0558, 0x058B, and 0x058C
+    {0x0531, 0x0556},
+    {0x0559, 0x058A},
+
+    // Bengali
+    {0x0985, 0x098C},
+    {0x098F, 0x0990},
+    {0x0993, 0x09A8},
+    {0x09AA, 0x09B0},
+    {0x09B2, 0x09B2},
+    {0x09B6, 0x09B9},
+    {0x09DC, 0x09DD},
+    // NOTE(review): these two ranges were previously 0x09DF-0x09D3 (inverted,
+    // so nothing was appended) and 0x09D6-0x09DC (mostly unassigned code
+    // points). They are read here as typos for the 0x09Ex row: the letters
+    // at 0x09DF-0x09E1, then the digits and signs at 0x09E6-0x09FB. The
+    // combining vowel signs at 0x09E2-0x09E3 are left out. Confirm.
+    {0x09DF, 0x09E1},
+    {0x09E6, 0x09FB},
+
+    // Greek and Coptic
+    // These are complicated because they are missing assigned values for
+    // 0x0378, 0x0379, 0x0380, 0x0381, 0x0382, 0x0383, 0x038B, 0x038D, 0x03A2
+    {0x0370, 0x0377},
+    {0x037A, 0x037F},
+    {0x0384, 0x038A},
+    {0x038C, 0x038C},
+    {0x038E, 0x03A1},
+    {0x03A3, 0x03FF},
+  };
+  unsigned int total = 0;
+
+  for(size_t idx = 0; idx < sizeof blocks / sizeof blocks[0]; idx++)
+    total += intrcat(dest, blocks[idx][0], blocks[idx][1]);
+
+  return total;
+}
+
+
+/**
+ * i18n_cat_four:
+ * Appends the fourth most popular group of unicode characters. This group
+ * attempts to cover scripts used by the fourth most common languages in the
+ * world. In this case, this includes the Thaana, NKo, Samaritan, Mandaic,
+ * Syriac, Runic, Tifinagh, and Georgian blocks.
+ *
+ * NOTE: Using this function will likely introduce characters for which your
+ * font does not have glyphs.
+ *
+ * @dest Destination int array to append to
+ *
+ * @return Sum of the counts reported by intrcat for each appended range
+ */
+unsigned int i18n_cat_four(unsigned int* dest) {
+  // Inclusive {first, last} code point pairs, appended in this order
+  static const unsigned int blocks[][2] = {
+    {0x0780, 0x07A5}, // Thaana
+    {0x07C0, 0x07EA}, // NKo
+    {0x0800, 0x0815}, // Samaritan
+    {0x0830, 0x083E}, // Samaritan
+    {0x0840, 0x085B}, // Mandaic
+
+    {0x0710, 0x072F}, // Syriac
+    {0x074D, 0x074F}, // Syriac
+
+    {0x16A0, 0x16F8}, // Runic
+    {0x2D30, 0x2D67}, // Tifinagh
+
+    // Georgian
+    {0x10A0, 0x10C5},
+    {0x10C7, 0x10C7},
+    {0x10CD, 0x10CD},
+    {0x10D0, 0x10FF},
+  };
+  unsigned int total = 0;
+
+  for(size_t idx = 0; idx < sizeof blocks / sizeof blocks[0]; idx++)
+    total += intrcat(dest, blocks[idx][0], blocks[idx][1]);
+
+  return total;
+}
+
+
+/**
+ * i18n_dump_arr:
+ * Prints a zero-terminated array of unsigned ints representing international
+ * characters. Outputs to STDOUT one line per element: the value in hex, the
+ * value in decimal, and the character itself in square brackets.
+ *
+ * @arr Zero-terminated unicode array to print
+ */
+void i18n_dump_arr(unsigned int* arr) {
+  const unsigned int* cursor;
+
+  for(cursor = arr; *cursor != 0; cursor++) {
+    // %u rather than %d: the elements are unsigned.
+    // NOTE(review): %lc formally takes a wint_t argument; an unsigned int is
+    // passed here - confirm the two types match on the target platforms.
+    printf("0x%04x %5u: [%lc]\n", *cursor, *cursor, *cursor);
+  }
+}
diff --git a/src/i18n_cat.h b/src/i18n_cat.h
new file mode 100644
index 0000000..02ac3f0
--- /dev/null
+++ b/src/i18n_cat.h
@@ -0,0 +1,47 @@
+/**
+ * upwgen generates random internationalized passwords
+ * Copyright (C) 2019 Aaron Ball <nullspoon@oper.io>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <locale.h>
+
+/*
+ * Deprecated.
+ * NOTE(review): no definition of this function appears in i18n_cat.c -
+ * confirm whether this declaration can be removed.
+ */
+unsigned int i18n_cat_arr(unsigned int*);
+
+/**
+ * Functions to append the ascii basic latin characters to the specified
+ * zero-terminated unsigned int array.
+ */
+unsigned int i18n_cat_ascii(unsigned int* dest);
+unsigned int i18n_cat_ascii_numerals(unsigned int* dest);
+unsigned int i18n_cat_ascii_upper(unsigned int* dest);
+unsigned int i18n_cat_ascii_lower(unsigned int* dest);
+unsigned int i18n_cat_ascii_symbols(unsigned int* dest);
+
+
+/**
+ * Functions to append to the specified zero-terminated unsigned int array the
+ * first, second, third, and fourth most used character set groups around the
+ * world.
+ */
+unsigned int i18n_cat_one(unsigned int* dest);
+unsigned int i18n_cat_two(unsigned int* dest);
+unsigned int i18n_cat_three(unsigned int* dest);
+unsigned int i18n_cat_four(unsigned int* dest);
+
+/**
+ * Prints each element of the zero-terminated array to STDOUT.
+ */
+void i18n_dump_arr(unsigned int* arr);
diff --git a/src/main.c b/src/main.c
index 1f048ed..cb22210 100644
--- a/src/main.c
+++ b/src/main.c
@@ -19,97 +19,15 @@
#include <time.h>
#include <locale.h>
-
-// intrcat:
-// Integer range concatenate. Appends the specified integer range to an int
-// array.
-//
-// @arr Array to cat range of ints to
-// @rstart Range start integer
-// @rend Range end integer
-//
-// @return Number of integers appended to array
-int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
- int i = 0;
- int total = rend - rstart; // Calculate our return count
-
- while(arr[i] != '\0')
- i++;
-
- while(rstart <= rend) {
- arr[i] = rstart;
- //printf("% -4d % -7d %lc\n", i, rstart, rstart);
- rstart++;
- i++;
- }
-
- arr[i] = '\0';
- return total;
-}
-
-
-// populate_intl_arr:
-// Populates an unsigned integer array with common unicode (utf-8) language
-// alphabets and symbols.
-//
-// Some example unicode integer ranges:
-// 33 - 126 Standard english ascii
-// 256 - 383 Latin extended A block
-// 256 - 383 Latin extended B block
-// 913 - 969 Greek
-// 1040 - 1103 Russian
-// 1329 - 1414 Armenian
-// 1488 - 1514 Hebrew
-// 65166 - 65265 Arabic
-//
-// No/rare font support (boo!)
-// 2325 - 2373 Devanagari (Hindi)
-// 2437 - 2509 Bengali alphabet
-// 2949 - 3020 Tamil
-// 3585 - 3663 Thai
-// 5792 - 5880 Runic
-// 11392 - 11483 Coptic alphabet
-// 66560 - 66639 Deseret
-//
-// @out Unsigned int array to be populated.
-//
-// @return Size of the array contents
-int populate_intl_arr(unsigned int* out) {
- int count = 0;
-
- // Populate the array
- count += intrcat(out, 33, 126); // English
- count += intrcat(out, 256, 383); // Latin A block
- count += intrcat(out, 399, 691); // Latin B block
- count += intrcat(out, 913, 969); // Greek
- count += intrcat(out, 1040, 1103); // Russian
- count += intrcat(out, 1329, 1414); // Armenian
- count += intrcat(out, 1488, 1514); // Hebrew
- count += intrcat(out, 65166, 65265); // Arabic
-
- return count;
-}
-
-
-// print_intl_arr:
-// Prints array containing unsigned ints representing internal characters.
-// Outputs to STDOUT the unicode decimal, followed by the unicode character.
-//
-// @arr Unicode array to print
-void print_intl_arr(unsigned int* arr) {
- int i = 0; // cursor
-
- while(arr[i] != '\0') {
- printf("%5d: [%lc]\n", arr[i], arr[i]);
- i++;
- }
-}
+#include "i18n_cat.h"
void usage() {
printf(
"Upwgen is a password generator with international support. If no length\n"
- "is specified, defaults to 32 characters output length\n\n"
+ "is specified, defaults to 32 characters output length, selecting from\n"
+ "the standard English character set (lower case, upper case, numerals,\n"
+ "and symbols).\n\n"
"Usage:\n upwgen [options] [length]\n\n"
"Options:\n"
" -c,--capitalize Include at least one capital letter in output\n"
@@ -117,6 +35,10 @@ void usage() {
" -n,--numerals Include at least one numeral in output\n"
" -y,--symbols Include at least one symbol in output\n"
" -i,--i18n Include at least one international letter in output\n"
+ " -1 Include chars from the most used scripts in the world\n"
+ " -2 Include chars from the second most used scripts in the world\n"
+ " -3 Include chars from the third most used scripts in the world\n"
+ " -4 Include chars from the forth most used scripts in the world\n"
"\n"
" -h,--help Print this help text\n"
);
@@ -125,11 +47,11 @@ void usage() {
int main(int argc, char* argv[]) {
struct timespec ts; // Timespec for seeding rng
- int count; // Number of chars to choose from
+ unsigned int count; // Number of chars to choose from
int len; // Password length
int i; // Arg index
unsigned long seed; // Seed for the RNG (current seconds * nanoseconds)
- unsigned int chars[1024]; // Uint array to hold international chars
+ unsigned int chars[4096]; // Uint array to hold international chars
// Initialize
count = 0;
@@ -140,21 +62,39 @@ int main(int argc, char* argv[]) {
while(i < argc) {
if(strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--capitals") == 0) {
- count += intrcat(chars, 65, 90); // English uppercase
+ count += i18n_cat_ascii_upper(chars);
+
} else if(strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lower") == 0) {
- count += intrcat(chars, 97, 122); // English lower case
+ count += i18n_cat_ascii_lower(chars);
+
} else if(strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--numerals") == 0) {
- count += intrcat(chars, 48, 57); // English numerals
+ count += i18n_cat_ascii_numerals(chars);
+
} else if(strcmp(argv[i], "-y") == 0 || strcmp(argv[i], "--symbols") == 0) {
- count += intrcat(chars, 33, 47); // English symbols ! - /
- count += intrcat(chars, 58, 64); // English symbols : - @
- count += intrcat(chars, 91, 96); // English symbols [ - `
- count += intrcat(chars, 123, 126); // English symbols { - ~
+ count += i18n_cat_ascii_symbols(chars);
+
} else if(strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--i18n") == 0) {
- count += populate_intl_arr(chars);
+ count += i18n_cat_one(chars);
+ count += i18n_cat_two(chars);
+ count += i18n_cat_three(chars);
+ count += i18n_cat_four(chars);
+
+ } else if(strcmp(argv[i], "-1") == 0) {
+ count += i18n_cat_one(chars);
+
+ } else if(strcmp(argv[i], "-2") == 0) {
+ count += i18n_cat_two(chars);
+
+ } else if(strcmp(argv[i], "-3") == 0) {
+ count += i18n_cat_three(chars);
+
+ } else if(strcmp(argv[i], "-4") == 0) {
+ count += i18n_cat_four(chars);
+
} else if(strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
usage();
return 0;
+
} else {
// If we reach this block, the user specified a custom length (or
// fatfingered something). Test for ability to convert from str to int
@@ -172,7 +112,7 @@ int main(int argc, char* argv[]) {
// If no charset was specified, use standard ascii 33 - 126 chars, which
// includes english lower case, upper case, numbers, and some symbols.
if(chars[0] == '\0')
- count += intrcat(chars, 33, 126);
+ count += i18n_cat_ascii(chars);
// Get the random data seed
clock_gettime(CLOCK_REALTIME, &ts);

Generated by cgit