Merge branch 'i18n-breakout'

author: Aaron Ball <nullspoon@oper.io> 2019-03-09 20:27:01 -0700
committer: Aaron Ball <nullspoon@oper.io> 2019-03-09 20:27:01 -0700
commit: 67e1f43a0fd98d27c6d32c7def9a15ccc223aad4 (patch)
tree: 61569757b69602923b7376b3e9274a3af773d871
parent: 93a26f6394546f53b9531cead1713311de0dda78 (diff)
parent: c7033290ac21023c74f59c6f02a918cf6b2b36a8 (diff)
download: upwgen-67e1f43a0fd98d27c6d32c7def9a15ccc223aad4.tar.gz
upwgen-67e1f43a0fd98d27c6d32c7def9a15ccc223aad4.tar.xz
4 files changed, 369 insertions, 98 deletions
diff --git a/Makefile b/Makefile
index 5ff1e25..731b5b0 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,9 @@ out    = upwgen
 PREFIX = /usr/bin
 
 all:
-	$(CC) $(CCOPTS) src/main.c -o $(out)
+	@if [ ! -d obj ]; then mkdir obj; fi
+	$(CC) $(CCOPTS) -c src/i18n_cat.c -o obj/i18n_cat.o
+	$(CC) $(CCOPTS) src/main.c obj/*.o -o $(out)
 
 install:
 	mkdir -p $(DESTDIR)/$(PREFIX)
diff --git a/src/i18n_cat.c b/src/i18n_cat.c
new file mode 100644
index 0000000..18712ac
--- /dev/null
+++ b/src/i18n_cat.c
@@ -0,0 +1,282 @@
+/**
+ * upwgen generates random internationalized passwords
+ * Copyright (C) 2019  Aaron Ball <nullspoon@oper.io>
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "i18n_cat.h"
+
+
+/**
+ * intrcat:
+ * Integer range concatenate. Appends the specified integer range to an int
+ * array.
+ *
+ * @arr    Array to cat range of ints to
+ * @rstart Range start integer
+ * @rend   Range end integer
+ *
+ * @return rend - rstart, which is one less than the number of integers appended
+ */
+unsigned int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
+  int i = 0;
+  int total = rend - rstart; // Calculate our return count
+
+  while(arr[i] != '\0')
+    i++;
+
+  while(rstart <= rend) {
+    arr[i] = rstart;
+    //printf("% -4d % -7d   %lc\n", i, rstart, rstart);
+    rstart++;
+    i++;
+  }
+
+  arr[i] = '\0';
+  return total;
+}
+
+
+/**
+ * i18n_cat_ascii_upper:
+ * Appends the ascii [english] upper case characters to the destination array.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_ascii_upper(unsigned int* dest) {
+  return intrcat(dest, 65, 90);
+}
+
+
+/**
+ * i18n_cat_ascii_lower:
+ * Appends the ascii [english] lower case characters to the destination array.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_ascii_lower(unsigned int* dest) {
+  return intrcat(dest, 97, 122);
+}
+
+
+/**
+ * i18n_cat_ascii_numerals:
+ * Appends the ascii numerals to the dest string.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_ascii_numerals(unsigned int* dest) {
+  return intrcat(dest, 48, 57);
+}
+
+
+/**
+ * i18n_cat_ascii_symbols:
+ * Appends the ascii symbols to the dest string.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_ascii_symbols(unsigned int* dest) {
+  unsigned int count = 0;
+  count += intrcat(dest, 33, 47);   // English symbols ! - /
+  count += intrcat(dest, 58, 64);   // English symbols : - @
+  count += intrcat(dest, 91, 96);   // English symbols [ - `
+  count += intrcat(dest, 123, 126); // English symbols { - ~
+  return count;
+}
+
+
+/**
+ * i18n_cat_ascii:
+ * Appends the entire ascii printable characters (without the space at dec 32)
+ * range to the dest string. This includes English numerals, upper, lower, and
+ * symbols.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_ascii(unsigned int* dest) {
+  return intrcat(dest, 0x0021, 0x007E);
+}
+
+
+/**
+ * i18n_cat_one:
+ * Appends the first group of unicode characters. This group covers some of the
+ * most common languages in the world, which use the latin script. This also
+ * includes the IPA extension characters.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_one(unsigned int* dest) {
+  unsigned int count = 0;
+  count += intrcat(dest, 0x00A1, 0x00FF); // Latin-1 Suppliment
+  count += intrcat(dest, 0x0100, 0x017F); // Latin extended A
+  count += intrcat(dest, 0x0180, 0x024F); // Latin extended B
+  count += intrcat(dest, 0x0250, 0x02AF); // IPA Extensions
+  return count;
+}
+
+
+/**
+ * i18n_cat_two:
+ * Appends the second most popular group of unicode characters. This group
+ * attempts to cover scripts used by the second most common languages in the
+ * world. In this case, this includes the Devanagari (Hindi, Sanskrit), Hebrew,
+ * Arabic, and Cyrillic blocks.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_two(unsigned int* dest) {
+  unsigned int count = 0;
+
+  // Devanagari (Sanskrit, Hindi, Marathi, Sindhi, Nepali, etc)
+  // This does not include vowels, as those are character modifiers that do not
+  // work with other character sets.
+  count += intrcat(dest, 0x0904, 0x0939);
+  count += intrcat(dest, 0x0958, 0x0961);
+  count += intrcat(dest, 0x0964, 0x096F);
+  count += intrcat(dest, 0x0972, 0x097F);
+
+  // Only includes Hebrew consonants, since vowel marks require modification of
+  // a previous character, which doesn't work when combined with other scripts.
+  count += intrcat(dest, 0x05D0, 0x05EA); // Hebrew
+  count += intrcat(dest, 0x05F0, 0x05F4); // Hebrew
+
+  // Arabic (only consonants)
+  count += intrcat(dest, 0x061E, 0x06FF);
+
+  // Cyrillic and Cyrillic suppliment
+  count += intrcat(dest, 0x0400, 0x04F0);
+  count += intrcat(dest, 0x0500, 0x052F);
+  return count;
+}
+
+
+/**
+ * i18n_cat_three:
+ * Appends the third most popular group of unicode characters. This group
+ * attempts to cover scripts used by the third most common languages in the
+ * world. In this case, this includes the Armenian, Bengali, Greek, and Coptic
+ * blocks.
+ *
+ * NOTE: Using this function will likely introduce characters for which your
+ *       font does not have glyphs.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_three(unsigned int* dest) {
+  unsigned int count = 0;
+
+  // Armenian
+  // Armenian has a gap at 0x0557, 0x0558, 0x058B, and 0x058C
+  count += intrcat(dest, 0x0531, 0x0556);
+  count += intrcat(dest, 0x0559, 0x058A);
+
+  // Bengali
+  count += intrcat(dest, 0x0985, 0x098C);
+  count += intrcat(dest, 0x098F, 0x0990);
+  count += intrcat(dest, 0x0993, 0x09A8);
+  count += intrcat(dest, 0x09AA, 0x09B0);
+  count += intrcat(dest, 0x09B2, 0x09B2);
+  count += intrcat(dest, 0x09B6, 0x09B9);
+  count += intrcat(dest, 0x09B6, 0x09B9);
+  count += intrcat(dest, 0x09DC, 0x09DD);
+  count += intrcat(dest, 0x09DF, 0x09D3);
+  count += intrcat(dest, 0x09D6, 0x09DC);
+
+  // Greek and Coptic
+  // These are complicated because they are missing assigned values for
+  // 0x0378, 0x0379, 0x0380, 0x0381, 0x0382, 0x0383, 0x038B, 0x038D, 0x03A2
+  count += intrcat(dest, 0x0370, 0x0377);
+  count += intrcat(dest, 0x037A, 0x037F);
+  count += intrcat(dest, 0x0384, 0x038A);
+  count += intrcat(dest, 0x038C, 0x038C);
+  count += intrcat(dest, 0x038C, 0x038C);
+  count += intrcat(dest, 0x038E, 0x03A1);
+  count += intrcat(dest, 0x03A3, 0x03FF);
+
+  return count;
+}
+
+
+/**
+ * i18n_cat_four:
+ * Appends the fourth most popular group of unicode characters. This group
+ * attempts to cover scripts used by the fourth most common languages in the
+ * world. In this case, this includes the Thaana, NKo, Samaritan, Mandaic,
+ * Syriac, Runic, Tifinagh, and Georgian blocks.
+ *
+ * NOTE: Using this function will likely introduce characters for which your
+ *       font does not have glyphs.
+ *
+ * @dest   Destination int array to copy into
+ *
+ * @return Count of characters appended
+ */
+unsigned int i18n_cat_four(unsigned int* dest) {
+  unsigned int count = 0;
+
+  count += intrcat(dest, 0x0780, 0x07A5); // Thaana
+  count += intrcat(dest, 0x07C0, 0x07EA); // NKo
+  count += intrcat(dest, 0x0800, 0x0815); // Samaritan
+  count += intrcat(dest, 0x0830, 0x083E); // Samaritan
+  count += intrcat(dest, 0x0840, 0x085B); // Mandaic
+
+  count += intrcat(dest, 0x0710, 0x072F); // Syriac
+  count += intrcat(dest, 0x074D, 0x074F); // Syriac
+
+  count += intrcat(dest, 0x16A0, 0x16F8); // Runic
+  count += intrcat(dest, 0x2D30, 0x2D67); // Tifinagh
+
+  // Georgian
+  count += intrcat(dest, 0x10A0, 0x10C5);
+  count += intrcat(dest, 0x10C7, 0x10C7);
+  count += intrcat(dest, 0x10CD, 0x10CD);
+  count += intrcat(dest, 0x10D0, 0x10FF);
+
+  return count;
+}
+
+
+/**
+ * i18n_dump_arr:
+ * Prints array containing unsigned ints representing international characters.
+ * Outputs to STDOUT the unicode hex and decimal values, followed by the character.
+ *
+ * @arr Unicode array to print
+ */
+void i18n_dump_arr(unsigned int* arr) {
+  int i = 0; // cursor
+
+  while(arr[i] != '\0') {
+    printf("0x%04x   %5d: [%lc]\n", arr[i], arr[i], arr[i]);
+    i++;
+  }
+}
diff --git a/src/i18n_cat.h b/src/i18n_cat.h
new file mode 100644
index 0000000..02ac3f0
--- /dev/null
+++ b/src/i18n_cat.h
@@ -0,0 +1,47 @@
+/**
+ * upwgen generates random internationalized passwords
+ * Copyright (C) 2019  Aaron Ball <nullspoon@oper.io>
+ * 
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <locale.h>
+
+/* Deprecated */
+unsigned int i18n_cat_arr(unsigned int*);
+
+/**
+ * Functions to append to the specified char array the ascii basic latin
+ * characters.
+ */
+unsigned int i18n_cat_ascii(unsigned int*);
+unsigned int i18n_cat_ascii_numerals(unsigned int*);
+unsigned int i18n_cat_ascii_upper(unsigned int*);
+unsigned int i18n_cat_ascii_lower(unsigned int*);
+unsigned int i18n_cat_ascii_symbols(unsigned int*);
+
+
+/**
+ * Functions to append to the specified char array the first, second, third,
+ * and fourth most used character set groups around the world.
+ */
+unsigned int i18n_cat_one(unsigned int*);
+unsigned int i18n_cat_two(unsigned int*);
+unsigned int i18n_cat_three(unsigned int*);
+unsigned int i18n_cat_four(unsigned int*);
+
+void i18n_dump_arr(unsigned int*);
diff --git a/src/main.c b/src/main.c
index 1f048ed..cb22210 100644
--- a/src/main.c
+++ b/src/main.c
@@ -19,97 +19,15 @@
 #include <time.h>
 #include <locale.h>
 
-
-// intrcat:
-// Integer range concatenate. Appends the specified integer range to an int
-// array.
-//
-// @arr    Array to cat range of ints to
-// @rstart Range start integer
-// @rend   Range end integer
-//
-// @return Number of integers appended to array
-int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
-  int i = 0;
-  int total = rend - rstart; // Calculate our return count
-
-  while(arr[i] != '\0')
-    i++;
-
-  while(rstart <= rend) {
-    arr[i] = rstart;
-    //printf("% -4d % -7d   %lc\n", i, rstart, rstart);
-    rstart++;
-    i++;
-  }
-
-  arr[i] = '\0';
-  return total;
-}
-
-
-// populate_intl_arr:
-// Populates an unsigned integer array with common unicode (utf-8) language
-// alphabets and symbols.
-//
-// Some example unicode integer ranges:
-//   33 - 126      Standard english ascii
-//   256 - 383     Latin extended A block
-//   256 - 383     Latin extended B block
-//   913 - 969     Greek
-//   1040 - 1103   Russian
-//   1329 - 1414   Armenian
-//   1488 - 1514   Hebrew
-//   65166 - 65265 Arabic
-// 
-//   No/rare font support (boo!)
-//   2325 - 2373   Devanagari (Hindi)
-//   2437 - 2509   Bengali alphabet
-//   2949 - 3020   Tamil
-//   3585 - 3663   Thai
-//   5792 - 5880   Runic
-//   11392 - 11483 Coptic alphabet
-//   66560 - 66639 Deseret
-//
-// @out  Unsigned int array to be populated.
-//
-// @return Size of the array contents
-int populate_intl_arr(unsigned int* out) {
-  int count = 0;
-
-  // Populate the array
-  count += intrcat(out, 33, 126);      // English
-  count += intrcat(out, 256, 383);     // Latin A block
-  count += intrcat(out, 399, 691);     // Latin B block
-  count += intrcat(out, 913, 969);     // Greek
-  count += intrcat(out, 1040, 1103);   // Russian
-  count += intrcat(out, 1329, 1414);   // Armenian
-  count += intrcat(out, 1488, 1514);   // Hebrew
-  count += intrcat(out, 65166, 65265); // Arabic
-
-  return count;
-}
-
-
-// print_intl_arr:
-// Prints array containing unsigned ints representing internal characters.
-// Outputs to STDOUT the unicode decimal, followed by the unicode character.
-//
-// @arr Unicode array to print
-void print_intl_arr(unsigned int* arr) {
-  int i = 0; // cursor
-
-  while(arr[i] != '\0') {
-    printf("%5d: [%lc]\n", arr[i], arr[i]);
-    i++;
-  }
-}
+#include "i18n_cat.h"
 
 
 void usage() {
   printf(
     "Upwgen is a password generator with international support. If no length\n"
-    "is specified, defaults to 32 characters output length\n\n"
+    "is specified, defaults to 32 characters output length, selecting from\n"
+    "the standard English character set (lower case, upper case, numerals,\n"
+    "and symbols).\n\n"
     "Usage:\n  upwgen [options] [length]\n\n"
     "Options:\n"
     "  -c,--capitalize Include at least one capital letter in output\n"
@@ -117,6 +35,10 @@ void usage() {
     "  -n,--numerals   Include at least one numeral in output\n"
     "  -y,--symbols    Include at least one symbol in output\n"
     "  -i,--i18n       Include at least one international letter in output\n"
+    "  -1              Include chars from the most used scripts in the world\n"
+    "  -2              Include chars from the second most used scripts in the world\n"
+    "  -3              Include chars from the third most used scripts in the world\n"
+    "  -4              Include chars from the forth most used scripts in the world\n"
     "\n"
     "  -h,--help       Print this help text\n"
   );
@@ -125,11 +47,11 @@ void usage() {
 
 int main(int argc, char* argv[]) {
   struct timespec ts; // Timespec for seeding rng
-  int count;          // Number of chars to choose from
+  unsigned int count; // Number of chars to choose from
   int len;            // Password length
   int i;              // Arg index
   unsigned long seed; // Seed for the RNG (current seconds * nanoseconds)
-  unsigned int chars[1024]; // Uint array to hold international chars
+  unsigned int chars[4096]; // Uint array to hold international chars
 
   // Initialize
   count    = 0;
@@ -140,21 +62,39 @@ int main(int argc, char* argv[]) {
 
   while(i < argc) {
     if(strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--capitals") == 0) {
-      count += intrcat(chars, 65, 90); // English uppercase
+      count += i18n_cat_ascii_upper(chars);
+
     } else if(strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lower") == 0) {
-      count += intrcat(chars, 97, 122); // English lower case
+      count += i18n_cat_ascii_lower(chars);
+
     } else if(strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--numerals") == 0) {
-      count += intrcat(chars, 48, 57); // English numerals
+      count += i18n_cat_ascii_numerals(chars);
+
     } else if(strcmp(argv[i], "-y") == 0 || strcmp(argv[i], "--symbols") == 0) {
-      count += intrcat(chars, 33, 47);   // English symbols ! - /
-      count += intrcat(chars, 58, 64);   // English symbols : - @
-      count += intrcat(chars, 91, 96);   // English symbols [ - `
-      count += intrcat(chars, 123, 126); // English symbols { - ~
+      count += i18n_cat_ascii_symbols(chars);
+
     } else if(strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--i18n") == 0) {
-      count += populate_intl_arr(chars);
+      count += i18n_cat_one(chars);
+      count += i18n_cat_two(chars);
+      count += i18n_cat_three(chars);
+      count += i18n_cat_four(chars);
+
+    } else if(strcmp(argv[i], "-1") == 0) {
+      count += i18n_cat_one(chars);
+
+    } else if(strcmp(argv[i], "-2") == 0) {
+      count += i18n_cat_two(chars);
+
+    } else if(strcmp(argv[i], "-3") == 0) {
+      count += i18n_cat_three(chars);
+
+    } else if(strcmp(argv[i], "-4") == 0) {
+      count += i18n_cat_four(chars);
+
     } else if(strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
       usage();
       return 0;
+
     } else {
       // If we reach this block, the user specified a custom length (or
       // fatfingered something). Test for ability to convert from str to int
@@ -172,7 +112,7 @@ int main(int argc, char* argv[]) {
   // If no charset was specified, use standard ascii 33 - 126 chars, which
   // includes english lower case, upper case, numbers, and some symbols.
   if(chars[0] == '\0')
-    count += intrcat(chars, 33, 126);
+    count += i18n_cat_ascii(chars);
 
   // Get the random data seed
   clock_gettime(CLOCK_REALTIME, &ts);
author	Aaron Ball <nullspoon@oper.io>	2019-03-09 20:27:01 -0700
committer	Aaron Ball <nullspoon@oper.io>	2019-03-09 20:27:01 -0700
commit	67e1f43a0fd98d27c6d32c7def9a15ccc223aad4 (patch)
tree	61569757b69602923b7376b3e9274a3af773d871
parent	93a26f6394546f53b9531cead1713311de0dda78 (diff)
parent	c7033290ac21023c74f59c6f02a918cf6b2b36a8 (diff)
download	upwgen-67e1f43a0fd98d27c6d32c7def9a15ccc223aad4.tar.gz upwgen-67e1f43a0fd98d27c6d32c7def9a15ccc223aad4.tar.xz