diff options
-rw-r--r-- | Makefile | 4 | ||||
-rw-r--r-- | src/i18n_cat.c | 282 | ||||
-rw-r--r-- | src/i18n_cat.h | 47 | ||||
-rw-r--r-- | src/main.c | 134 |
4 files changed, 369 insertions, 98 deletions
/**
 * intrcat:
 * Integer range concatenate. Appends every integer in the inclusive range
 * rstart - rend to the end of a zero terminated unsigned int array, then
 * terminates the array again.
 *
 * The end of the existing contents is located by scanning for the first zero
 * element, so arr must already be zero terminated. No bounds checking is done
 * here; the caller must provide room for the new values plus the terminator.
 * A range that contains 0 cannot be stored meaningfully, since 0 doubles as
 * the terminator.
 *
 * @arr    Zero terminated array to cat range of ints to
 * @rstart Range start integer (inclusive)
 * @rend   Range end integer (inclusive)
 *
 * @return Number of integers appended to array (0 when rstart > rend)
 */
unsigned int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
  unsigned int i = 0;  // Write cursor into arr
  unsigned int total;  // Number of values in the range
  unsigned int n;      // Offset from rstart

  // A reversed range is empty. Bail out before the subtraction below so it
  // can not wrap around into a huge bogus count.
  if(rstart > rend)
    return 0;

  // Both ends of the range are inclusive, hence the + 1
  total = rend - rstart + 1;

  // Seek to the current terminator
  while(arr[i] != 0)
    i++;

  for(n = 0; n < total; n++) {
    arr[i] = rstart + n;
    i++;
  }

  arr[i] = 0;
  return total;
}
/**
 * i18n_cat_ascii_upper:
 * Adds the ascii [english] upper case letters (A - Z) to the end of the
 * destination array.
 *
 * @dest Destination int array to copy into
 *
 * @return Count reported by intrcat for the appended range
 */
unsigned int i18n_cat_ascii_upper(unsigned int* dest) {
  unsigned int appended = intrcat(dest, 0x41, 0x5A); // A - Z
  return appended;
}


/**
 * i18n_cat_ascii_lower:
 * Adds the ascii [english] lower case letters (a - z) to the end of the
 * destination array.
 *
 * @dest Destination int array to copy into
 *
 * @return Count reported by intrcat for the appended range
 */
unsigned int i18n_cat_ascii_lower(unsigned int* dest) {
  unsigned int appended = intrcat(dest, 0x61, 0x7A); // a - z
  return appended;
}


/**
 * i18n_cat_ascii_numerals:
 * Adds the ascii numerals (0 - 9) to the end of the destination array.
 *
 * @dest Destination int array to copy into
 *
 * @return Count reported by intrcat for the appended range
 */
unsigned int i18n_cat_ascii_numerals(unsigned int* dest) {
  unsigned int appended = intrcat(dest, 0x30, 0x39); // 0 - 9
  return appended;
}


/**
 * i18n_cat_ascii_symbols:
 * Adds the four runs of ascii symbols that sit between the numerals and
 * letters to the end of the destination array, lowest run first.
 *
 * @dest Destination int array to copy into
 *
 * @return Sum of the counts reported by intrcat for each run
 */
unsigned int i18n_cat_ascii_symbols(unsigned int* dest) {
  // Inclusive { first, last } pairs, appended in the order listed
  static const unsigned int spans[4][2] = {
    { 0x21, 0x2F }, // English symbols ! - /
    { 0x3A, 0x40 }, // English symbols : - @
    { 0x5B, 0x60 }, // English symbols [ - `
    { 0x7B, 0x7E }  // English symbols { - ~
  };
  unsigned int total = 0;
  unsigned int k;

  for(k = 0; k < 4; k++)
    total += intrcat(dest, spans[k][0], spans[k][1]);

  return total;
}


/**
 * i18n_cat_ascii:
 * Adds the whole printable ascii range, minus the space at dec 32, to the end
 * of the destination array as one contiguous run. That run holds the English
 * numerals, upper case, lower case, and symbols.
 *
 * @dest Destination int array to copy into
 *
 * @return Count reported by intrcat for the appended range
 */
unsigned int i18n_cat_ascii(unsigned int* dest) {
  unsigned int appended = intrcat(dest, 0x21, 0x7E); // ! - ~
  return appended;
}
/**
 * i18n_cat_one:
 * Appends the first group of unicode characters. This group covers some of the
 * most common languages in the world, which use the latin script. This also
 * includes the IPA extension characters.
 *
 * @dest Destination int array to copy into
 *
 * @return Sum of the counts reported by intrcat for each range
 */
unsigned int i18n_cat_one(unsigned int* dest) {
  unsigned int count = 0;
  count += intrcat(dest, 0x00A1, 0x00FF); // Latin-1 Supplement
  count += intrcat(dest, 0x0100, 0x017F); // Latin extended A
  count += intrcat(dest, 0x0180, 0x024F); // Latin extended B
  count += intrcat(dest, 0x0250, 0x02AF); // IPA Extensions
  return count;
}


/**
 * i18n_cat_two:
 * Appends the second most popular group of unicode characters. This group
 * attempts to cover scripts used by the second most common languages in the
 * world. In this case, this includes the Devanagari (Hindi, Sanskrit), Hebrew,
 * Arabic, and Cyrillic blocks.
 *
 * @dest Destination int array to copy into
 *
 * @return Sum of the counts reported by intrcat for each range
 */
unsigned int i18n_cat_two(unsigned int* dest) {
  unsigned int count = 0;

  // Devanagari (Sanskrit, Hindi, Marathi, Sindhi, Nepali, etc)
  // This does not include vowels, as those are character modifiers that do not
  // work with other character sets.
  count += intrcat(dest, 0x0904, 0x0939);
  count += intrcat(dest, 0x0958, 0x0961);
  count += intrcat(dest, 0x0964, 0x096F);
  count += intrcat(dest, 0x0972, 0x097F);

  // Only includes Hebrew consonants, since vowel marks require modification of
  // a previous character, which doesn't work when combined with other scripts.
  count += intrcat(dest, 0x05D0, 0x05EA); // Hebrew
  count += intrcat(dest, 0x05F0, 0x05F4); // Hebrew

  // Arabic
  // NOTE(review): this single contiguous run also spans the combining marks
  // at 0x064B - 0x065F, so it is wider than consonants only. Confirm whether
  // it should be split up like the Devanagari and Hebrew ranges above.
  count += intrcat(dest, 0x061E, 0x06FF);

  // Cyrillic and Cyrillic supplement
  // The Cyrillic block runs through 0x04FF. Ending at 0x04F0 dropped the last
  // fifteen letters of the block.
  count += intrcat(dest, 0x0400, 0x04FF);
  count += intrcat(dest, 0x0500, 0x052F);
  return count;
}
/**
 * i18n_cat_three:
 * Appends the third most popular group of unicode characters. This group
 * attempts to cover scripts used by the third most common languages in the
 * world. In this case, this includes the Armenian, Bengali, Greek, and Coptic
 * blocks.
 *
 * NOTE: Using this function will likely introduce characters for which your
 * font does not have glyphs.
 *
 * @dest Destination int array to copy into
 *
 * @return Sum of the counts reported by intrcat for each range
 */
unsigned int i18n_cat_three(unsigned int* dest) {
  unsigned int count = 0;

  // Armenian
  // Armenian has a gap at 0x0557, 0x0558, 0x058B, and 0x058C
  count += intrcat(dest, 0x0531, 0x0556);
  count += intrcat(dest, 0x0559, 0x058A);

  // Bengali
  // Each range is listed once and in ascending order. A repeated range would
  // store its characters twice, and a reversed range (start > end) appends
  // nothing while throwing off the running count.
  count += intrcat(dest, 0x0985, 0x098C);
  count += intrcat(dest, 0x098F, 0x0990);
  count += intrcat(dest, 0x0993, 0x09A8);
  count += intrcat(dest, 0x09AA, 0x09B0);
  count += intrcat(dest, 0x09B2, 0x09B2);
  count += intrcat(dest, 0x09B6, 0x09B9);
  count += intrcat(dest, 0x09DC, 0x09DD);
  // Letters only; 0x09E2 and 0x09E3 are vowel signs that modify the previous
  // character, so they are left out like the other scripts' vowel marks.
  count += intrcat(dest, 0x09DF, 0x09E1);
  // Bengali digits and the signs that follow them
  count += intrcat(dest, 0x09E6, 0x09FB);

  // Greek and Coptic
  // These are complicated because they are missing assigned values for
  // 0x0378, 0x0379, 0x0380, 0x0381, 0x0382, 0x0383, 0x038B, 0x038D, 0x03A2
  count += intrcat(dest, 0x0370, 0x0377);
  count += intrcat(dest, 0x037A, 0x037F);
  count += intrcat(dest, 0x0384, 0x038A);
  count += intrcat(dest, 0x038C, 0x038C);
  count += intrcat(dest, 0x038E, 0x03A1);
  count += intrcat(dest, 0x03A3, 0x03FF);

  return count;
}
/**
 * i18n_cat_four:
 * Appends the fourth most popular group of unicode characters. This group
 * attempts to cover scripts used by the fourth most common languages in the
 * world. In this case, this includes the Thaana, NKo, Samaritan, Mandaic,
 * Syriac, Runic, Tifinagh, and Georgian blocks.
 *
 * NOTE: Using this function will likely introduce characters for which your
 * font does not have glyphs.
 *
 * @dest Destination int array to copy into
 *
 * @return Sum of the counts reported by intrcat for each range
 */
unsigned int i18n_cat_four(unsigned int* dest) {
  unsigned int count = 0;

  count += intrcat(dest, 0x0780, 0x07A5); // Thaana
  count += intrcat(dest, 0x07C0, 0x07EA); // NKo
  count += intrcat(dest, 0x0800, 0x0815); // Samaritan
  count += intrcat(dest, 0x0830, 0x083E); // Samaritan
  count += intrcat(dest, 0x0840, 0x085B); // Mandaic

  count += intrcat(dest, 0x0710, 0x072F); // Syriac
  count += intrcat(dest, 0x074D, 0x074F); // Syriac

  count += intrcat(dest, 0x16A0, 0x16F8); // Runic
  count += intrcat(dest, 0x2D30, 0x2D67); // Tifinagh

  // Georgian
  count += intrcat(dest, 0x10A0, 0x10C5);
  count += intrcat(dest, 0x10C7, 0x10C7);
  count += intrcat(dest, 0x10CD, 0x10CD);
  count += intrcat(dest, 0x10D0, 0x10FF);

  return count;
}


/**
 * i18n_dump_arr:
 * Prints a zero terminated array of unsigned ints representing international
 * characters. Outputs to STDOUT one line per element holding the value in
 * hex, the value in decimal, and the value printed as a wide character.
 *
 * @arr Zero terminated unicode array to print
 */
void i18n_dump_arr(unsigned int* arr) {
  int i = 0; // cursor

  while(arr[i] != 0) {
    // The elements are unsigned, so the decimal column uses %u
    printf("0x%04x %5u: [%lc]\n", arr[i], arr[i], arr[i]);
    i++;
  }
}
/**
 * upwgen generates random internationalized passwords
 * Copyright (C) 2019 Aaron Ball <nullspoon@oper.io>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#ifndef I18N_CAT_H
#define I18N_CAT_H

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <locale.h>

/**
 * Every function below takes a zero terminated unsigned int array, appends
 * one or more ranges of unicode code points to the end of it, and returns the
 * accumulated count for what was appended. The array must be large enough for
 * the appended values plus the terminator.
 */

/**
 * Deprecated. Only the prototype remains; no definition appears in
 * i18n_cat.c, so a call to this will not link.
 */
unsigned int i18n_cat_arr(unsigned int* dest);

/**
 * Functions to append to the specified int array the ascii basic latin
 * characters.
 */
unsigned int i18n_cat_ascii(unsigned int* dest);
unsigned int i18n_cat_ascii_numerals(unsigned int* dest);
unsigned int i18n_cat_ascii_upper(unsigned int* dest);
unsigned int i18n_cat_ascii_lower(unsigned int* dest);
unsigned int i18n_cat_ascii_symbols(unsigned int* dest);


/**
 * Functions to append to the specified int array the first, second, third,
 * and fourth most used character set groups around the world.
 */
unsigned int i18n_cat_one(unsigned int* dest);
unsigned int i18n_cat_two(unsigned int* dest);
unsigned int i18n_cat_three(unsigned int* dest);
unsigned int i18n_cat_four(unsigned int* dest);

/**
 * Prints each element of a zero terminated int array to STDOUT.
 */
void i18n_dump_arr(unsigned int* arr);

#endif /* I18N_CAT_H */
-// -// Some example unicode integer ranges: -// 33 - 126 Standard english ascii -// 256 - 383 Latin extended A block -// 256 - 383 Latin extended B block -// 913 - 969 Greek -// 1040 - 1103 Russian -// 1329 - 1414 Armenian -// 1488 - 1514 Hebrew -// 65166 - 65265 Arabic -// -// No/rare font support (boo!) -// 2325 - 2373 Devanagari (Hindi) -// 2437 - 2509 Bengali alphabet -// 2949 - 3020 Tamil -// 3585 - 3663 Thai -// 5792 - 5880 Runic -// 11392 - 11483 Coptic alphabet -// 66560 - 66639 Deseret -// -// @out Unsigned int array to be populated. -// -// @return Size of the array contents -int populate_intl_arr(unsigned int* out) { - int count = 0; - - // Populate the array - count += intrcat(out, 33, 126); // English - count += intrcat(out, 256, 383); // Latin A block - count += intrcat(out, 399, 691); // Latin B block - count += intrcat(out, 913, 969); // Greek - count += intrcat(out, 1040, 1103); // Russian - count += intrcat(out, 1329, 1414); // Armenian - count += intrcat(out, 1488, 1514); // Hebrew - count += intrcat(out, 65166, 65265); // Arabic - - return count; -} - - -// print_intl_arr: -// Prints array containing unsigned ints representing internal characters. -// Outputs to STDOUT the unicode decimal, followed by the unicode character. -// -// @arr Unicode array to print -void print_intl_arr(unsigned int* arr) { - int i = 0; // cursor - - while(arr[i] != '\0') { - printf("%5d: [%lc]\n", arr[i], arr[i]); - i++; - } -} +#include "i18n_cat.h" void usage() { printf( "Upwgen is a password generator with international support. 
If no length\n" - "is specified, defaults to 32 characters output length\n\n" + "is specified, defaults to 32 characters output length, selecting from\n" + "the standard English character set (lower case, upper case, numerals,\n" + "and symbols).\n\n" "Usage:\n upwgen [options] [length]\n\n" "Options:\n" " -c,--capitalize Include at least one capital letter in output\n" @@ -117,6 +35,10 @@ void usage() { " -n,--numerals Include at least one numeral in output\n" " -y,--symbols Include at least one symbol in output\n" " -i,--i18n Include at least one international letter in output\n" + " -1 Include chars from the most used scripts in the world\n" + " -2 Include chars from the second most used scripts in the world\n" + " -3 Include chars from the third most used scripts in the world\n" + " -4 Include chars from the forth most used scripts in the world\n" "\n" " -h,--help Print this help text\n" ); @@ -125,11 +47,11 @@ void usage() { int main(int argc, char* argv[]) { struct timespec ts; // Timespec for seeding rng - int count; // Number of chars to choose from + unsigned int count; // Number of chars to choose from int len; // Password length int i; // Arg index unsigned long seed; // Seed for the RNG (current seconds * nanoseconds) - unsigned int chars[1024]; // Uint array to hold international chars + unsigned int chars[4096]; // Uint array to hold international chars // Initialize count = 0; @@ -140,21 +62,39 @@ int main(int argc, char* argv[]) { while(i < argc) { if(strcmp(argv[i], "-c") == 0 || strcmp(argv[i], "--capitals") == 0) { - count += intrcat(chars, 65, 90); // English uppercase + count += i18n_cat_ascii_upper(chars); + } else if(strcmp(argv[i], "-l") == 0 || strcmp(argv[i], "--lower") == 0) { - count += intrcat(chars, 97, 122); // English lower case + count += i18n_cat_ascii_lower(chars); + } else if(strcmp(argv[i], "-n") == 0 || strcmp(argv[i], "--numerals") == 0) { - count += intrcat(chars, 48, 57); // English numerals + count += 
i18n_cat_ascii_numerals(chars); + } else if(strcmp(argv[i], "-y") == 0 || strcmp(argv[i], "--symbols") == 0) { - count += intrcat(chars, 33, 47); // English symbols ! - / - count += intrcat(chars, 58, 64); // English symbols : - @ - count += intrcat(chars, 91, 96); // English symbols [ - ` - count += intrcat(chars, 123, 126); // English symbols { - ~ + count += i18n_cat_ascii_symbols(chars); + } else if(strcmp(argv[i], "-i") == 0 || strcmp(argv[i], "--i18n") == 0) { - count += populate_intl_arr(chars); + count += i18n_cat_one(chars); + count += i18n_cat_two(chars); + count += i18n_cat_three(chars); + count += i18n_cat_four(chars); + + } else if(strcmp(argv[i], "-1") == 0) { + count += i18n_cat_one(chars); + + } else if(strcmp(argv[i], "-2") == 0) { + count += i18n_cat_two(chars); + + } else if(strcmp(argv[i], "-3") == 0) { + count += i18n_cat_three(chars); + + } else if(strcmp(argv[i], "-4") == 0) { + count += i18n_cat_four(chars); + } else if(strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { usage(); return 0; + } else { // If we reach this block, the user specified a custom length (or // fatfingered something). Test for ability to convert from str to int @@ -172,7 +112,7 @@ int main(int argc, char* argv[]) { // If no charset was specified, use standard ascii 33 - 126 chars, which // includes english lower case, upper case, numbers, and some symbols. if(chars[0] == '\0') - count += intrcat(chars, 33, 126); + count += i18n_cat_ascii(chars); // Get the random data seed clock_gettime(CLOCK_REALTIME, &ts); |