summaryrefslogtreecommitdiff
path: root/src/i18n_cat.c
blob: f8d9f0577c4e82bb2825a42bbf9c339962e1576f (plain)
    1 /**
    2  * upwgen generates random internationalized passwords
    3  * Copyright (C) 2019  Aaron Ball <nullspoon@oper.io>
    4  * 
    5  * This program is free software: you can redistribute it and/or modify
    6  * it under the terms of the GNU General Public License as published by
    7  * the Free Software Foundation, either version 3 of the License, or
    8  * (at your option) any later version.
    9  * 
   10  * This program is distributed in the hope that it will be useful,
   11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13  * GNU General Public License for more details.
   14  * 
   15  * You should have received a copy of the GNU General Public License
   16  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   17  */
#include "i18n_cat.h"

#include <stdio.h>
#include <wchar.h>
   19 
   20 
   21 /**
   22  * intrcat:
   23  * Integer range concatenate. Appends the specified integer range to an int
   24  * array.
   25  *
   26  * @arr    Array to cat range of ints to
   27  * @rstart Range start integer
   28  * @rend   Range end integer
   29  *
   30  * @return Number of integers appended to array
   31  */
   32 unsigned int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
   33   int i = 0;
   34   int total = rend - rstart; // Calculate our return count
   35 
   36   while(arr[i] != '\0')
   37     i++;
   38 
   39   while(rstart <= rend) {
   40     arr[i] = rstart;
   41     //printf("% -4d % -7d   %lc\n", i, rstart, rstart);
   42     rstart++;
   43     i++;
   44   }
   45 
   46   arr[i] = '\0';
   47   return total;
   48 }
   49 
   50 
   51 /**
   52  * i18n_cat_ascii_upper:
   53  * Appends the ascii [english] upper case characters to the destination array.
   54  *
   55  * @dest   Destination int array to copy into
   56  *
   57  * @return Count of characters appended
   58  */
   59 unsigned int i18n_cat_ascii_upper(unsigned int* dest) {
   60   return intrcat(dest, 65, 90);
   61 }
   62 
   63 
   64 /**
   65  * i18n_cat_ascii_lower:
   66  * Appends the ascii [english] upper case characters to the destination array.
   67  *
   68  * @dest   Destination int array to copy into
   69  *
   70  * @return Count of characters appended
   71  */
   72 unsigned int i18n_cat_ascii_lower(unsigned int* dest) {
   73   return intrcat(dest, 97, 122);
   74 }
   75 
   76 
   77 /**
   78  * i18n_cat_ascii_numerals:
   79  * Appends the ascii numerals to the dest string.
   80  *
   81  * @dest   Destination int array to copy into
   82  *
   83  * @return Count of characters appended
   84  */
   85 unsigned int i18n_cat_ascii_numerals(unsigned int* dest) {
   86   return intrcat(dest, 48, 57);
   87 }
   88 
   89 
   90 /**
   91  * i18n_cat_ascii_symbols:
   92  * Appends the ascii symbols to the dest string.
   93  *
   94  * @dest   Destination int array to copy into
   95  *
   96  * @return Count of characters appended
   97  */
   98 unsigned int i18n_cat_ascii_symbols(unsigned int* dest) {
   99   unsigned int count = 0;
  100   count += intrcat(dest, 33, 47);   // English symbols ! - /
  101   count += intrcat(dest, 58, 64);   // English symbols : - @
  102   count += intrcat(dest, 91, 96);   // English symbols [ - `
  103   count += intrcat(dest, 123, 126); // English symbols { - ~
  104   return count;
  105 }
  106 
  107 
  108 /**
  109  * i18n_cat_ascii:
  110  * Appends the entire ascii printable characters (without the space at dec 32)
  111  * range to the dest string. This includes English numerals, upper, lower, and
  112  * symbols.
  113  *
  114  * @dest   Destination int array to copy into
  115  *
  116  * @return Count of characters appended
  117  */
  118 unsigned int i18n_cat_ascii(unsigned int* dest) {
  119   return intrcat(dest, 0x0021, 0x007E);
  120 }
  121 
  122 
  123 /**
  124  * i18n_cat_one:
  125  * Appends the first group of unicode characters. This group covers some of the
  126  * most common languages in the world, which use the latin script. This also
  127  * includes the IPA extension characters.
  128  *
  129  * @dest   Destination int array to copy into
  130  *
  131  * @return Count of characters appended
  132  */
  133 unsigned int i18n_cat_one(unsigned int* dest) {
  134   unsigned int count = 0;
  135   count += intrcat(dest, 0x00A1, 0x00FF); // Latin-1 Suppliment
  136   count += intrcat(dest, 0x0100, 0x017F); // Latin extended A
  137   count += intrcat(dest, 0x0180, 0x024F); // Latin extended B
  138   count += intrcat(dest, 0x0250, 0x02AF); // IPA Extensions
  139   return count;
  140 }
  141 
  142 
  143 /**
  144  * i18n_cat_two:
  145  * Appends the second most popular group of unicode characters. This group
  146  * attempts to cover scripts used by the second most common languages in the
  147  * world. In this case, this includes the Devanagari (Hindi, Sanskrit), Hebrew,
  148  * Arabic, and Cyrillic blocks.
  149  *
  150  * @dest   Destination int array to copy into
  151  *
  152  * @return Count of characters appended
  153  */
  154 unsigned int i18n_cat_two(unsigned int* dest) {
  155   unsigned int count = 0;
  156 
  157   // Devanagari (Sanskrit, Hindi, Marathi, Sindhi, Nepali, etc)
  158   // This does not include vowels, as those are character modifiers that do not
  159   // work with other character sets.
  160   count += intrcat(dest, 0x0904, 0x0939);
  161   count += intrcat(dest, 0x0958, 0x0961);
  162   count += intrcat(dest, 0x0964, 0x096F);
  163   count += intrcat(dest, 0x0972, 0x097F);
  164 
  165   // Only includes Hebrew consonants, since vowel marks require modification of
  166   // a previous character, which doesn't work when combined with other scripts.
  167   count += intrcat(dest, 0x05D0, 0x05EA); // Hebrew
  168   count += intrcat(dest, 0x05F0, 0x05F4); // Hebrew
  169 
  170   // Arabic (only consonants)
  171   count += intrcat(dest, 0x061E, 0x06FF);
  172 
  173   // Cyrillic and Cyrillic suppliment
  174   count += intrcat(dest, 0x0400, 0x04F0);
  175   count += intrcat(dest, 0x0500, 0x052F);
  176   return count;
  177 }
  178 
  179 
  180 /**
  181  * i18n_cat_three:
  182  * Appends the third most popular group of unicode characters. This group
  183  * attempts to cover scripts used by the third most common languages in the
  184  * world. In this case, this includes the Armenian, Bengali, Greek, and Coptic
  185  * blocks.
  186  *
  187  * NOTE: Using this function will likely introduce characters for which your
  188  *       font does not have glyphs.
  189  *
  190  * @dest   Destination int array to copy into
  191  *
  192  * @return Count of characters appended
  193  */
  194 unsigned int i18n_cat_three(unsigned int* dest) {
  195   unsigned int count = 0;
  196 
  197   // Armenian
  198   // Armenian has a gap at 0x0557, 0x0558, 0x058B, and 0x058C
  199   count += intrcat(dest, 0x0531, 0x0556);
  200   count += intrcat(dest, 0x0559, 0x058A);
  201 
  202   // Bengali
  203   count += intrcat(dest, 0x0985, 0x098C);
  204   count += intrcat(dest, 0x098F, 0x0990);
  205   count += intrcat(dest, 0x0993, 0x09A8);
  206   count += intrcat(dest, 0x09AA, 0x09B0);
  207   count += intrcat(dest, 0x09B2, 0x09B2);
  208   count += intrcat(dest, 0x09B6, 0x09B9);
  209   count += intrcat(dest, 0x09B6, 0x09B9);
  210   count += intrcat(dest, 0x09DC, 0x09DD);
  211   count += intrcat(dest, 0x09DF, 0x09D3);
  212   count += intrcat(dest, 0x09D6, 0x09DC);
  213 
  214   // Greek and Coptic
  215   // These are complicated because they are missing assigned values for
  216   // 0x0378, 0x0379, 0x0380, 0x0381, 0x0382, 0x0383, 0x038B, 0x038D, 0x03A2
  217   count += intrcat(dest, 0x0370, 0x0377);
  218   count += intrcat(dest, 0x037A, 0x037F);
  219   count += intrcat(dest, 0x0384, 0x038A);
  220   count += intrcat(dest, 0x038C, 0x038C);
  221   count += intrcat(dest, 0x038C, 0x038C);
  222   count += intrcat(dest, 0x038E, 0x03A1);
  223   count += intrcat(dest, 0x03A3, 0x03FF);
  224 
  225   return count;
  226 }
  227 
  228 
  229 /**
  230  * i18n_cat_four:
  231  * Appends the forth most popular group of unicode characters. This group
  232  * attempts to cover scripts used by the forth most common languages in the
  233  * world. In this case, this includes the Thaana, NKo, Samaritan, Mandaic,
  234  * Syriac, Runic, Tifinagh, and Georgian blocks.
  235  *
  236  * NOTE: Using this function will likely introduce characters for which your
  237  *       font does not have glyphs.
  238  *
  239  * @dest   Destination int array to copy into
  240  *
  241  * @return Count of characters appended
  242  */
  243 unsigned int i18n_cat_four(unsigned int* dest) {
  244   unsigned int count = 0;
  245 
  246   count += intrcat(dest, 0x0780, 0x07A5); // Thaana
  247   count += intrcat(dest, 0x07C0, 0x07EA); // NKo
  248   count += intrcat(dest, 0x0800, 0x0815); // Samaritan
  249   count += intrcat(dest, 0x0830, 0x083E); // Samaritan
  250   count += intrcat(dest, 0x0840, 0x085B); // Mandaic
  251 
  252   count += intrcat(dest, 0x0710, 0x072F); // Syriac
  253   count += intrcat(dest, 0x074D, 0x074F); // Syriac
  254 
  255   count += intrcat(dest, 0x16A0, 0x16F8); // Runic
  256   count += intrcat(dest, 0x2D30, 0x2D67); // Tifinagh
  257 
  258   // Georgian
  259   count += intrcat(dest, 0x10A0, 0x10C5);
  260   count += intrcat(dest, 0x10C7, 0x10C7);
  261   count += intrcat(dest, 0x10CD, 0x10CD);
  262   count += intrcat(dest, 0x10D0, 0x10FF);
  263 
  264   return count;
  265 }
  266 
  267 
  268 /**
  269  * print_intl_arr:
  270  * Prints array containing unsigned ints representing internal characters.
  271  * Outputs to STDOUT the unicode decimal, followed by the unicode character.
  272  *
  273  * @arr Unicode array to print
  274  */
  275 void i18n_dump_arr(unsigned int* arr) {
  276   int i = 0; // cursor
  277 
  278   printf("Hex      Decimal   [Character]\n");
  279   while(arr[i] != '\0') {
  280     printf("0x%04x   %-7d   [%lc]\n", arr[i], arr[i], arr[i]);
  281     i++;
  282   }
  283   printf("\nCount: %d\n\n", i);
  284 }

Generated by cgit