1 /**
2 * upwgen generates random internationalized passwords
3 * Copyright (C) 2019 Aaron Ball <nullspoon@oper.io>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
#include "i18n_cat.h"

#include <wchar.h>
19
20
/**
 * intrcat:
 * Integer range concatenate. Appends every integer of the inclusive range
 * [rstart, rend] to a zero-terminated unsigned int array and writes a new
 * zero terminator after the last appended value.
 *
 * The current end of the array is found by scanning for the first 0 element,
 * so the caller must pass a zero-terminated array with enough room for the
 * appended values plus the terminator. No bounds checking is possible here.
 * A range that contains 0 would embed a terminator in the data.
 *
 * @arr    Zero-terminated array to cat range of ints to
 * @rstart Range start integer (inclusive)
 * @rend   Range end integer (inclusive)
 *
 * @return Number of integers appended to array (0 when rstart > rend)
 */
unsigned int intrcat(unsigned int* arr, unsigned int rstart, unsigned int rend) {
  unsigned int end = 0;      // index of the existing terminator
  unsigned int appended = 0; // values written by this call
  unsigned int value = rstart;

  // An inverted range is empty: leave the array untouched and report zero
  // instead of a wrapped-around difference.
  if (rstart > rend)
    return 0;

  while (arr[end] != 0)
    end++;

  // Count what is actually written so the return value matches the inclusive
  // range. The exit test is made on the value just stored, rather than
  // `value <= rend`, so the loop also terminates when rend is UINT_MAX.
  for (;;) {
    arr[end + appended] = value;
    appended++;
    if (value == rend)
      break;
    value++;
  }

  arr[end + appended] = 0;
  return appended;
}
49
50
/**
 * i18n_cat_ascii_upper:
 * Appends the ascii [english] upper case letters, decimal 65 ('A') through
 * 90 ('Z'), to the destination array by delegating to intrcat.
 *
 * @dest Destination int array to append to
 *
 * @return The value intrcat returns for the appended range
 */
unsigned int i18n_cat_ascii_upper(unsigned int* dest) {
  const unsigned int first = 0x41; // 'A'
  const unsigned int last = 0x5A;  // 'Z'

  return intrcat(dest, first, last);
}
62
63
/**
 * i18n_cat_ascii_lower:
 * Appends the ascii [english] lower case letters, decimal 97 ('a') through
 * 122 ('z'), to the destination array by delegating to intrcat.
 *
 * @dest Destination int array to append to
 *
 * @return The value intrcat returns for the appended range
 */
unsigned int i18n_cat_ascii_lower(unsigned int* dest) {
  const unsigned int first = 0x61; // 'a'
  const unsigned int last = 0x7A;  // 'z'

  return intrcat(dest, first, last);
}
75
76
/**
 * i18n_cat_ascii_numerals:
 * Appends the ascii numerals, decimal 48 ('0') through 57 ('9'), to the
 * destination array by delegating to intrcat.
 *
 * @dest Destination int array to append to
 *
 * @return The value intrcat returns for the appended range
 */
unsigned int i18n_cat_ascii_numerals(unsigned int* dest) {
  const unsigned int first = 0x30; // '0'
  const unsigned int last = 0x39;  // '9'

  return intrcat(dest, first, last);
}
88
89
/**
 * i18n_cat_ascii_symbols:
 * Appends the four runs of ascii symbols that sit between the numerals and
 * letters to the destination array, one intrcat call per run, in ascending
 * order.
 *
 * @dest Destination int array to append to
 *
 * @return Sum of the values intrcat returned for the individual runs
 */
unsigned int i18n_cat_ascii_symbols(unsigned int* dest) {
  // Inclusive { first, last } pairs, appended in table order.
  static const unsigned int ranges[][2] = {
    { 0x21, 0x2F }, // English symbols ! - /
    { 0x3A, 0x40 }, // English symbols : - @
    { 0x5B, 0x60 }, // English symbols [ - `
    { 0x7B, 0x7E }, // English symbols { - ~
  };
  unsigned int appended = 0;
  unsigned int r;

  for (r = 0; r < sizeof(ranges) / sizeof(ranges[0]); r++)
    appended += intrcat(dest, ranges[r][0], ranges[r][1]);

  return appended;
}
106
107
/**
 * i18n_cat_ascii:
 * Appends the whole printable ascii range, 0x21 ('!') through 0x7E ('~'), to
 * the destination array in a single intrcat call. The space at 0x20 is left
 * out. The range spans the English numerals, upper and lower case letters,
 * and symbols.
 *
 * @dest Destination int array to append to
 *
 * @return The value intrcat returns for the appended range
 */
unsigned int i18n_cat_ascii(unsigned int* dest) {
  const unsigned int first = 0x0021; // '!'
  const unsigned int last = 0x007E;  // '~'
  unsigned int appended = intrcat(dest, first, last);

  return appended;
}
121
122
/**
 * i18n_cat_one:
 * Appends the first group of unicode characters: the latin-script blocks
 * used by some of the most common languages in the world, plus the IPA
 * extension characters. Each block is appended with its own intrcat call, in
 * ascending order.
 *
 * @dest Destination int array to append to
 *
 * @return Sum of the values intrcat returned for the individual blocks
 */
unsigned int i18n_cat_one(unsigned int* dest) {
  // Inclusive { first, last } pairs, appended in table order.
  static const unsigned int blocks[][2] = {
    { 0x00A1, 0x00FF }, // Latin-1 Supplement
    { 0x0100, 0x017F }, // Latin extended A
    { 0x0180, 0x024F }, // Latin extended B
    { 0x0250, 0x02AF }, // IPA Extensions
  };
  unsigned int appended = 0;
  unsigned int b;

  for (b = 0; b < sizeof(blocks) / sizeof(blocks[0]); b++)
    appended += intrcat(dest, blocks[b][0], blocks[b][1]);

  return appended;
}
141
142
/**
 * i18n_cat_two:
 * Appends the second group of unicode characters, aimed at the scripts of the
 * second most common languages in the world: Devanagari (Hindi, Sanskrit),
 * Hebrew, Arabic, and Cyrillic. Ranges are appended in the order listed in
 * the table below, one intrcat call each.
 *
 * @dest Destination int array to append to
 *
 * @return Sum of the values intrcat returned for the individual ranges
 */
unsigned int i18n_cat_two(unsigned int* dest) {
  // Inclusive { first, last } pairs, appended in table order.
  static const unsigned int ranges[][2] = {
    // Devanagari (Sanskrit, Hindi, Marathi, Sindhi, Nepali, etc). Vowel signs
    // are skipped because they modify the preceding character and do not
    // combine sensibly with characters from other scripts.
    { 0x0904, 0x0939 },
    { 0x0958, 0x0961 },
    { 0x0964, 0x096F },
    { 0x0972, 0x097F },

    // Hebrew consonants only; vowel marks modify the previous character,
    // which does not work when mixed with other scripts.
    { 0x05D0, 0x05EA },
    { 0x05F0, 0x05F4 },

    // Arabic, as one contiguous run.
    // NOTE(review): this was labelled "only consonants", but a run this wide
    // presumably also takes in the Arabic combining marks (around
    // 0x064B-0x065F) -- confirm against the Unicode chart.
    { 0x061E, 0x06FF },

    // Cyrillic and Cyrillic supplement
    { 0x0400, 0x04F0 },
    { 0x0500, 0x052F },
  };
  unsigned int appended = 0;
  unsigned int r;

  for (r = 0; r < sizeof(ranges) / sizeof(ranges[0]); r++)
    appended += intrcat(dest, ranges[r][0], ranges[r][1]);

  return appended;
}
178
179
/**
 * i18n_cat_three:
 * Appends the third most popular group of unicode characters. This group
 * attempts to cover scripts used by the third most common languages in the
 * world. In this case, this includes the Armenian, Bengali, Greek, and Coptic
 * blocks.
 *
 * Every range is listed exactly once and with start <= end, so no character
 * is over-represented in the destination array and every call contributes a
 * sane value to the returned count.
 *
 * NOTE: Using this function will likely introduce characters for which your
 * font does not have glyphs.
 *
 * @dest Destination int array to copy into
 *
 * @return Sum of the values intrcat returned for the individual ranges
 */
unsigned int i18n_cat_three(unsigned int* dest) {
  unsigned int count = 0;

  // Armenian
  // Armenian has a gap at 0x0557, 0x0558, 0x058B, and 0x058C
  count += intrcat(dest, 0x0531, 0x0556);
  count += intrcat(dest, 0x0559, 0x058A);

  // Bengali
  // Independent vowels and consonants; the gaps between the ranges are
  // unassigned code points.
  count += intrcat(dest, 0x0985, 0x098C);
  count += intrcat(dest, 0x098F, 0x0990);
  count += intrcat(dest, 0x0993, 0x09A8);
  count += intrcat(dest, 0x09AA, 0x09B0);
  count += intrcat(dest, 0x09B2, 0x09B2);
  count += intrcat(dest, 0x09B6, 0x09B9);
  // Additional letters. 0x09DE is unassigned, and 0x09E2-0x09E3 are vowel
  // signs that modify the previous character, so both are skipped.
  count += intrcat(dest, 0x09DC, 0x09DD);
  count += intrcat(dest, 0x09DF, 0x09E1);
  // Digits.
  // NOTE(review): the previous ranges here (0x09DF-0x09D3, 0x09D6-0x09DC)
  // were inverted / pointed at unassigned code points; 0x09DF-0x09E1 and
  // 0x09E6-0x09EF are the presumed intent (D typed for E) -- confirm.
  count += intrcat(dest, 0x09E6, 0x09EF);

  // Greek and Coptic
  // These are complicated because they are missing assigned values for
  // 0x0378, 0x0379, 0x0380, 0x0381, 0x0382, 0x0383, 0x038B, 0x038D, 0x03A2
  count += intrcat(dest, 0x0370, 0x0377);
  count += intrcat(dest, 0x037A, 0x037F);
  count += intrcat(dest, 0x0384, 0x038A);
  count += intrcat(dest, 0x038C, 0x038C);
  count += intrcat(dest, 0x038E, 0x03A1);
  count += intrcat(dest, 0x03A3, 0x03FF);

  return count;
}
227
228
/**
 * i18n_cat_four:
 * Appends the fourth group of unicode characters, aimed at the scripts of the
 * fourth most common languages in the world: Thaana, NKo, Samaritan, Mandaic,
 * Syriac, Runic, Tifinagh, and Georgian. Ranges are appended in the order
 * listed in the table below, one intrcat call each.
 *
 * NOTE: Using this function will likely introduce characters for which your
 * font does not have glyphs.
 *
 * @dest Destination int array to append to
 *
 * @return Sum of the values intrcat returned for the individual ranges
 */
unsigned int i18n_cat_four(unsigned int* dest) {
  // Inclusive { first, last } pairs, appended in table order.
  static const unsigned int ranges[][2] = {
    { 0x0780, 0x07A5 }, // Thaana
    { 0x07C0, 0x07EA }, // NKo
    { 0x0800, 0x0815 }, // Samaritan
    { 0x0830, 0x083E }, // Samaritan
    { 0x0840, 0x085B }, // Mandaic

    { 0x0710, 0x072F }, // Syriac
    { 0x074D, 0x074F }, // Syriac

    { 0x16A0, 0x16F8 }, // Runic
    { 0x2D30, 0x2D67 }, // Tifinagh

    // Georgian
    { 0x10A0, 0x10C5 },
    { 0x10C7, 0x10C7 },
    { 0x10CD, 0x10CD },
    { 0x10D0, 0x10FF },
  };
  unsigned int appended = 0;
  unsigned int r;

  for (r = 0; r < sizeof(ranges) / sizeof(ranges[0]); r++)
    appended += intrcat(dest, ranges[r][0], ranges[r][1]);

  return appended;
}
266
267
/**
 * i18n_dump_arr:
 * Prints a zero-terminated array of unsigned ints representing international
 * characters. Outputs to STDOUT a header line, then one line per element with
 * the unicode value in hex, in decimal, and as the character itself, and
 * finally the number of elements printed.
 *
 * @arr Zero-terminated unicode array to print
 */
void i18n_dump_arr(unsigned int* arr) {
  unsigned int i = 0; // cursor

  printf("Hex Decimal [Character]\n");
  while (arr[i] != 0) {
    // The elements are unsigned, so they are printed with %x / %u. %lc
    // expects a wint_t argument, hence the explicit cast.
    printf("0x%04x %-7u [%lc]\n", arr[i], arr[i], (wint_t)arr[i]);
    i++;
  }
  printf("\nCount: %u\n\n", i);
}
|