Skip to content

Commit 3dd125e

Browse files
committed
UTF-8 discussion
1 parent a558348 commit 3dd125e

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

src/decode_aprs.c

+23
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,26 @@ void decode_aprs_print (decode_aprs_t *A) {
731731
* To be part of a valid UTF-8 sequence, it would need to be followed by 10xxxxxx.
732732
*/
733733

734+
// For values 00-7F, ASCII, Unicode, and ISO Latin-1 are all the same.
735+
// ISO Latin-1 adds the 80-FF range, with a few common symbols such as degree, and
736+
// letters with diacritical marks for many European languages.
737+
// The Unicode range 80-FF is called "Latin-1 Supplement" and is exactly the same as ISO Latin-1.
738+
// For UTF-8, each character in the 80-FF range takes two bytes: a lead byte (C2 or C3) is inserted.
739+
// Unicode UTF-8
740+
// ------- -----
741+
// 8x C2 8x Insert C2, keep original
742+
// 9x C2 9x "
743+
// Ax C2 Ax "
744+
// Bx C2 Bx "
745+
// Cx C3 8x Insert C3, subtract 40 from original
746+
// Dx C3 9x "
747+
// Ex C3 Ax "
748+
// Fx C3 Bx "
749+
//
750+
// Can we use this knowledge to provide guidance on other ISO Latin-1 characters besides degree?
751+
// Should we?
752+
// Reference: https://www.fileformat.info/info/unicode/utf8test.htm
753+
734754
if ( ! A->g_quiet) {
735755

736756
for (j=0; j<n; j++) {
@@ -3866,6 +3886,9 @@ static int data_extension_comment (decode_aprs_t *A, char *pdext)
38663886
strlcpy (A->g_directivity, dir[pdext[6]-'0'], sizeof(A->g_directivity));
38673887
}
38683888

3889+
// TODO: Look for another character, 0-9 or A-Z, followed by a '/'.
3890+
// http://www.aprs.org/aprs12/probes.txt
3891+
38693892
process_comment (A, pdext+7, -1);
38703893
return 1;
38713894
}

0 commit comments

Comments
 (0)