4 files changed, 608 insertions, 0 deletions
diff --git a/quantum/unicode/unicode.c b/quantum/unicode/unicode.c
new file mode 100644
index 0000000000..f9f429e7af
--- /dev/null
+++ b/quantum/unicode/unicode.c
@@ -0,0 +1,376 @@
+/* Copyright 2022
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "unicode.h"
+
+#include "eeprom.h"
+#include "eeconfig.h"
+#include "action.h"
+#include "action_util.h"
+#include "host.h"
+#include "keycode.h"
+#include "wait.h"
+#include "audio.h"
+#include "send_string.h"
+#include "utf8.h"
+
+#if defined(UNICODE_ENABLE) + defined(UNICODEMAP_ENABLE) + defined(UCIS_ENABLE) > 1 // each defined() yields 0 or 1, so the sum counts enabled methods
+#    error "Cannot enable more than one Unicode method (UNICODE, UNICODEMAP, UCIS) at the same time"
+#endif
+
+// Keycodes used for starting Unicode input on different platforms (each may be overridden before this point)
+#ifndef UNICODE_KEY_MAC
+#    define UNICODE_KEY_MAC KC_LEFT_ALT // registered by unicode_input_start(), unregistered by finish/cancel
+#endif
+#ifndef UNICODE_KEY_LNX
+#    define UNICODE_KEY_LNX LCTL(LSFT(KC_U)) // tapped once via tap_code16()
+#endif
+#ifndef UNICODE_KEY_WINC
+#    define UNICODE_KEY_WINC KC_RIGHT_ALT // tapped once, followed by a tap of KC_U
+#endif
+
+// Comma-delimited, ordered list of input modes selected for use (e.g. in cycle); -1 means no list is configured
+// Example: #define UNICODE_SELECTED_MODES UC_WINC, UC_LNX
+#ifndef UNICODE_SELECTED_MODES
+#    define UNICODE_SELECTED_MODES -1
+#endif
+
+// Whether input mode changes in cycle should be written to EEPROM (also checked by unicode_input_mode_init)
+#ifndef UNICODE_CYCLE_PERSIST
+#    define UNICODE_CYCLE_PERSIST true
+#endif
+
+// Delay between starting Unicode input and sending a sequence, in ms (also used after pressing Alt in UC_WIN mode)
+#ifndef UNICODE_TYPE_DELAY
+#    define UNICODE_TYPE_DELAY 10
+#endif
+
+unicode_config_t unicode_config;          // current settings; filled from EEPROM by unicode_input_mode_init()
+uint8_t          unicode_saved_mods;      // modifier state captured by unicode_input_start()
+led_t            unicode_saved_led_state; // host LED state captured by unicode_input_start()
+
+#if UNICODE_SELECTED_MODES != -1
+static uint8_t selected[]     = {UNICODE_SELECTED_MODES}; // modes available for cycling, in configured order
+static int8_t  selected_count = ARRAY_SIZE(selected);
+static int8_t  selected_index;                            // position of the active mode within selected[]
+#endif
+
+/** \brief unicode input mode set at user level
+ *
+ * Weak, empty hook for user code; the default unicode_input_mode_set_kb() calls it with the new mode
+ */
+__attribute__((weak)) void unicode_input_mode_set_user(uint8_t input_mode) {}
+
+/** \brief unicode input mode set at keyboard level
+ *
+ *  Weak hook for keyboard code, called whenever the input mode is initialized, set or cycled
+ */
+__attribute__((weak)) void unicode_input_mode_set_kb(uint8_t input_mode) {
+    unicode_input_mode_set_user(input_mode); // default behavior: forward to the user-level hook
+}
+
+#ifdef AUDIO_ENABLE // optional audio feedback; a song exists only for modes with a UNICODE_SONG_* definition
+#    ifdef UNICODE_SONG_MAC
+static float song_mac[][2] = UNICODE_SONG_MAC;
+#    endif
+#    ifdef UNICODE_SONG_LNX
+static float song_lnx[][2] = UNICODE_SONG_LNX;
+#    endif
+#    ifdef UNICODE_SONG_WIN
+static float song_win[][2] = UNICODE_SONG_WIN;
+#    endif
+#    ifdef UNICODE_SONG_BSD
+static float song_bsd[][2] = UNICODE_SONG_BSD;
+#    endif
+#    ifdef UNICODE_SONG_WINC
+static float song_winc[][2] = UNICODE_SONG_WINC;
+#    endif
+#    ifdef UNICODE_SONG_EMACS
+static float song_emacs[][2] = UNICODE_SONG_EMACS;
+#    endif
+
+static void unicode_play_song(uint8_t mode) { // plays the song configured for mode, if there is one
+    switch (mode) { // modes without a configured song have no case here and are ignored
+#    ifdef UNICODE_SONG_MAC
+        case UC_MAC:
+            PLAY_SONG(song_mac);
+            break;
+#    endif
+#    ifdef UNICODE_SONG_LNX
+        case UC_LNX:
+            PLAY_SONG(song_lnx);
+            break;
+#    endif
+#    ifdef UNICODE_SONG_WIN
+        case UC_WIN:
+            PLAY_SONG(song_win);
+            break;
+#    endif
+#    ifdef UNICODE_SONG_BSD
+        case UC_BSD:
+            PLAY_SONG(song_bsd);
+            break;
+#    endif
+#    ifdef UNICODE_SONG_WINC
+        case UC_WINC:
+            PLAY_SONG(song_winc);
+            break;
+#    endif
+#    ifdef UNICODE_SONG_EMACS
+        case UC_EMACS:
+            PLAY_SONG(song_emacs);
+            break;
+#    endif
+    }
+}
+#endif
+
+void unicode_input_mode_init(void) {
+    unicode_config.raw = eeprom_read_byte(EECONFIG_UNICODEMODE);
+#if UNICODE_SELECTED_MODES != -1
+#    if UNICODE_CYCLE_PERSIST
+    // Look the stored mode up among the selected ones
+    int8_t pos = 0;
+    while (pos < selected_count && selected[pos] != unicode_config.input_mode) {
+        pos++;
+    }
+    if (pos >= selected_count) {
+        // Stored mode isn't one of the selected modes: fall back to the first entry
+        pos = 0;
+    }
+    selected_index            = pos;
+    unicode_config.input_mode = selected[pos];
+#    else
+    // Cycling isn't persisted, so always start from the first selected mode
+    selected_index            = 0;
+    unicode_config.input_mode = selected[0];
+#    endif
+#endif
+    unicode_input_mode_set_kb(unicode_config.input_mode);
+    dprintf("Unicode input mode init to: %u\n", unicode_config.input_mode);
+}
+
+uint8_t get_unicode_input_mode(void) {
+    return unicode_config.input_mode; // currently active input mode
+}
+
+void set_unicode_input_mode(uint8_t mode) {
+    unicode_config.input_mode = mode; // stored as given; not checked against the selected or known modes
+    persist_unicode_input_mode();     // an explicit set is written to EEPROM regardless of UNICODE_CYCLE_PERSIST
+#ifdef AUDIO_ENABLE
+    unicode_play_song(mode);
+#endif
+    unicode_input_mode_set_kb(mode); // notify the keyboard/user hooks
+    dprintf("Unicode input mode set to: %u\n", unicode_config.input_mode);
+}
+
+void cycle_unicode_input_mode(int8_t offset) {
+#if UNICODE_SELECTED_MODES != -1
+    int8_t next = (selected_index + offset) % selected_count;
+    if (next < 0) {
+        next += selected_count; // % keeps the sign of the dividend, so wrap negative results manually
+    }
+    unicode_config.input_mode = selected[selected_index = next];
+#    if UNICODE_CYCLE_PERSIST
+    persist_unicode_input_mode();
+#    endif
+#    ifdef AUDIO_ENABLE
+    unicode_play_song(unicode_config.input_mode);
+#    endif
+    unicode_input_mode_set_kb(unicode_config.input_mode);
+    dprintf("Unicode input mode cycle to: %u\n", unicode_config.input_mode);
+#endif
+}
+
+void persist_unicode_input_mode(void) {
+    eeprom_update_byte(EECONFIG_UNICODEMODE, unicode_config.input_mode); // stores only the input mode byte
+}
+
+__attribute__((weak)) void unicode_input_start(void) {
+    unicode_saved_led_state = host_keyboard_led_state(); // remembered so finish/cancel can undo lock-key taps
+
+    // Note the order matters here!
+    // Need to do this before we mess around with the mods, or else
+    // UNICODE_KEY_LNX (which is usually Ctrl-Shift-U) might not work
+    // correctly in the shifted case.
+    if (unicode_config.input_mode == UC_LNX && unicode_saved_led_state.caps_lock) {
+        tap_code(KC_CAPS_LOCK);
+    }
+
+    unicode_saved_mods = get_mods(); // Save current mods
+    clear_mods();                    // Unregister mods to start from a clean state
+    clear_weak_mods();
+
+    switch (unicode_config.input_mode) {
+        case UC_MAC:
+            register_code(UNICODE_KEY_MAC); // stays registered until unicode_input_finish()/unicode_input_cancel()
+            break;
+        case UC_LNX:
+            tap_code16(UNICODE_KEY_LNX);
+            break;
+        case UC_WIN:
+            // For increased reliability, use numpad keys for inputting digits
+            if (!unicode_saved_led_state.num_lock) {
+                tap_code(KC_NUM_LOCK);
+            }
+            register_code(KC_LEFT_ALT); // stays registered until unicode_input_finish()/unicode_input_cancel()
+            wait_ms(UNICODE_TYPE_DELAY);
+            tap_code(KC_KP_PLUS);
+            break;
+        case UC_WINC:
+            tap_code(UNICODE_KEY_WINC);
+            tap_code(KC_U);
+            break;
+        case UC_EMACS:
+            // The usual way to type unicode in emacs is C-x-8 <RET> then the unicode number in hex
+            tap_code16(LCTL(KC_X));
+            tap_code16(KC_8);
+            tap_code16(KC_ENTER);
+            break;
+    } // any other mode (e.g. UC_BSD) sends no start sequence
+
+    wait_ms(UNICODE_TYPE_DELAY);
+}
+
+__attribute__((weak)) void unicode_input_finish(void) {
+    const uint8_t mode = unicode_config.input_mode;
+
+    if (mode == UC_MAC) {
+        // Release the key that unicode_input_start() has been holding
+        unregister_code(UNICODE_KEY_MAC);
+    } else if (mode == UC_LNX) {
+        tap_code(KC_SPACE);
+        if (unicode_saved_led_state.caps_lock) {
+            // unicode_input_start() toggled Caps Lock; toggle it back
+            tap_code(KC_CAPS_LOCK);
+        }
+    } else if (mode == UC_WIN) {
+        unregister_code(KC_LEFT_ALT);
+        if (!unicode_saved_led_state.num_lock) {
+            // unicode_input_start() toggled Num Lock; toggle it back
+            tap_code(KC_NUM_LOCK);
+        }
+    } else if (mode == UC_WINC) {
+        tap_code(KC_ENTER);
+    } else if (mode == UC_EMACS) {
+        tap_code16(KC_ENTER);
+    }
+
+    // Re-register the modifiers saved by unicode_input_start()
+    set_mods(unicode_saved_mods);
+}
+
+__attribute__((weak)) void unicode_input_cancel(void) {
+    // Abort the pending sequence with whatever key the active input mode uses for that
+    const uint8_t mode = unicode_config.input_mode;
+
+    if (mode == UC_MAC) {
+        unregister_code(UNICODE_KEY_MAC);
+    } else if (mode == UC_LNX) {
+        tap_code(KC_ESCAPE);
+        if (unicode_saved_led_state.caps_lock) {
+            // unicode_input_start() toggled Caps Lock; toggle it back
+            tap_code(KC_CAPS_LOCK);
+        }
+    } else if (mode == UC_WINC) {
+        tap_code(KC_ESCAPE);
+    } else if (mode == UC_WIN) {
+        unregister_code(KC_LEFT_ALT);
+        if (!unicode_saved_led_state.num_lock) {
+            // unicode_input_start() toggled Num Lock; toggle it back
+            tap_code(KC_NUM_LOCK);
+        }
+    } else if (mode == UC_EMACS) {
+        tap_code16(LCTL(KC_G)); // C-g cancels
+    }
+
+    // Re-register the modifiers saved by unicode_input_start()
+    set_mods(unicode_saved_mods);
+}
+
+// clang-format off
+
+static void send_nibble_wrapper(uint8_t digit) {
+    if (unicode_config.input_mode != UC_WIN) {
+        send_nibble(digit);
+        return;
+    }
+    // UC_WIN: (digit + 9) % 10 maps 1-9 to offsets 0-8 and 0 to offset 9 from KC_KP_1; 10-15 map onto KC_A and up
+    uint8_t kc = digit >= 10 ? KC_A + (digit - 10)
+                             : KC_KP_1 + (10 + digit - 1) % 10;
+    tap_code(kc);
+}
+
+// clang-format on
+
+void register_hex(uint16_t hex) {
+    // Always types all four nibbles, most significant first
+    for (int shift = 12; shift >= 0; shift -= 4) {
+        send_nibble_wrapper((hex >> shift) & 0xF);
+    }
+}
+
+void register_hex32(uint32_t hex) {
+    // Types hex most significant nibble first. Leading zero nibbles are dropped,
+    // but the four least significant nibbles are always sent.
+    bool skipping = true;
+
+    for (int shift = 28; shift >= 0; shift -= 4) {
+        uint8_t digit = (hex >> shift) & 0xF;
+
+        // Stop skipping at the first non-zero nibble or once only four nibbles remain
+        if (shift <= 12 || digit != 0) {
+            skipping = false;
+        }
+        if (!skipping) {
+            send_nibble_wrapper(digit);
+        }
+    }
+}
+
+void register_unicode(uint32_t code_point) {
+    const bool beyond_bmp = code_point > 0xFFFF;
+    // Reject values above U+10FFFF, and anything above U+FFFF while in UC_WIN mode
+    if (code_point > 0x10FFFF || (beyond_bmp && unicode_config.input_mode == UC_WIN)) {
+        return;
+    }
+
+    unicode_input_start();
+    if (beyond_bmp && unicode_config.input_mode == UC_MAC) {
+        // UC_MAC gets code points above U+FFFF as a UTF-16 surrogate pair
+        const uint32_t offset = code_point - 0x10000;
+        register_hex32(0xD800 + (offset >> 10));   // high surrogate: upper 10 bits
+        register_hex32(0xDC00 + (offset & 0x3FF)); // low surrogate: lower 10 bits
+    } else {
+        register_hex32(code_point);
+    }
+    unicode_input_finish();
+}
+
+void send_unicode_string(const char *str) {
+    if (!str) {
+        return; // nothing to type
+    }
+
+    for (const char *cursor = str; *cursor != '\0';) {
+        // decode_utf8() returns the position to continue decoding from
+        int32_t code_point = 0;
+        cursor             = decode_utf8(cursor, &code_point);
+        if (code_point >= 0) {
+            register_unicode(code_point); // negative values flag undecodable input and are skipped
+        }
+    }
+}
diff --git a/quantum/unicode/unicode.h b/quantum/unicode/unicode.h
new file mode 100644
index 0000000000..b3e43799ff
--- /dev/null
+++ b/quantum/unicode/unicode.h
@@ -0,0 +1,165 @@
+/* Copyright 2022
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include "quantum.h"
+
+typedef union {
+    uint32_t raw; // whole-word view; unicode_input_mode_init() assigns it from a single EEPROM byte
+    struct {
+        uint8_t input_mode : 8; // active mode, compared against enum unicode_input_modes values
+    };
+} unicode_config_t;
+
+extern unicode_config_t unicode_config; // defined in unicode.c
+
+enum unicode_input_modes {
+    UC_MAC,   // macOS using Unicode Hex Input
+    UC_LNX,   // Linux using IBus
+    UC_WIN,   // Windows using EnableHexNumpad (register_unicode() rejects code points above 0xFFFF)
+    UC_BSD,   // BSD (not implemented)
+    UC_WINC,  // Windows using WinCompose (https://github.com/samhocevar/wincompose)
+    UC_EMACS, // Emacs using C-x 8 RET followed by the hex code
+    UC__COUNT // Number of available input modes (always leave at the end)
+};
+
+void    unicode_input_mode_init(void);           // load the mode from EEPROM and run the mode-set hook
+uint8_t get_unicode_input_mode(void);            // currently active mode
+void    set_unicode_input_mode(uint8_t mode);    // switch to mode and write it to EEPROM
+void    cycle_unicode_input_mode(int8_t offset); // step through UNICODE_SELECTED_MODES; does nothing if none are set
+void    persist_unicode_input_mode(void);        // write the current mode to EEPROM
+
+void unicode_input_mode_set_user(uint8_t input_mode); // weak user-level hook
+void unicode_input_mode_set_kb(uint8_t input_mode);   // weak keyboard-level hook
+
+void unicode_input_start(void);  // save mods and LED state, then send the mode's start sequence
+void unicode_input_finish(void); // send the mode's finishing keys and restore the saved mods
+void unicode_input_cancel(void); // send the mode's cancel keys and restore the saved mods
+
+void register_hex(uint16_t hex);            // type exactly four hex digits
+void register_hex32(uint32_t hex);          // type four to eight hex digits, dropping leading zeros
+void register_unicode(uint32_t code_point); // type one code point using the active input mode
+
+void send_unicode_string(const char *str); // type each code point of a UTF-8 string; NULL is ignored
+
+// clang-format off
+
+#define UC_BSPC UC(0x0008) // (backspace)
+
+#define UC_SPC  UC(0x0020) // (space)
+#define UC_EXLM UC(0x0021) // ! (exclamation mark)
+#define UC_DQUT UC(0x0022) // " (double quote)
+#define UC_HASH UC(0x0023) // # (number sign)
+#define UC_DLR  UC(0x0024) // $ (dollar sign)
+#define UC_PERC UC(0x0025) // % (percent sign)
+#define UC_AMPR UC(0x0026) // & (ampersand)
+#define UC_QUOT UC(0x0027) // ' (apostrophe)
+#define UC_LPRN UC(0x0028) // ( (left parenthesis)
+#define UC_RPRN UC(0x0029) // ) (right parenthesis)
+#define UC_ASTR UC(0x002A) // * (asterisk)
+#define UC_PLUS UC(0x002B) // + (plus sign)
+#define UC_COMM UC(0x002C) // , (comma)
+#define UC_DASH UC(0x002D) // - (hyphen-minus)
+#define UC_DOT  UC(0x002E) // . (full stop)
+#define UC_SLSH UC(0x002F) // / (slash)
+
+#define UC_0    UC(0x0030) // 0
+#define UC_1    UC(0x0031) // 1
+#define UC_2    UC(0x0032) // 2
+#define UC_3    UC(0x0033) // 3
+#define UC_4    UC(0x0034) // 4
+#define UC_5    UC(0x0035) // 5
+#define UC_6    UC(0x0036) // 6
+#define UC_7    UC(0x0037) // 7
+#define UC_8    UC(0x0038) // 8
+#define UC_9    UC(0x0039) // 9
+#define UC_COLN UC(0x003A) // : (colon)
+#define UC_SCLN UC(0x003B) // ; (semicolon)
+#define UC_LT   UC(0x003C) // < (less-than sign)
+#define UC_EQL  UC(0x003D) // = (equals sign)
+#define UC_GT   UC(0x003E) // > (greater-than sign)
+#define UC_QUES UC(0x003F) // ? (question mark)
+
+#define UC_AT   UC(0x0040) // @ (at sign)
+#define UC_A    UC(0x0041) // A
+#define UC_B    UC(0x0042) // B
+#define UC_C    UC(0x0043) // C
+#define UC_D    UC(0x0044) // D
+#define UC_E    UC(0x0045) // E
+#define UC_F    UC(0x0046) // F
+#define UC_G    UC(0x0047) // G
+#define UC_H    UC(0x0048) // H
+#define UC_I    UC(0x0049) // I
+#define UC_J    UC(0x004A) // J
+#define UC_K    UC(0x004B) // K
+#define UC_L    UC(0x004C) // L
+#define UC_M    UC(0x004D) // M
+#define UC_N    UC(0x004E) // N
+#define UC_O    UC(0x004F) // O
+
+#define UC_P    UC(0x0050) // P
+#define UC_Q    UC(0x0051) // Q
+#define UC_R    UC(0x0052) // R
+#define UC_S    UC(0x0053) // S
+#define UC_T    UC(0x0054) // T
+#define UC_U    UC(0x0055) // U
+#define UC_V    UC(0x0056) // V
+#define UC_W    UC(0x0057) // W
+#define UC_X    UC(0x0058) // X
+#define UC_Y    UC(0x0059) // Y
+#define UC_Z    UC(0x005A) // Z
+#define UC_LBRC UC(0x005B) // [ (left square bracket)
+#define UC_BSLS UC(0x005C) // (backslash)
+#define UC_RBRC UC(0x005D) // ] (right square bracket)
+#define UC_CIRM UC(0x005E) // ^ (circumflex accent)
+#define UC_UNDR UC(0x005F) // _ (underscore)
+
+#define UC_GRV  UC(0x0060) // ` (grave accent)
+#define UC_a    UC(0x0061) // a
+#define UC_b    UC(0x0062) // b
+#define UC_c    UC(0x0063) // c
+#define UC_d    UC(0x0064) // d
+#define UC_e    UC(0x0065) // e
+#define UC_f    UC(0x0066) // f
+#define UC_g    UC(0x0067) // g
+#define UC_h    UC(0x0068) // h
+#define UC_i    UC(0x0069) // i
+#define UC_j    UC(0x006A) // j
+#define UC_k    UC(0x006B) // k
+#define UC_l    UC(0x006C) // l
+#define UC_m    UC(0x006D) // m
+#define UC_n    UC(0x006E) // n
+#define UC_o    UC(0x006F) // o
+
+#define UC_p    UC(0x0070) // p
+#define UC_q    UC(0x0071) // q
+#define UC_r    UC(0x0072) // r
+#define UC_s    UC(0x0073) // s
+#define UC_t    UC(0x0074) // t
+#define UC_u    UC(0x0075) // u
+#define UC_v    UC(0x0076) // v
+#define UC_w    UC(0x0077) // w
+#define UC_x    UC(0x0078) // x
+#define UC_y    UC(0x0079) // y
+#define UC_z    UC(0x007A) // z
+#define UC_LCBR UC(0x007B) // { (left curly bracket)
+#define UC_PIPE UC(0x007C) // | (vertical bar)
+#define UC_RCBR UC(0x007D) // } (right curly bracket)
+#define UC_TILD UC(0x007E) // ~ (tilde)
+#define UC_DEL  UC(0x007F) // (delete)
diff --git a/quantum/unicode/utf8.c b/quantum/unicode/utf8.c
new file mode 100644
index 0000000000..4b2cd4d8d4
--- /dev/null
+++ b/quantum/unicode/utf8.c
@@ -0,0 +1,46 @@
+/* Copyright 2021 QMK
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "utf8.h"
+
+// Borrowed from https://nullprogram.com/blog/2017/10/06/
+const char *decode_utf8(const char *str, int32_t *code_point) {
+    // Decodes the sequence at str into *code_point (-1 when invalid) and returns where to resume decoding.
+    // Bytes are viewed as unsigned: with a signed plain char the comparisons against 0x80 and up would misfire.
+    const unsigned char *bytes = (const unsigned char *)str;
+    int                  extra = 0; // number of continuation bytes the lead byte announces
+
+    if ((bytes[0] & 0xE0) == 0xC0) { // U+0080-07FF
+        extra = 1;
+    } else if ((bytes[0] & 0xF0) == 0xE0) { // U+0800-FFFF
+        extra = 2;
+    } else if ((bytes[0] & 0xF8) == 0xF0 && bytes[0] <= 0xF4) { // U+10000-10FFFF
+        extra = 3;
+    }
+    // ASCII (U+0000-007F) is taken as is; a stray continuation byte or an invalid lead byte is rejected
+    int32_t value = (bytes[0] < 0x80) ? bytes[0] : (extra ? bytes[0] & (0x3F >> extra) : -1);
+    for (int i = 1; i <= extra; i++) {
+        if ((bytes[i] & 0xC0) != 0x80) { // truncated or malformed: consume one byte only, never step over the NUL
+            value = -1;
+            extra = 0;
+            break;
+        }
+        value = (value << 6) | (bytes[i] & 0x3F);
+    }
+    // part of a UTF-16 surrogate pair - invalid
+    *code_point = (value >= 0xD800 && value <= 0xDFFF) ? -1 : value;
+    return str + 1 + extra;
+}
diff --git a/quantum/unicode/utf8.h b/quantum/unicode/utf8.h
new file mode 100644
index 0000000000..521dd1918c
--- /dev/null
+++ b/quantum/unicode/utf8.h
@@ -0,0 +1,21 @@
+/* Copyright 2021 QMK
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+const char *decode_utf8(const char *str, int32_t *code_point); // stores one decoded code point (-1 if invalid); returns the next read position