Add Text To Speech API (and hint mode)

This adds an excmd interface to the Web Speech API (TTS), which allows users to read text out and set voice and parameters via config options. Excmds: - ttsread: reads out the given text or element content (CSS selected) - ttsvoices: lists available voice names (can be used in the 'ttsvoice' option) - ttscontrol: stops the current reading (should also pause/resume, but that doesn't seem to work right now) Config options: - ttsvoice: the name of the voice to use - ttsrate: (0.1-10) - ttsvolume: (0-1) - ttspitch: (0-2) Also the ;r hint submode is added which reads the textcontent of the element with the configured voice
2025-03-06 01:51:40 -05:00 · 2017-11-30 04:11:49 +00:00 · 2017-11-30 04:11:49 +00:00 · c69aac0a53
commit c69aac0a53
parent 7334777714
5 changed files with 197 additions and 0 deletions
--- a/src/config.ts
+++ b/src/config.ts
@ -70,6 +70,7 @@ const DEFAULTS = o({
        ";I": "hint -I",
        ";y": "hint -y",
        ";p": "hint -p",
        ";r": "hint -r",
        ";;": "hint -;",
        ";#": "hint -#",
        "I": "mode ignore",
@ -80,6 +81,11 @@ const DEFAULTS = o({
    "storageloc": "sync",
    "hintchars": "hjklasdfgyuiopqwertnmzxcvb",
    "hintorder": "normal",
    "ttsvoice": "default",  // chosen from the listvoices list, or "default"
    "ttsvolume": 1,         // 0 to 1
    "ttsrate": 1,           // 0.1 to 10
    "ttspitch": 1,          // 0 to 2
 })
 // currently only supports 2D or 1D storage
--- a/src/excmds.ts
+++ b/src/excmds.ts
@ -1083,6 +1083,7 @@ import * as hinting from './hinting_background'
        - -b open in background
        - -y copy (yank) link's target to clipboard
        - -p copy an element's text to the clipboard
        - -r read an element's text with text-to-speech
        - -i view an image
        - -I view an image in a new tab
        - -; focus an element
@ -1104,6 +1105,7 @@ export function hint(option?: string, selectors="") {
    else if (option === "-;") hinting.hintFocus()
    else if (option === "-#") hinting.hintPageAnchorYank()
    else if (option === "-c") hinting.hintPageSimple(selectors)
    else if (option === "-r") hinting.hintRead()
    else hinting.hintPageSimple()
 }
@ -1129,6 +1131,92 @@ export async function gobble(nChars: number, endCmd: string) {
 // }}}
 //
 // {{{TEXT TO SPEECH
 import * as TTS from './text_to_speech'
 /**
 * Speak the text content of every element matched by a CSS selector.
 *
 * Each match is passed to the TTS module on its own, in the order the DOM
 * helper returns them.
 *
 * @param selector  CSS selector identifying the elements to read
 */
 //#content_helper
 function tssReadFromCss(selector: string): void {
    // The empty array is the helper's second argument
    // (presumably a list of filters - TODO confirm against DOM.getElemsBySelector)
    const matches = DOM.getElemsBySelector(selector, [])
    matches.forEach(node => {
        const spokenText = node.textContent
        TTS.readText(spokenText)
    })
 }
 /**
 * Read text aloud using the browser's text to speech functionality and
 * the settings currently set
 *
 * @param mode      the command mode
 *                      -t read the following args as text
 *                      -c read the content of elements matching the selector
 * @param args      for -t, the words to read; for -c, the CSS selector.
 *                  In both modes the args are re-joined with single spaces,
 *                  so a selector containing spaces (e.g. `div p`) is used
 *                  whole instead of being cut off after its first token.
 */
 //#content
 export async function ttsread(mode: "-t" | "-c", ...args: string[]) {
    // really should quote args, but for now, join
    // (presumably the excmd parser has already split them on whitespace)
    const joined = args.join(" ")
    if (mode === "-t") {
        TTS.readText(joined)
    }
    else if (mode === "-c") {
        // A blank selector is treated the same as a missing one
        if (joined.trim().length > 0) {
            tssReadFromCss(joined)
        } else {
            console.log("Error: no CSS selector supplied")
        }
    } else {
        console.log("Unknown mode for ttsread command: " + mode)
    }
 }
 /**
 * List the names of the voices the TTS system offers, sorted and separated
 * by commas, in the command line. Any of these names can be used as the
 * value of the `ttsvoice` config option.
 */
 //#background
 export async function ttsvoices() {
    const names = TTS.listVoices()
    names.sort()
    // need a better way to show this to the user
    const listing = names.join(", ")
    fillcmdline_notrail(listing)
 }
 /**
 * Control the reading currently in progress
 *
 * Arguments:
 *   - stop:    cancel current and pending utterances
 *
 * Any other argument is reported on the console as unknown and ignored.
 */
 //#content
 export async function ttscontrol(action: string) {
    // Translate the user's input into a TTS.Action. Only "stop" is exposed:
    // the TTS module notes that pause() doesn't seem to work, so the
    // play/pause actions are withheld to avoid offering things that won't work
    const ttsAction: TTS.Action | null = action === "stop" ? "stop" : null
    if (!ttsAction) {
        console.log("Unknown text-to-speech action: " + action)
        return
    }
    TTS.doAction(ttsAction)
 }
 //}}}
 // unsupported on android
 /** Add or remove a bookmark.
 *
--- a/src/hinting.ts
+++ b/src/hinting.ts
@ -17,6 +17,7 @@ import {hasModifiers} from './keyseq'
 import state from './state'
 import {messageActiveTab} from './messaging'
 import * as config from './config'
 import * as TTS from './text_to_speech'
 /** Simple container for the state of a single frame's hints. */
 class HintState {
@ -366,6 +367,13 @@ function hintFocus() {
    })
 }
 /** Hint the elements returned by elementswithtext() and, once a hint is
 * chosen, read its target's text content out via text-to-speech */
 function hintRead() {
    const candidates = elementswithtext()
    hintPage(candidates, chosen => {
        const spokenText = chosen.target.textContent
        TTS.readText(spokenText)
    })
 }
 function selectFocusedHint() {
    console.log("Selecting hint.", state.mode)
    const focused = modeState.focusedHint
@ -385,4 +393,5 @@ addListener('hinting_content', attributeCaller({
    hintPageOpenInBackground,
    hintImage,
    hintFocus,
    hintRead,
 }))
--- a/src/hinting_background.ts
+++ b/src/hinting_background.ts
@ -40,6 +40,10 @@ export async function hintFocus() {
    return await messageActiveTab('hinting_content', 'hintFocus')
 }
 /** Send 'hintRead' to the active tab's 'hinting_content' listener and
 * resolve with whatever that message call yields */
 export async function hintRead() {
    const reply = await messageActiveTab('hinting_content', 'hintRead')
    return reply
 }
 import {MsgSafeKeyboardEvent} from './msgsafe'
 /** At some point, this might be turned into a real keyseq parser
--- a/src/text_to_speech.ts
+++ b/src/text_to_speech.ts
@ -0,0 +1,90 @@
 /** Functions to deal with text to speech in Tridactyl
 */
 import * as Config from './config'
 /** Look up one of the browser's speech synthesis voices by name
 *
 * The name is compared exactly; "default" gets no special treatment and is
 * searched for like any other name.
 *
 * @param name  the voice name to search for
 * @return the first voice from the TTS API with that name, or undefined
 */
 function getVoiceFromName(name: string | "default"): SpeechSynthesisVoice {
    for (const candidate of window.speechSynthesis.getVoices()) {
        if (candidate.name === name) {
            return candidate
        }
    }
    return undefined
 }
 /**
 * Read the text using the browser's HTML5 TTS API
 *
 * Pitch, volume, rate and voice are taken from the `ttspitch`, `ttsvolume`,
 * `ttsrate` and `ttsvoice` config options. A numeric option outside its
 * accepted range (pitch 0-2, volume 0-1, rate 0.1-10, all inclusive) is
 * ignored, as is a voice name that matches no available voice; the
 * utterance then keeps whatever value it was created with.
 *
 * @param text      the text to read out
 */
 export function readText(text: string): void {
    const utterance = new SpeechSynthesisUtterance(text)

    const pitch = Config.get("ttspitch")
    const volume = Config.get("ttsvolume")
    const rate = Config.get("ttsrate")
    const voice = Config.get("ttsvoice")

    // Upper bound is inclusive: 2 is the maximum pitch
    if (pitch >= 0 && pitch <= 2)
        utterance.pitch = pitch

    if (volume >= 0 && volume <= 1)
        utterance.volume = volume

    if (rate >= 0.1 && rate <= 10)
        utterance.rate = rate

    // Only override the voice when the configured name resolves to one
    const voiceObj = getVoiceFromName(voice)
    if (voiceObj) {
        utterance.voice = voiceObj
    }

    window.speechSynthesis.speak(utterance)
 }
 /**
 * Supported TTS control actions, as handled by `doAction`:
 *   - "stop":      cancel speech synthesis
 *   - "play":      resume speech synthesis
 *   - "pause":     pause speech synthesis
 *   - "playpause": resume if currently paused, otherwise pause
 */
 export type Action = "stop" | "play" | "pause" | "playpause"
 /**
 * Apply a control action to the browser's speech synthesis
 *
 * An action that is none of the supported ones is silently ignored.
 *
 * Note: pause() doesn't seem to work, so play, pause and playpause aren't
 * going to be very useful right now
 *
 * @param action    the control action to perform
 */
 export function doAction(action: Action): void {
    const synth = window.speechSynthesis
    if (action === "stop") {
        synth.cancel()
    } else if (action === "play") {
        synth.resume()
    } else if (action === "pause") {
        synth.pause()
    } else if (action === "playpause") {
        // toggle according to the synthesizer's own paused flag
        if (synth.paused) {
            synth.resume()
        } else {
            synth.pause()
        }
    }
 }
 /**
 * Collect the names of the voices the browser's TTS API reports; these are
 * the names by which users can refer to the voices in config
 *
 * @return list of voice names, in the order the API returns the voices
 */
 export function listVoices(): string[] {
    const names: string[] = []
    for (const voice of window.speechSynthesis.getVoices()) {
        names.push(voice.name)
    }
    return names
 }