/**
 * Streams the output of a local `espeak-ng` command into a live
 * `MediaStreamTrack`.
 *
 * The command string is transferred (as a `ReadableStream`) to a hidden
 * extension iframe (`this.src`); the iframe answers with a `ReadableStream`
 * of the command's STDOUT. Those bytes are re-chunked and written, as s16
 * `AudioData` frames, to a `MediaStreamTrackGenerator` whose track is exposed
 * through `this.mediaStream` and played through `this.ac.destination`.
 */
class AudioStream {
  /**
   * @param {object} options
   * @param {string} options.stdin - Command line to run; must begin with
   *   `espeak-ng`.
   * @param {boolean} [options.recorder=false] - When true, the generated
   *   stream is also recorded with `MediaRecorder`, and `start()` resolves
   *   with the recording as an `ArrayBuffer`.
   * @throws {Error} If `stdin` does not begin with `espeak-ng`.
   */
  constructor({ stdin, recorder = false }) {
    if (!/^espeak-ng/.test(stdin)) {
      throw new Error(`stdin should begin with "espeak-ng" command`);
    }
    this.command = stdin;
    // The command travels to the iframe as a single File inside a
    // transferable ReadableStream.
    this.stdin = new ReadableStream({
      start(c) {
        c.enqueue(
          new File([stdin], 'espeakng', {
            type: 'application/octet-stream',
          })
        );
        c.close();
      },
    });
    // Number of audio bytes consumed from STDOUT (header bytes excluded).
    this.readOffset = 0;
    // Accumulated duration, in seconds, of the frames written so far.
    this.duration = 0;
    // Size in bytes of each chunk queued for playback.
    this.channelDataLength = 440;
    this.sampleRate = 22050;
    this.numberOfChannels = 1;
    this.init = false;
    // Leading bytes of STDOUT that are skipped before audio data starts;
    // presumably the 44-byte WAV header written by `espeak-ng --stdout`.
    this.headerBytesRemaining = 44;
    // Bytes received from STDOUT that have not yet filled a whole chunk.
    this.channelData = [];
    this.src = 'chrome-extension://<id>/nativeTransferableStream.html';
    this.ac = new AudioContext({
      latencyHint: 0,
    });
    this.ac.suspend();
    this.msd = new MediaStreamAudioDestinationNode(this.ac, {
      channelCount: this.numberOfChannels,
    });
    this.inputController = void 0;
    this.inputStream = new ReadableStream({
      start: (controller) => {
        this.inputController = controller;
      },
    });
    this.inputReader = this.inputStream.getReader();
    const { stream } = this.msd;
    this.stream = stream;
    const [track] = stream.getAudioTracks();
    this.track = track;
    // A 0 Hz oscillator feeds the destination node; the frames read from its
    // track are only used for their timestamps (see audioStream()).
    this.osc = new OscillatorNode(this.ac, { frequency: 0 });
    this.processor = new MediaStreamTrackProcessor({ track });
    this.generator = new MediaStreamTrackGenerator({ kind: 'audio' });
    const { writable } = this.generator;
    this.writable = writable;
    const { readable: audioReadable } = this.processor;
    this.audioReadable = audioReadable;
    this.audioWriter = this.writable.getWriter();
    this.mediaStream = new MediaStream([this.generator]);
    if (recorder) {
      this.recorder = new MediaRecorder(this.mediaStream);
      this.recorder.ondataavailable = ({ data }) => {
        this.data = data;
      };
    }
    this.outputSource = new MediaStreamAudioSourceNode(this.ac, {
      mediaStream: this.mediaStream,
    });
    this.outputSource.connect(this.ac.destination);
    this.resolve = void 0;
    this.promise = new Promise((_) => (this.resolve = _));
    this.osc.connect(this.msd);
    this.osc.start();
    this.track.onmute = this.track.onunmute = this.track.onended = (e) =>
      console.log(e);
    this.abortable = new AbortController();
    const { signal } = this.abortable;
    this.signal = signal;
    this.audioReadableAbortable = new AbortController();
    const { signal: audioReadableSignal } = this.audioReadableAbortable;
    this.audioReadableSignal = audioReadableSignal;
    this.audioReadableSignal.onabort = (e) => console.log(e.type);
    this.abortHandler = async (e) => {
      try {
        await this.disconnect(true);
      } catch (err) {
        console.warn(err.message);
      }
      console.log(
        `readOffset:${this.readOffset}, duration:${this.duration}, ac.currentTime:${this.ac.currentTime}`,
        `generator.readyState:${this.generator.readyState}, audioWriter.desiredSize:${this.audioWriter.desiredSize}`,
        `inputController.desiredSize:${this.inputController.desiredSize}, ac.state:${this.ac.state}`
      );
      // Must not throw: `this.promise` is resolved right after, and abort()
      // callers are waiting on it.
      this.removeTransferableWindow();
      this.resolve('Stream aborted.');
    };
    this.signal.onabort = this.abortHandler;
  }

  /**
   * Removes the helper iframe from the document, if there is one.
   *
   * Falls back to looking the iframe up by `src` when this instance holds no
   * reference to it. Safe to call when no iframe exists.
   */
  removeTransferableWindow() {
    const frame =
      this.transferableWindow ||
      document.body.querySelector(`iframe[src="${this.src}"]`);
    if (frame) {
      frame.remove();
    }
    this.transferableWindow = null;
  }

  /**
   * Tears down the audio graph, closes the writer/reader pair, stops the
   * generator and recorder, and closes the `AudioContext`.
   *
   * @param {boolean} [abort=false] - Also abort the pipe that reads the
   *   timing frames.
   * @returns {Promise<void>} The promise returned by `AudioContext.close()`.
   */
  async disconnect(abort = false) {
    if (abort) {
      this.audioReadableAbortable.abort();
    }
    this.msd.disconnect();
    this.osc.disconnect();
    this.outputSource.disconnect();
    this.track.stop();
    await this.audioWriter.close();
    await this.audioWriter.closed;
    await this.inputReader.cancel();
    this.generator.stop();
    if (this.recorder && this.recorder.state === 'recording') {
      this.recorder.stop();
    }
    return this.ac.close();
  }

  /**
   * Starts streaming.
   *
   * @returns {Promise<ArrayBuffer|string|undefined>} Resolves with the
   *   recording when a recorder was requested, otherwise with a status string.
   */
  async start() {
    return this.nativeTransferableStream();
  }

  /**
   * Aborts the request and the audio output.
   *
   * @returns {Promise<*>} `this.promise`, which the abort handler resolves
   *   with `'Stream aborted.'`.
   */
  async abort() {
    this.abortable.abort();
    if (this.source) {
      this.source.postMessage('Abort.', '*');
    }
    return this.promise;
  }

  /**
   * Appends the hidden helper iframe and exchanges messages with it: sends
   * the command once the iframe reports `'Ready.'`, and starts playback when
   * the iframe posts back the STDOUT `ReadableStream`.
   *
   * @returns {Promise<*>} Resolves with the result of `audioStream()`.
   */
  async nativeTransferableStream() {
    return new Promise((resolve) => {
      globalThis.onmessage = (e) => {
        // Only the helper iframe may drive this instance; messages posted by
        // any other window are ignored.
        if (
          !this.transferableWindow ||
          e.source !== this.transferableWindow.contentWindow
        ) {
          return;
        }
        this.source = e.source;
        if (typeof e.data === 'string') {
          console.log(e.data);
          if (e.data === 'Ready.') {
            this.source.postMessage(this.stdin, '*', [this.stdin]);
          }
          if (e.data === 'Local server off.') {
            this.removeTransferableWindow();
            globalThis.onmessage = null;
          }
        }
        if (e.data instanceof ReadableStream) {
          this.stdout = e.data;
          resolve(this.audioStream());
        }
      };
      this.transferableWindow = document.createElement('iframe');
      this.transferableWindow.style.display = 'none';
      // The iframe reads its own `name` to learn where to post messages back.
      this.transferableWindow.name = location.href;
      this.transferableWindow.src = this.src;
      document.body.appendChild(this.transferableWindow);
    });
  }

  /**
   * Consumes one chunk of STDOUT: skips the leading header bytes (even when
   * they span several chunks), buffers the remaining bytes, and enqueues a
   * `Uint8Array` of `channelDataLength` bytes on `inputController` each time
   * the buffer is full and another byte arrives.
   *
   * @param {Uint8Array} value - Chunk read from the STDOUT stream.
   */
  enqueueInput(value) {
    let i = 0;
    if (this.headerBytesRemaining > 0) {
      i = Math.min(this.headerBytesRemaining, value.length);
      this.headerBytesRemaining -= i;
      this.init = true;
    }
    // Iterate over the view's own length: `value.buffer` may be larger than
    // the view, and indexing past the view yields `undefined`.
    for (; i < value.length; i++, this.readOffset++) {
      if (this.channelData.length === this.channelDataLength) {
        this.inputController.enqueue(
          new Uint8Array(this.channelData.splice(0, this.channelDataLength))
        );
      }
      this.channelData.push(value[i]);
    }
  }

  /**
   * Enqueues whatever bytes are still buffered and closes the input stream.
   */
  flushInput() {
    if (this.channelData.length) {
      this.inputController.enqueue(
        new Uint8Array(this.channelData.splice(0, this.channelData.length))
      );
    }
    this.inputController.close();
  }

  /**
   * Runs the two pipes that make up playback and waits for both to settle:
   * STDOUT -> fixed-size chunks (`inputStream`), and timing frames from the
   * oscillator track -> one `AudioData` frame per queued chunk.
   *
   * @returns {Promise<ArrayBuffer|string|undefined>} `this.promise`.
   * @throws Rethrows any error raised while resuming the context or waiting
   *   for the writer, after logging it.
   */
  async audioStream() {
    try {
      await this.ac.resume();
      await this.audioWriter.ready;
      await Promise.allSettled([
        this.stdout.pipeTo(
          new WritableStream({
            write: async (value) => {
              this.enqueueInput(value);
            },
            abort(e) {
              console.error(e.message);
            },
            close: async () => {
              console.log('Done writing input stream.');
              this.flushInput();
              this.source.postMessage('Done writing input stream.', '*');
            },
          }),
          { signal: this.signal }
        ),
        this.audioReadable.pipeTo(
          new WritableStream({
            write: async ({ timestamp }) => {
              const { value, done } = await this.inputReader.read();
              if (done) {
                await this.inputReader.closed;
                try {
                  await this.disconnect();
                } catch (err) {
                  console.warn(err.message);
                }
                console.log(
                  `readOffset:${this.readOffset}, duration:${this.duration}, ac.currentTime:${this.ac.currentTime}`,
                  `generator.readyState:${this.generator.readyState}, audioWriter.desiredSize:${this.audioWriter.desiredSize}`
                );
                // Statement order is kept exactly as before: the handlers are
                // installed only after disconnect() has settled.
                return await Promise.all([
                  new Promise((resolve) => (this.stream.oninactive = resolve)),
                  new Promise((resolve) => (this.ac.onstatechange = resolve)),
                ]);
              }
              // Two bytes per s16 sample, one sample per channel per frame.
              const frame = new AudioData({
                format: 's16',
                sampleRate: this.sampleRate,
                numberOfChannels: this.numberOfChannels,
                numberOfFrames: value.length / (2 * this.numberOfChannels),
                timestamp,
                data: value,
              });
              // AudioData.duration is in microseconds.
              this.duration += frame.duration / 10 ** 6;
              if (this.recorder && this.recorder.state === 'inactive') {
                this.recorder.start();
              }
              await this.audioWriter.write(frame);
            },
            abort(e) {
              console.error(e.message);
            },
            close() {
              console.log('Done reading input stream.');
            },
          }),
          { signal: this.audioReadableSignal }
        ),
      ]);
      this.resolve(
        this.recorder
          ? this.data && (await this.data.arrayBuffer())
          : 'Done streaming.'
      );
      return this.promise;
    } catch (err) {
      console.error(err);
      throw err;
    }
  }
}
<h5>Motivation</h5> | |||||
Web Speech API does not support SSML input to the speech synthesis engine (https://github.com/WICG/speech-api/issues/10), nor the ability to capture the output of `speechSynthesis.speak()` as a `MediaStreamTrack` or raw audio (https://lists.w3.org/Archives/Public/public-speech-api/2017Jun/0000.html).
See [Issue 1115640: [FUGU] NativeTransferableStream](https://bugs.chromium.org/p/chromium/issues/detail?id=1115640). | |||||
<h5>Synopsis</h5> | |||||
Native Messaging => eSpeak NG => PHP `passthru()` => `fetch()` => Transferable Streams => `MediaStreamTrack`. | |||||
Use local `espeak-ng` with `-m` option set in the browser. | |||||
Output speech synthesis audio as a live `MediaStreamTrack`.
Use [Native Messaging](https://developer.chrome.com/extensions/nativeMessaging), PHP `passthru()` to input text and [Speech Synthesis Markup Language](https://www.w3.org/TR/speech-synthesis11/) as STDIN to [`espeak-ng`](https://github.com/espeak-ng/espeak-ng), stream STDOUT in "real-time" as live `MediaStreamTrack`. | |||||
<h5>Install</h5>
<h6>Dependencies</h6> | |||||
eSpeak NG [Building eSpeak NG](https://github.com/espeak-ng/espeak-ng/blob/master/docs/building.md#building-espeak-ng). | |||||
PHP is used for `passthru()`. Substitute server language of choice. | |||||
``` | |||||
git clone https://github.com/guest271314/native-messaging-espeak-ng.git | |||||
cd native-messaging-espeak-ng/ | |||||
chmod +x local_server.sh index.php | |||||
``` | |||||
Navigate to `chrome://extensions`, set `Developer mode` to on, click `Load unpacked`, select downloaded git directory. | |||||
Note the generated extension ID, substitute that value for `<id>` in `native_messaging_espeakng.json`, `AudioStream.js`, `index.php`; add the value to `"extensions"` array in `manifest.json`. | |||||
Substitute full local path to `local_server.sh` for `/path/to` in `native_messaging_espeakng.json`. | |||||
Copy `native_messaging_espeakng.json` to `NativeMessagingHosts` directory in Chromium or Chrome configuration folder, on Linux, i.e., `~/.config/chromium`; `~/.config/google-chrome-unstable`. | |||||
`cp native_messaging_espeakng.json ~/.config/chromium/NativeMessagingHosts`
Reload extension. | |||||
<h5>Usage</h5> | |||||
On origins listed in the `"matches"` array of the `"web_accessible_resources"` object in `manifest.json`, run the following, e.g., at the DevTools `console`:
``` | |||||
var text = `Test`; | |||||
var stdin = `espeak-ng -m --stdout "${text}"`; | |||||
var espeakng = new AudioStream({ stdin, recorder: true }); | |||||
// espeakng.mediaStream: MediaStream containing MediaStreamTrack source output of espeak-ng --stdout | |||||
var ab = await espeakng.start(); | |||||
console.log( | |||||
URL.createObjectURL(new Blob([ab], { type: 'audio/webm;codecs=opus' })) | |||||
); | |||||
``` | |||||
Abort the request and audio output. | |||||
``` | |||||
await espeakng.abort() | |||||
``` | |||||
To turn the local server on and off with a user action, pin and click the extension icon on the Chromium or Chrome toolbar.
// Clicking the extension's toolbar icon sends an empty message to the
// `native_messaging_espeakng` native host and logs the host's reply.
chrome.action.onClicked.addListener(() => {
  chrome.runtime.sendNativeMessage(
    'native_messaging_espeakng',
    {},
    (nativeMessage) => {
      // Reading lastError reports a failed host launch instead of leaving an
      // "Unchecked runtime.lastError" warning and logging `undefined`.
      if (chrome.runtime.lastError) {
        console.warn(chrome.runtime.lastError.message);
        return;
      }
      console.log({ nativeMessage });
    }
  );
});
<?php
// Runs the command posted in the "espeakng" form field and streams its raw
// STDOUT back to the extension page as the response body.
if (isset($_POST["espeakng"])) {
    $command = $_POST["espeakng"];
    header('Vary: Origin');
    header("Access-Control-Allow-Origin: chrome-extension://<id>");
    header("Access-Control-Allow-Methods: POST");
    // Enforce on the server the same rule the client applies: only
    // `espeak-ng` invocations are accepted.
    // NOTE(review): the command is still interpreted by a shell, so this
    // prefix check does not stop `espeak-ng ...; other-command`. Build the
    // command from validated options and escapeshellarg() before exposing
    // this endpoint to anything but trusted local callers.
    if (!is_string($command) || preg_match('/^espeak-ng(\s|$)/', $command) !== 1) {
        http_response_code(400);
        exit();
    }
    header("Content-Type: application/octet-stream");
    header("X-Powered-By:");
    // passthru() writes the command's output directly to the response and
    // returns no data, so there is nothing to echo.
    passthru($command);
    exit();
}
#!/bin/bash | |||||
# https://stackoverflow.com/a/24777120 | |||||
# Write MESSAGE to stdout in the Native Messaging wire format: a 4-byte
# message length followed by the message itself.
#   $1 - the message text (expected to be valid JSON, e.g. '"Local server on."')
send_message() {
  local message="$1"
  # Under the C locale ${#message} counts bytes rather than characters, so
  # multibyte (unicode) characters in the message are measured correctly.
  local LC_ALL=C
  local messagelen=${#message}
  # Convert to an integer in native byte order.
  # If you see an error message in Chrome's stdout with
  # "Native Messaging host tried sending a message that is ... bytes long.",
  # then just swap the order, i.e. messagelen1 <-> messagelen4 and
  # messagelen2 <-> messagelen3
  local messagelen1=$(( (messagelen      ) & 0xFF ))
  local messagelen2=$(( (messagelen >>  8) & 0xFF ))
  local messagelen3=$(( (messagelen >> 16) & 0xFF ))
  local messagelen4=$(( (messagelen >> 24) & 0xFF ))
  # Print the message byte length followed by the actual message.
  # The inner printf builds the escape text "\xNN\xNN\xNN\xNN"; the outer
  # printf turns it into four raw bytes.
  printf "$(printf '\\x%02x\\x%02x\\x%02x\\x%02x' \
    "$messagelen1" "$messagelen2" "$messagelen3" "$messagelen4")%s" "$message"
}
# Toggle the PHP built-in server on localhost:8000 and report the new state
# to the browser: start it when no matching process exists, otherwise kill it.
local_server() {
  local server_pattern='php -S localhost:8000'
  if ! pgrep -f "$server_pattern" > /dev/null; then
    php -S localhost:8000 &
    send_message '"Local server on."'
    return
  fi
  pkill -f "$server_pattern" &
  send_message '"Local server off."'
}

local_server
{ | |||||
"name": "Native Messaging espeak-ng", | |||||
"description": "Native Messaging => eSpeak NG => PHP passthru() => fetch() => Transferable Streams => MediaStreamTrack", | |||||
"version": "2.0", | |||||
"manifest_version": 3, | |||||
"permissions": ["nativeMessaging", "tabs"], | |||||
"background": { | |||||
"service_worker": "background.js" | |||||
}, | |||||
"web_accessible_resources": [ { | |||||
"resources": [ "nativeTransferableStream.html", "nativeTransferableStream.js" ], | |||||
"matches": [ "https://github.com/*", "https://bugs.chromium.org/*" ], | |||||
"extensions": [ ] | |||||
}], | |||||
"action": {}, | |||||
"author": "guest271314" | |||||
} |
<!DOCTYPE html>
<!-- Helper page loaded in a hidden iframe; its behavior is defined entirely by nativeTransferableStream.js. -->
<html lang="en">
<head>
<meta charset="utf-8">
<title>nativeTransferableStream</title>
<script src="nativeTransferableStream.js"></script>
</head>
<body>
</body>
</html>
// Runs inside the extension iframe. On load it messages the native host,
// tells the embedding page it is ready, then:
//   - on receiving a ReadableStream: reads the command File from it, POSTs
//     the command to http://localhost:8000 and transfers the response body
//     stream back to the parent;
//   - on 'Done writing input stream.': messages the native host again and
//     forwards its reply;
//   - on 'Abort.': aborts the pending fetch.
// `name` is this window's name, used as the target origin for every reply.
onload = async () => {
  chrome.runtime.sendNativeMessage(
    'native_messaging_espeakng',
    {},
    async (nativeMessage) => {
      // Surface a failed host launch instead of leaving an
      // "Unchecked runtime.lastError" warning.
      if (chrome.runtime.lastError) {
        console.warn(chrome.runtime.lastError.message);
      }
      parent.postMessage(nativeMessage, name);
      // Presumably gives the local server time to start listening - confirm.
      await new Promise((resolve) => setTimeout(resolve, 100));
      const controller = new AbortController();
      const { signal } = controller;
      parent.postMessage('Ready.', name);
      onmessage = async (e) => {
        // Commands are only accepted from the embedding page.
        if (e.source !== parent) {
          return;
        }
        if (e.data instanceof ReadableStream) {
          try {
            const { value: file } = await e.data.getReader().read();
            const fd = new FormData();
            const stdin = await file.text();
            fd.append(file.name, stdin);
            const { body } = await fetch('http://localhost:8000', {
              method: 'post',
              cache: 'no-store',
              credentials: 'omit',
              body: fd,
              signal,
            });
            parent.postMessage(body, name, [body]);
          } catch (err) {
            parent.postMessage(err, name);
          }
        } else {
          if (e.data === 'Done writing input stream.') {
            chrome.runtime.sendNativeMessage(
              'native_messaging_espeakng',
              {},
              (reply) => {
                if (chrome.runtime.lastError) {
                  console.warn(chrome.runtime.lastError.message);
                }
                parent.postMessage(reply, name);
              }
            );
          }
          if (e.data === 'Abort.') {
            controller.abort();
          }
        }
      };
    }
  );
};
{ | |||||
"name": "native_messaging_espeakng", | |||||
"description": "Native Messaging => eSpeak NG => PHP passthru() => fetch() => Transferable Streams => MediaStreamTrack", | |||||
"path": "/path/to/local_server.sh", | |||||
"type": "stdio", | |||||
"allowed_origins": [ "chrome-extension://<id>" ] | |||||
} |