summaryrefslogtreecommitdiffstats
path: root/util/lib/stt
diff options
context:
space:
mode:
authormakefu <github@syntax-fehler.de>2013-11-05 18:14:21 +0100
committermakefu <github@syntax-fehler.de>2013-11-05 18:14:42 +0100
commit628ad2aaa2e207aa2836e9b68f7bfbd890dff766 (patch)
tree1913f767539ee18679f21bbff5ec2ba13f941acd /util/lib/stt
parente71f7dde60f687c3fb037d2e4d2b0ada608e512d (diff)
speech to text: initial commit
Diffstat (limited to 'util/lib/stt')
-rw-r--r--util/lib/stt/README.md4
-rw-r--r--util/lib/stt/google.sh35
2 files changed, 39 insertions, 0 deletions
diff --git a/util/lib/stt/README.md b/util/lib/stt/README.md
new file mode 100644
index 00000000..be905770
--- /dev/null
+++ b/util/lib/stt/README.md
@@ -0,0 +1,4 @@
+# Speech to Text api wrapper
+
+Because Speech to text is hard™ with FOSS, these libraries utilize the magic of
+the internets to solve this problem.
diff --git a/util/lib/stt/google.sh b/util/lib/stt/google.sh
new file mode 100644
index 00000000..a78579d5
--- /dev/null
+++ b/util/lib/stt/google.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+_get_content_type(){ # prints the MIME type that file(1) reports for the file given as $1
+ file -b --mime-type "$1"
+}
+_get_audio_rate(){ # prints the kHz figure found in the file(1) description of $1, times 1000 as an integer; prints nothing if no kHz figure is present
+ file "$1" | sed -n -e 's/.* \([.0-9]\+\) kHz.*/\1/p' \
+ | awk '{print int($1 *1000)}'
+}
+
+record_audio(){
+ # usage : record_audio num_seconds
+ # echoes the path of the temp file holding the flac-encoded recording (only if flac succeeds)
+ tmpfile=$(mktemp)
+ : ${1?please provide number of seconds to record} # NOTE(review): checked after mktemp, so a missing $1 leaves the temp file behind
+ arecord -d "$1" -r 16000 -t wav -q -f cd | flac -s -f - -o "$tmpfile" && echo "$tmpfile" # NOTE(review): -f cd may override -r 16000 - confirm against arecord docs
+}
+stt(){
+ # usage: (lang=de-de stt recorded_file) -- prints the recognized utterance text
+ : ${1? please provide recorded file}
+ infile="$1"
+ lang=${lang:-en-us} # falls back to en-us when lang is unset or empty
+ _get_content_type "$1" | (! grep -q "x-flac" ) \
+ && echo "infile needs to be in flac format" \
+ && return 1
+ # only flac seems to be working... NOTE(review): $infile is expanded unquoted in the Content-Type header line, so paths with whitespace would split
+ wget -q -O - \
+ -U 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.77 Safari/535.7' \
+ --post-file "$infile" \
+ --header "Content-Type: `_get_content_type $infile`; rate=`_get_audio_rate $infile`;" \
+ "http://www.google.com/speech-api/v1/recognize?lang=${lang}&client=chromium&maxresults=1" \
+ | sed -n 's/.*utterance":"\([^"]*\)".*/\1/p'
+
+ # the service returns e.g. {"status":0,"id":"d9269e6f741997161e41a4d441b34ba1-1","hypotheses":[{"utterance":"hallo Welt","confidence":0.7008959}]} -- only the utterance value is printed
+}