summary refs log tree commit diff stats
path: root/util/bin/dict.leo.org
diff options
context:
space:
mode:
author makefu <github@syntax-fehler.de> 2011-09-12 14:32:18 +0200
committer makefu <github@syntax-fehler.de> 2011-09-12 14:32:18 +0200
commit 9f70f9b5b462aa3e150b9708f695edfaf716edd4 (patch)
tree 5cbebe6cbc0ada2c988d7f14d0bbbc52a68b37c1 /util/bin/dict.leo.org
parent 568fbd7fa5541182bbbc33560863ec9c3ff01d48 (diff)
parent a07326f6c57a7e8f49b9bef96ded427275a16e2e (diff)
Merge branch 'master' into punani_files
Diffstat (limited to 'util/bin/dict.leo.org')
-rwxr-xr-x util/bin/dict.leo.org 87
1 files changed, 87 insertions, 0 deletions
diff --git a/util/bin/dict.leo.org b/util/bin/dict.leo.org
new file mode 100755
index 00000000..03f16b96
--- /dev/null
+++ b/util/bin/dict.leo.org
@@ -0,0 +1,87 @@
+#! /bin/sh
+#### dict.leo.org version 2.0 beta 1
+set -euf
+
+cache_dir=/tmp/dict.leo.org/
+file="$cache_dir$*"
+
+url="http://dict.leo.org/?$*"
+
+# TODO check sanity of filename
+
+if test -d "$cache_dir" ; then
+ test -e "$file" || { curl --silent "$url" | tee "$file" ; }
+else
+ curl --silent "$url"
+fi |
+sed 's/>\( *.\)/>\n\1/g' | sed -n '
+ s/[[:space:]]/ /g
+ /<!-- # Werbung # -->/,/<\/td>/b
+ /<!-- ============================================== -->/,/^<\/td>/b
+ /<!-- # Trailer # -->/,/^ <\/table>/b
+ /<table id="subnavigation" class="border">/,/<\/table>/b
+ /<div id="divMoreInfo" class="popup">/,/^ <\/table>/b
+
+ ## show link in rendered version
+ s/^<a href="\(.*searchLocRelinked.*\)">$/&mehr: \1/
+ s/^mehr &gt;&gt;//
+
+ s/<[Bb][Rr] *\/>/,/g
+
+ p
+ b
+ :c;# comment
+ s/-->/\\-\\-\\>/g
+ s/<!--/\\<\\!\\-\\-/g
+ s/.*/<!--(&)-->/
+ p
+' \
+| w3m -cols 1024 -T text/html -dump | sed '
+ s/[[:space:]]\+/ /g
+ s/ ,/,/g
+ s/^ //;s/ $//
+ s/[┌┬┐└┴┘├┼┤─]//g
+ s/ *│ */|/g
+ s/\[ \]//
+ s/\[Speichern\] der ausgewa:hlten Wo:rter im Trainer//
+ s/^||//;s/|| \?$//
+' | sed -n '
+ /^\[EN-> DE\]/,$b
+ s/^mehr: /+ /
+ s/^ENGLISCH||DEUTSCH \?/= Englisch -> Deutsch/
+ s/^|\([0-9]\+\) \(Treffer\)|$/= \1 \2/
+ s/^\([^|]\+\)||\([^|]\+\)$/- \1|\2/
+
+ s/^[^=+#-][^|]\+$/## &/
+
+ p
+' | sed -n '
+ /^$/b
+ s/ \([?!]\)/\1/g
+ s/\([[(]\) /\1/g;s/ \([]\)]\)/\1/g
+
+ s/ ([0-9]\+ of [0-9]\+) \?//;# TODO
+
+ #s/^##.*Grundform.*/\n#&/
+ /^## .*Grundform.*/,/^##\( .*\)\?/{
+ /##/{/Grundform/!p}
+ b
+ }
+
+ /^## Informationen /,$b;# TODO
+ #/^## Beispiele/,$b;# TODO
+ #/^## Wendungen/,$b;# TODO
+ /^+/b;# TODO
+ #/^=/b;# TODO
+
+ p
+' | sed '
+ ##
+ s/|\(.*\)/\n \1/g
+ s/^#.*/\n&/
+
+ s/##.*Treffer $/\n#&/
+' | less -R
+echo
+
+#### end of file.