diff options
author | tv <tv@nomic.retiolum> | 2013-02-22 23:58:49 +0100 |
---|---|---|
committer | tv <tv@nomic.retiolum> | 2013-02-22 23:58:49 +0100 |
commit | d363f47a406b7e46a409bf8d5a0bcd0a9f8065e7 (patch) | |
tree | 0abb1d8925f334d478cf63e892d5578a2ddabed6 /util | |
parent | d59b012d3487afa52433ee19ab61b24f594cb6bc (diff) |
rm obsolete dict.leo.org scraper
Diffstat (limited to 'util')
-rwxr-xr-x | util/bin/dict.leo.org | 87 |
1 files changed, 0 insertions, 87 deletions
```diff
diff --git a/util/bin/dict.leo.org b/util/bin/dict.leo.org
deleted file mode 100755
index 03f16b96..00000000
--- a/util/bin/dict.leo.org
+++ /dev/null
@@ -1,87 +0,0 @@
-#! /bin/sh
-#### dict.leo.org version 2.0 beta 1
-set -euf
-
-cache_dir=/tmp/dict.leo.org/
-file="$cache_dir$*"
-
-url="http://dict.leo.org/?$*"
-
-# TODO check sanity of filename
-
-if test -d "$cache_dir" ; then
- test -e "$file" || { curl --silent "$url" | tee "$file" ; }
-else
- curl --silent "$url"
-fi |
-sed 's/>\( *.\)/>\n\1/g' | sed -n '
- s/[[:space:]]/ /g
- /<!-- # Werbung # -->/,/<\/td>/b
- /<!-- ============================================== -->/,/^<\/td>/b
- /<!-- # Trailer # -->/,/^ <\/table>/b
- /<table id="subnavigation" class="border">/,/<\/table>/b
- /<div id="divMoreInfo" class="popup">/,/^ <\/table>/b
-
- ## show link in rendered version
- s/^<a href="\(.*searchLocRelinked.*\)">$/&mehr: \1/
- s/^mehr >>//
-
- s/<[Bb][Rr] *\/>/,/g
-
- p
- b
- :c;# comment
- s/-->/\\-\\-\\>/g
- s/<!--/\\<\\!\\-\\-/g
- s/.*/<!--(&)-->/
- p
-' \
-| w3m -cols 1024 -T text/html -dump | sed '
- s/[[:space:]]\+/ /g
- s/ ,/,/g
- s/^ //;s/ $//
- s/[┌┬┐└┴┘├┼┤─]//g
- s/ *│ */|/g
- s/\[ \]//
- s/\[Speichern\] der ausgewa:hlten Wo:rter im Trainer//
- s/^||//;s/|| \?$//
-' | sed -n '
- /^\[EN-> DE\]/,$b
- s/^mehr: /+ /
- s/^ENGLISCH||DEUTSCH \?/= Englisch -> Deutsch/
- s/^|\([0-9]\+\) \(Treffer\)|$/= \1 \2/
- s/^\([^|]\+\)||\([^|]\+\)$/- \1|\2/
-
- s/^[^=+#-][^|]\+$/## &/
-
- p
-' | sed -n '
- /^$/b
- s/ \([?!]\)/\1/g
- s/\([[(]\) /\1/g;s/ \([]\)]\)/\1/g
-
- s/ ([0-9]\+ of [0-9]\+) \?//;# TODO
-
- #s/^##.*Grundform.*/\n#&/
- /^## .*Grundform.*/,/^##\( .*\)\?/{
- /##/{/Grundform/!p}
- b
- }
-
- /^## Informationen /,$b;# TODO
- #/^## Beispiele/,$b;# TODO
- #/^## Wendungen/,$b;# TODO
- /^+/b;# TODO
- #/^=/b;# TODO
-
- p
-' | sed '
- ##
- s/|\(.*\)/\n [32m\1[m/g
- s/^#.*/\n[33m&[m/
-
- s/##.*Treffer $/\n[1;33m#&[m/
-' | less -R
-echo
-
-#### end of file.
```