summary refs log tree commit diff stats
path: root/util
diff options
context:
space:
mode:
author tv <tv@xso> 2011-08-30 12:37:14 +0200
committer tv <tv@xso> 2011-08-30 12:37:14 +0200
commit 113d54f5243f68be7620b22b5651400d07af36d2 (patch)
tree f2389eadf0d14cb96b097fc5185423cbe5a6a23b /util
parent 2864df44d167066d1e409afc817cbcb56f350bb9 (diff)
util hrefs: initial commit
The hrefs util extracts hrefs from HTML read on stdin. We could use query for this kind of extraction, but hrefs is older and already works as fast as light... not ^_^
Diffstat (limited to 'util')
-rwxr-xr-x util/bin/hrefs 20
1 files changed, 20 insertions, 0 deletions
diff --git a/util/bin/hrefs b/util/bin/hrefs
new file mode 100755
index 00000000..3a1a51de
--- /dev/null
+++ b/util/bin/hrefs
@@ -0,0 +1,20 @@
+#! /bin/sh
+
+# _hrefs - read HTML on stdin and print the value of each href attribute
+# on its own line in input order.
+#
+# Only non-empty values in double quotes are recognised and the match is
+# case-sensitive.  Matching is done line by line so an attribute that is
+# split across two input lines is not found.
+#
+# Written in POSIX awk - the earlier sed pipeline needed GNU extensions
+# for one-or-more matching and for newlines in the replacement text
+# which a plain /bin/sh system does not have to provide.
+_hrefs() {
+ awk '{
+  rest = $0
+  # Walk the line left to right so that every match is reported.
+  while (match(rest, /href="[^"]+"/)) {
+   # RSTART + 6 skips over href=" and RLENGTH - 7 drops the final quote.
+   print substr(rest, RSTART + 6, RLENGTH - 7)
+   rest = substr(rest, RSTART + RLENGTH)
+  }
+ }'; }
+
+# _add_prefix_to_relative_hrefs PREFIX - copy stdin to stdout and put
+# PREFIX in front of every line that is not already an absolute URL.
+# A line counts as absolute when it starts with a URI scheme such as
+# http: https: ftp: or mailto: - every other line is treated as a
+# relative reference and gets the prefix.
+#
+# The former sed command was assembled as s^PREFIX without closing
+# delimiters - sed rejects that as an unterminated command - and it
+# only recognised http: so https links would have been prefixed too.
+# PREFIX now reaches awk through the environment and is used as
+# literal text - no character in it acts as a pattern or replacement
+# metacharacter.
+_add_prefix_to_relative_hrefs() {
+ HREFS_PREFIX="$1" awk '
+  # Lines that start with a URI scheme are absolute - pass them through.
+  /^[A-Za-z][A-Za-z0-9+.-]*:/ { print; next }
+  { print ENVIRON["HREFS_PREFIX"] $0 }
+ '; }
+
+# _main - entry point that dispatches on the number of arguments.
+#   no argument   - run _hrefs on stdin
+#   one argument  - pipe the output of _hrefs through
+#                   _add_prefix_to_relative_hrefs with that argument
+#   anything else - report the bad command line on stderr and exit
+#                   with status 23
+_main() {
+ if [ "$#" -eq 0 ]
+ then
+  _hrefs
+ elif [ "$#" -eq 1 ]
+ then
+  _hrefs | _add_prefix_to_relative_hrefs "$1"
+ else
+  echo "bad command line: $0 $*" >&2
+  exit 23
+ fi; }
+
+# Strict mode - exit on any failing command and on use of an unset
+# variable and turn off pathname expansion - then hand the command
+# line to _main.
+set -o errexit -o nounset -o noglob
+_main "$@"
+#### end of file.