#
# Sample links dictionary file for Seth Golub's txt2html
# http://www.aigeek.com/txt2html/
#
# This dictionary contains some patterns for converting obvious URLs,
# ftp sites, hostnames, email addresses and the like to hrefs.
#
# Adapted shamelessly from the html.pl package by Oscar Nierstrasz in
# the Software Archive of the Software Composition Group
# http://iamwww.unibe.ch/~scg/Src/
#

# Some people like to mark URLs explicitly <URL:foo>
/&lt;URL:\s*(\S+?)\s*&gt;/ -> $1

# Urls: <service>:<rest-of-url>

|snews:[\w\.]+|        -> $&
|news:[\w\.]+|         -> $&
|nntp:[\w/\.:+\-]+|    -> $&
|http:[\w/\.:\@+\-~\%#?=&;,]+[\w/]|  -> $&
|shttp:[\w/\.:+\-~\%#?=&;,]+| -> $&
|https:[\w/\.:+\-~\%#?=&;,]+| -> $&
|file:[\w/\.:+\-]+|     -> $&
|ftp:[\w/\.:+\-]+|      -> $&
|wais:[\w/\.:+\-]+|     -> $&
|gopher:[\w/\.:+\-]+|   -> $&
|telnet:[\w/\@\.:+\-]+|   -> $&


# catch some newsgroups to avoid confusion with sites:
|([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)|  -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| 
          -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>

# FTP locations (with directory):
# anonymous@<site>:<path>
|(anonymous\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# ftp@<site>:<path>
|(ftp\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# Email address
|[a-zA-Z0-9_\+\-\.]+\@([a-zA-Z0-9][\w\.+\-]+\.[a-zA-Z]{2,})|
  -> mailto:$&

# <site>:<path>
|([^\w\-/\.:\@>])([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# NB: don't confuse an http server with a port number for
# an FTP location!
# internet number version: <internet-num>:<path>
|([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$3">$2:$3</A>

# telnet <site> <port>
|telnet ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})\s+(\d{2,4})|
  -h-> telnet <A HREF="telnet://$1:$3/">$1 $3</A>

# ftp <site>
|ftp ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})|
  -h-> ftp <A HREF="ftp://$1/">$1</A>

# host with "ftp" in the machine name
|(^|[^\w\d\-/\.:!])(([a-zA-Z][\w+\-]*)?ftp[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!])|
  -h-> $1ftp <A HREF="ftp://$2/">$2</A>$4

# ftp.foo.net/blah/
|ftp(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> ftp://$&

# www.thehouse.org/txt2html/
|www(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> http://$&

# host with "www" in the machine name
|(^|[^\w\d\-/\.:!])(([a-zA-Z][\w+\-]*)?www[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!\@])|
  -h-> $1<A HREF="http://$2/">$2</A>$4

# <site> <port>
|([a-zA-Z][\w+\-]+\.[\w+\-]+\.[a-zA-Z]{2,})\s+(\d{2,4})|
  -h-> <A HREF="telnet://$1:$2/">$1 $2</A>

# just the site name: <site>
|([^\w\-/\.:\@>])([a-zA-Z][\w+\-]+(\.[\w+\-]+)+\.[a-zA-Z]{2,})|
  -h-> $1<A HREF="http://$2">$2</A>/

# just internet numbers with port:
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})|
  -h-> $1<A HREF="telnet://$2:$3">$2 $3</A>

# just internet numbers:
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})|
  -h-> $1<A HREF="telnet://$2">$2</A>


# (see "relative path") as used by Tom Fine
# /\(see \"([^\"]+)\"\)/  -> $1.html

# RFCs
/RFC ?(\d+)/ -i-> http://www.cis.ohio-state.edu/rfc/rfc$1.txt

# This would turn "f^H_o^H_o^H_" into "<U>foo</U>".  Gross, isn't it?
# Thanks to Mark O'Dell <emark@cns.caltech.edu> for fixing this. 
#
# /(.\\010_)+/ -he-> $tmp = $&;$tmp =~ s@\010_@@g;"<U>$tmp</U>"
# /(_\\010.)+/ -he-> $tmp = $&;$tmp =~ s@_\010@@g;"<U>$tmp</U>"
# /(.\^H_)+/ -he-> $tmp = $&;$tmp =~ s@\^H_@@g;"<U>$tmp</U>"
# /(_\^H.)+/ -he-> $tmp = $&;$tmp =~ s@_\^H@@g;"<U>$tmp</U>"


# Mark _underline stuff_ as <U>underlined stuff</U>
/\B_([a-z][a-z ]*[a-z])_\B/ -hi-> <U>$1</U>

# Use this one instead if you want it to match more aggressively.
/\B_(\w(\w|\s|\!|\?|,|;|\.|\-)*(\w|\.|\!|\?))_\B/ -hi-> <U>$1</U>

# Need special case for _x_
/\B_([a-z])_\B/ -hi-> <U>$1</U>

# Mark *emphasized stuff* as <EM>emphasized stuff</EM>
/\B\*([a-z][a-z -]*[a-z])\*\B/ -hi-> <EM>$1</EM>

# Use this one instead if you want it to match more aggressively.
/\B\*(\w(\w|\s|\!|\?|,|;|\.)*(\w|\.|\!|\?))\*\B/ -hi-> <EM>$1</EM>

# We also need a special case for *x*
/\B\*([a-z])\*\B/ -hi-> <EM>$1</EM>



# Seth and his amazing conversion program    :-)

"Seth Golub"  -io-> http://www.aigeek.com/
"txt2html"    -io-> http://www.aigeek.com/txt2html/


# End of sample dictionary

