This one will deal with the case of extra tags in the hyperlink too, e.g. <a href="..."><b>http://chance-to-advertise-my-site-in-code.com</b></a>:
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns="http://www.w3.org/1999/xhtml">
<!--
  Copies the input document, shortening any text inside a hyperlink that
  looks like a URL (contains "://") to its first 10 characters. Because the
  match is on text nodes with an <a> ancestor, URL text wrapped in extra
  markup inside the link (e.g. <a><b>http://...</b></a>) is handled too.

  NOTE(review): the "a" in the match pattern is in no namespace; if the input
  is namespace-qualified XHTML the pattern may need a prefixed name - confirm
  against the actual input.
-->
<xsl:template match="text()[ancestor::a][contains(., '://')]">
<xsl:value-of select="substring(.,1,10)"/>
</xsl:template>
<!--
  Copy elements and attributes through unchanged. The explicit select is
  required: xsl:apply-templates without a select attribute processes child
  nodes only, and attributes are not children, so without it every attribute
  (including href) would be silently dropped from the output.
-->
<xsl:template match="*|@*">
<xsl:copy><xsl:apply-templates select="@*|node()"/></xsl:copy>
</xsl:template>
</xsl:stylesheet>
#!/usr/bin/perl
# Print file.html to STDOUT with over-long URL link text shortened.
# Only simple anchors are rewritten: the link text must contain no spaces
# and no nested tags, as required by the pattern below.
use strict;
use warnings;

my $path = "/usr/ambrose/file.html";

# Three-argument open with a lexical handle: the mode cannot be injected via
# the file name and the handle is scoped to this script.
open( my $html_fh, '<', $path ) or die "Cannot open $path: $!";

# Slurp the whole file. $/ is localised to the do-block so the record
# separator is not changed for any other code.
my $html = do { local $/; <$html_fh> };
close( $html_fh );

# $1 is the opening <a ...> tag, $2 the link text; the text is passed through
# munge() and the anchor is reassembled.
$html =~ s{(<a [^>]+>)([^< ]+)</a>}
{$1 . munge($2) . '</a>'}egsi;
print $html;
# munge($tag_contents [, $max_length])
#
# Shortens link text that is itself an http:// or https:// URL.
#
#   $tag_contents - the text found between <a ...> and </a>
#   $max_length   - optional; number of leading characters to keep
#                   (defaults to 32)
#
# Returns the first $max_length characters followed by '...' when the text
# starts with http:// or https:// and is longer than $max_length; otherwise
# returns the text unchanged (an undefined value is returned as-is).
#
# No prototype is declared: an empty prototype "()" would state that the sub
# takes no arguments and make any call compiled after this definition fail
# with "Too many arguments".
sub munge {
    my ( $tag_contents, $max_length ) = @_;
    $max_length = 32 unless defined $max_length;

    return $tag_contents unless defined $tag_contents;

    if ( $tag_contents =~ m{\Ahttps?://}
        && length($tag_contents) > $max_length )
    {
        $tag_contents = substr( $tag_contents, 0, $max_length ) . '...';
    }
    return $tag_contents;
}
>
<a href="http://www.yahoo.com/">http://www.yahoo.com/</a>
Short URL as link text
<a href="http://www.yahoo.com/">Click here</a>
non-URL as link text
<a href="http://www.yahoo.com/foo/bar/baz/quux/">http://www.yahoo.com/foo/bar/baz/quux/</a>
long URL as link text
<a href="http://www.yahoo.com/">http://www.yahoo.com/</a>
Short URL as link text
<a href="http://www.yahoo.com/">Click here</a>
non-URL as link text
<a href="http://www.yahoo.com/foo/bar/baz/quux/">http://www.yahoo.com/foo/bar/baz...</a>
long URL as link text
use Regexp::Common qw /URI/;
use warnings;
use strict;
# Scan test.html line by line and report every link whose visible text is an
# HTTP URL, showing the link rewritten to display only the host name.
# Limitation: the file is read one line at a time, so an <a> tag that is
# split across lines is not recognised.
open(my $html, '<', 'test.html') or die "Cannot open test.html: $!";
while (my $line = <$html>) {
    # [^>]* (rather than a greedy .*) keeps the opening tag from running on
    # into later tags, and /g visits every link on the line. Captures are
    # only read inside the loop body, i.e. after a successful match, so no
    # stale or undefined $1..$3 from an earlier line is ever used.
    while ($line =~ m#(<a\s+href[^>]*>)\s*([^<\s]+)\s*(</a\s*>)#gi) {
        my ($opentag, $linktext, $closetag) = ($1, $2, $3);
        if ($linktext =~ /$RE{URI}{HTTP}{-keep}/) {
            # Under {-keep} the third capture is used here as the host.
            my $host = $3;
            print "Found URL as link text...\n";
            print "\tNew link is \'$opentag$host$closetag\'\n";
        }
    }
}
close($html);
# Alternative URL check for link text: instead of showing only the host, it
# prints the original link and a shortened form "scheme://host...".
# Relies on $opentag, $linktext and $closetag already holding the pieces of
# the matched link.
if ($linktext =~ /$RE{URI}{HTTP}{-keep}/) {
    # Second and third captures of the {-keep} match, used as scheme and host.
    my $proto = $2;
    my $host  = $3;
    print "Found URL as link text...\n",
          "\tOld link was \'$opentag$linktext$closetag\'\n",
          "\tNew link is \'$opentag $proto://$host... $closetag\'\n";
}
# Open test.html and iterate over it one line at a time (the loop body is
# empty in this excerpt). Three-argument open with an explicit failure check,
# so a missing file is reported instead of silently yielding no lines.
open(my $html, '<', 'test.html') or die "Cannot open test.html: $!";
while (<$html>) {
}
<a
href="foo">
# Captures the opening <a href...> tag, the whitespace-free link text and the
# closing </a> tag from the current line ($_). Because ".*" is greedy, on a
# line holding several links the first capture runs on through the earlier
# links up to the last opening tag.
m#(<a\s+href.*>)\s*(\S+)\s*(</a\s*>)#mi;
<a href="http://foo.com/">foo</a>, <a href="http://bar.com/">bar</a>
You are not logged in; either log in or create an account to post comments.
These are the correct regular expressions:
First one: /\<a.*href=.*\>(.*?)\< \/a\>/gi
Second one: s/">$z\</$modtxt\</
Third one: s/">$z\</\"\>$modtxt\</
Hopefully they make it out of live preview.
posted by Captain_Tenille at 3:07 PM on October 26, 2005