Actually, it's the hesk_makeURL function inside common.inc.php that takes care of this.
This is what 2.5 will use, it should keep query strings in URLs and detect almost any link type, for example ftp://, sftp://, torrent://, file:// etc...
Code: Select all
function hesk_makeURL($text)
{
$rexScheme = '[a-z\d+]*:/{2}';
$rexDomain = '(?:[-a-zA-Z0-9]{1,63}\.)+[a-zA-Z][-a-zA-Z0-9]{1,62}';
$rexIp = '(?:[1-9][0-9]{0,2}\.|0\.){3}(?:[1-9][0-9]{0,2}|0)';
$rexPort = '(:[0-9]{1,5})?';
$rexPath = '(/[!$-/0-9:;=@_\':;!a-zA-Z\x7f-\xff]*?)?';
$rexQuery = '(\?[!$-/0-9:;=@_\':;!a-zA-Z\x7f-\xff]+?)?';
$rexFragment = '(#[!$-/0-9:;=@_\':;!a-zA-Z\x7f-\xff]+?)?';
$rexUsername = '[^]\\\\\x00-\x20\"(),:-<>[\x7f-\xff]{1,64}';
$rexPassword = $rexUsername; // allow the same characters as in the username
$rexUrl = "($rexScheme)?(?:($rexUsername)(:$rexPassword)?@)?($rexDomain|$rexIp)($rexPort$rexPath$rexQuery$rexFragment)";
$rexTrailPunct= "[)'?.!,;:]"; // valid URL characters which are not part of the URL if they appear at the very end
$rexNonUrl = "[^-_$+.!*'(),;/?:@=&a-zA-Z0-9]"; // characters that should never appear in a URL
$rexUrlLinker = "{\\b$rexUrl(?=$rexTrailPunct*($rexNonUrl|$))}";
/**
* List source: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
* Last updated: 2012-08-08
*/
$validTlds = hesk_array_fill_keys(explode(" ", ".ac .ad .ae .aero .af .ag .ai .al .am .an .ao .aq .ar .arpa .as .asia .at .au .aw .ax .az .ba .bb .bd .be .bf .bg .bh .bi .biz .bj .bm .bn .bo .br .bs .bt .bv .bw .by .bz .ca .cat .cc .cd .cf .cg .ch .ci .ck .cl .cm .cn .co .com .coop .cr .cu .cv .cw .cx .cy .cz .de .dj .dk .dm .do .dz .ec .edu .ee .eg .er .es .et .eu .fi .fj .fk .fm .fo .fr .ga .gb .gd .ge .gf .gg .gh .gi .gl .gm .gn .gov .gp .gq .gr .gs .gt .gu .gw .gy .hk .hm .hn .hr .ht .hu .id .ie .il .im .in .info .int .io .iq .ir .is .it .je .jm .jo .jobs .jp .ke .kg .kh .ki .km .kn .kp .kr .kw .ky .kz .la .lb .lc .li .lk .lr .ls .lt .lu .lv .ly .ma .mc .md .me .mg .mh .mil .mk .ml .mm .mn .mo .mobi .mp .mq .mr .ms .mt .mu .museum .mv .mw .mx .my .mz .na .name .nc .ne .net .nf .ng .ni .nl .no .np .nr .nu .nz .om .org .pa .pe .pf .pg .ph .pk .pl .pm .pn .post .pr .pro .ps .pt .pw .py .qa .re .ro .rs .ru .rw .sa .sb .sc .sd .se .sg .sh .si .sj .sk .sl .sm .sn .so .sr .st .su .sv .sx .sy .sz .tc .td .tel .tf .tg .th .tj .tk .tl .tm .tn .to .tp .tr .travel .tt .tv .tw .tz .ua .ug .uk .us .uy .uz .va .vc .ve .vg .vi .vn .vu .wf .ws .xn--0zwm56d .xn--11b5bs3a9aj6g .xn--3e0b707e .xn--45brj9c .xn--80akhbyknj4f .xn--80ao21a .xn--90a3ac .xn--9t4b11yi5a .xn--clchc0ea0b2g2a9gcd .xn--deba0ad .xn--fiqs8s .xn--fiqz9s .xn--fpcrj9c3d .xn--fzc2c9e2c .xn--g6w251d .xn--gecrj9c .xn--h2brj9c .xn--hgbk6aj7f53bba .xn--hlcj6aya9esc7a .xn--j6w193g .xn--jxalpdlp .xn--kgbechtv .xn--kprw13d .xn--kpry57d .xn--lgbbat1ad8j .xn--mgb9awbf .xn--mgbaam7a8h .xn--mgbayh7gpa .xn--mgbbh1a71e .xn--mgbc0a9azcg .xn--mgberp4a5d4ar .xn--o3cw4h .xn--ogbpf8fl .xn--p1ai .xn--pgbs0dh .xn--s9brj9c .xn--wgbh1c .xn--wgbl6a .xn--xkc2al3hye2a .xn--xkc2dl3a5ee0h .xn--yfro4i67o .xn--ygbi2ammx .xn--zckzah .xxx .ye .yt .za .zm .zw"), true);
$html = '';
$html = '';
$position = 0;
while (preg_match($rexUrlLinker, $text, $match, PREG_OFFSET_CAPTURE, $position))
{
list($url, $urlPosition) = $match[0];
// Add the text leading up to the URL.
$html .= substr($text, $position, $urlPosition - $position);
$linkText = $match[0][0];
$scheme = $match[1][0];
$username = $match[2][0];
$password = $match[3][0];
$domain = $match[4][0];
$afterDomain = $match[5][0];
// Check that the TLD is valid or that $domain is an IP address.
$tld = strtolower(strrchr($domain, '.'));
if (preg_match('{^\.[0-9]{1,3}$}', $tld) || isset($validTlds[$tld]))
{
// Do not permit implicit scheme if a password is specified, as
// this causes too many errors (e.g. "my email:foo@example.org").
if (!$scheme && $password)
{
$html .= $username;
// Continue text parsing at the ':' following the "username".
$position = $urlPosition + strlen($username);
continue;
}
if (!$scheme && $username && !$password && !$afterDomain)
{
// Looks like an email address.
$completeUrl = "mailto:$url";
$linkText = $url;
}
else
{
// Prepend http:// if no scheme is specified
$completeUrl = $scheme ? $url : "http://$url";
}
$linkHtml = '<a href="' . $completeUrl . '">' . $linkText . '</a>';
// Cheap e-mail obfuscation to trick the dumbest mail harvesters.
$linkHtml = str_replace('@', '@', $linkHtml);
// Add the hyperlink.
$html .= $linkHtml;
}
else
{
// Not a valid URL.
$html .= $url;
}
// Continue text parsing from after the URL.
$position = $urlPosition + strlen($url);
}
// Add the remainder of the text.
$html .= substr($text, $position);
return $html;
} // End hesk_makeURL()