-
-
Save ronisaha/73a6aa13bc71c8913976e500e597fc15 to your computer and use it in GitHub Desktop.
URL Slugs in PHP (with UTF-8 and Transliteration Support)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Build a lower-case URL slug from a string.
 *
 * Only ASCII letters, digits, underscores and the Bengali characters listed
 * in the pattern below survive; every other character is dropped. Runs of
 * whitespace and dashes become a single separator, and underscores are
 * replaced by the separator as well.
 *
 * @param string|null $string    Text to convert; null yields an empty string.
 * @param string      $separator Text placed between words (default "-").
 * @return string The slug, without leading or trailing whitespace/dash separators.
 */
function url_slug($string = null, $separator = "-") {
    if (is_null($string)) {
        return "";
    }

    // Trim, then lower-case. mb_strtolower() is required so that non-ASCII
    // UTF-8 letters are handled correctly | more info: http://goo.gl/QL2tzK
    $string = mb_strtolower(trim($string), "UTF-8");

    // Make alphanumeric (removes all other characters); this keeps Latin
    // characters and Bengali characters as well.
    // The literal hyphen must be the LAST item in the class: PCRE2
    // (PHP >= 7.3) rejects a hyphen placed between \s and another character
    // as an "invalid range in character class".
    $string = preg_replace("/[^a-z0-9_\sকখগঘঙচছজঝঞটঠডড়ঢঢ়ণতথদধনপফবভমযয়রলশষসহৎ্যঅআইঈউঊঋএঐওঔািীুূৃেৈোৌ০১২৩৪৫৬৭৮৯-]/u", "", $string);
    if ($string === null) {
        // preg_replace() reports failure (e.g. malformed UTF-8 input) with null.
        return "";
    }

    // Collapse runs of dashes/whitespace into one space. The "u" modifier
    // keeps this step consistent with the filter above, so any whitespace
    // that filter let through is collapsed here too.
    $string = preg_replace("/[\s-]+/u", " ", $string);

    // Stripping punctuation can leave a space at either end ("hello !");
    // drop it so the slug does not start or end with a separator.
    $string = trim($string);

    // Convert spaces and underscores to the given separator. strtr() inserts
    // the separator literally, so "$" or "\" inside it are not interpreted as
    // regex back-references.
    return strtr($string, array(" " => $separator, "_" => $separator));
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Create a web friendly URL slug from a string.
 *
 * Although supported, transliteration is discouraged because
 *     1) most web browsers support UTF-8 characters in URLs
 *     2) transliteration causes a loss of information
 *
 * Recognised keys in $options (all optional):
 *     'delimiter'     string     Text placed between words. Default '-'.
 *     'limit'         int|null   Maximum slug length in characters; a falsy
 *                                value means no limit. Default null.
 *     'lowercase'     bool       Lower-case the result. Default true.
 *     'replacements'  array      Map of regex pattern => replacement applied
 *                                before anything else. Default empty.
 *     'transliterate' bool       Apply the character map below. The map is
 *                                empty in this file, so the option currently
 *                                changes nothing. Default false.
 *
 * @author Sean Murphy <[email protected]>
 * @copyright Copyright 2012 Sean Murphy. All rights reserved.
 * @license http://creativecommons.org/publicdomain/zero/1.0/
 *
 * @param string $str
 * @param array $options
 * @return string
 */
function url_slug($str, $options = array()) {
    $str = (string)$str;

    // Text that is already valid UTF-8 is used untouched. Guessing the source
    // encoding among every encoding mbstring knows is unreliable and can
    // re-encode (and thereby corrupt) perfectly good UTF-8, so that guess is
    // kept only as a best-effort fallback for input that is NOT valid UTF-8.
    if (!mb_check_encoding($str, 'UTF-8')) {
        $str = mb_convert_encoding($str, 'UTF-8', mb_list_encodings());
    }

    $defaults = array(
        'delimiter' => '-',
        'limit' => null,
        'lowercase' => true,
        'replacements' => array(),
        'transliterate' => false,
    );

    // Merge options
    $options = array_merge($defaults, $options);
    $delimiter = (string)$options['delimiter'];
    $quoted = preg_quote($delimiter, '/');

    $char_map = array();

    // Make custom replacements
    $str = preg_replace(array_keys($options['replacements']), $options['replacements'], $str);

    // Transliterate characters to ASCII
    if ($options['transliterate']) {
        $str = str_replace(array_keys($char_map), $char_map, $str);
    }

    // Replace non-alphanumeric characters with our delimiter. Combining marks
    // (\p{M}) are kept together with letters: scripts such as Bengali or
    // Devanagari write vowel signs as marks, and treating those as separators
    // would split words apart.
    $str = preg_replace('/[^\p{L}\p{M}\p{Nd}]+/u', $delimiter, $str);

    // Remove duplicate delimiters
    $str = preg_replace('/(' . $quoted . '){2,}/', '$1', $str);

    // Truncate slug to max. characters
    $str = mb_substr($str, 0, ($options['limit'] ? $options['limit'] : mb_strlen($str, 'UTF-8')), 'UTF-8');

    // Remove delimiter from ends. trim() would treat the delimiter as a SET of
    // bytes: a multi-character delimiter would eat slug letters, and a
    // multibyte one could cut a UTF-8 character in half. Strip whole
    // occurrences of the delimiter instead.
    if ($delimiter !== '') {
        $str = preg_replace('/\A(?:' . $quoted . ')+|(?:' . $quoted . ')+\z/', '', $str);
    }

    return $options['lowercase'] ? mb_strtolower($str, 'UTF-8') : $str;
}
| ?> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
// Demo script: load url_slug() and print each sample next to its slug as
// UTF-8 plain text.
include('url_slug.php');
header('Content-type: text/plain; charset=utf-8');

// Basic usage
$plain = "This is an example string. Nothing fancy.";
echo $plain, "\n", url_slug($plain), "\n\n";

// Example using French with unwanted characters ('?)
$french = "Qu'en est-il français? Ça marche alors?";
echo $french, "\n", url_slug($french), "\n\n";

// Example using transliteration
$russian = "Что делать, если я не хочу, UTF-8?";
echo $russian, "\n", url_slug($russian, array('transliterate' => true)), "\n\n";

// Example using transliteration on an unsupported language
$hebrew = "מה אם אני לא רוצה UTF-8 תווים?";
echo $hebrew, "\n", url_slug($hebrew, array('transliterate' => true)), "\n\n";

// Some other options: custom delimiter, length limit, original casing kept,
// and regex replacements applied before slugging
$mixed = "This is an Example String. What's Going to Happen to Me?";
$settings = array(
    'delimiter' => '_',
    'limit' => 40,
    'lowercase' => false,
    'replacements' => array(
        '/\b(an)\b/i' => 'a',
        '/\b(example)\b/i' => 'Test'
    )
);
echo $mixed, "\n", url_slug($mixed, $settings);
| /* | |
| Output: | |
| This is an example string. Nothing fancy. | |
| this-is-an-example-string-nothing-fancy | |
| Qu'en est-il français? Ça marche alors? | |
| qu-en-est-il-français-ça-marche-alors | |
| Что делать, если я не хочу, UTF-8? | |
| chto-delat-esli-ya-ne-hochu-utf-8 | |
| מה אם אני לא רוצה UTF-8 תווים? | |
| מה-אם-אני-לא-רוצה-utf-8-תווים | |
| This is an Example String. What's Going to Happen to Me? | |
| This_is_a_Test_String_What_s_Going_to_Ha | |
| */ | |
| ?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment