Correctly encode a name for a URL.

image_pdfimage_print
   
 

//CruiseControl is open source software and is developed and maintained by a group of dedicated volunteers. 
//CruiseControl is distributed under a BSD-style license.
//http://cruisecontrol.sourceforge.net/
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;

namespace ThoughtWorks.CruiseControl.Core.Util
{
    /// <summary>
    /// Class with handy string routines.
    /// </summary>
    public class StringUtil
    {
        // Matches any single character that is NOT an RFC 3986 "unreserved" character
        // (A-Z a-z 0-9 - _ . ~). The hyphen is deliberately placed last inside the
        // character class so it is a literal hyphen; written as ".-_" it would be parsed
        // as a range from '.' to '_' and would let characters such as '/', '?', '=' and
        // '@' through un-encoded while wrongly encoding '-'.
        private static readonly Regex urlEncodeRegex = new Regex("[^a-zA-Z0-9._~-]", RegexOptions.Compiled);

        // Highest code point that is still plain ASCII; anything above is passed through unchanged.
        private const int MaxAsciiValue = 127;

        // public for testing only
        public const string DEFAULT_DELIMITER = ",";


        /// <summary>
        /// Correctly encode a name for a URL.
        /// </summary>
        /// <param name="name">The name to encode.</param>
        /// <returns>The encoded name.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="name"/> is <c>null</c>.
        /// </exception>
        /// <remarks>
        /// <para>
        /// HttpUtility.UrlEncode does not correctly encode for a URL: spaces get converted into
        /// pluses, which can cause security errors.
        /// </para>
        /// <para>
        /// This method encodes characters according to RFC 3986. Only the following
        /// characters are left un-encoded:
        /// </para>
        /// <para>
        /// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z a b c d e f g h i j k l m n o p q r s
        /// t u v w x y z 0 1 2 3 4 5 6 7 8 9 - _ . ~
        /// </para>
        /// <para>
        /// Every other ASCII character is replaced by a percent sign followed by its two-digit
        /// lower-case hexadecimal value (for example a space becomes <c>%20</c> and a slash
        /// becomes <c>%2f</c>).
        /// </para>
        /// <para>
        /// Non-ASCII characters (code points above 127) cannot be represented by a single two-digit
        /// escape, so they are left exactly as they are; the resulting string is assumed to be
        /// transmitted using UTF-8 encoding.
        /// </para>
        /// </remarks>
        public static string UrlEncodeName(string name)
        {
            if (name == null)
            {
                throw new ArgumentNullException("name");
            }

            return urlEncodeRegex.Replace(name, match =>
            {
                // The pattern matches exactly one UTF-16 code unit at a time.
                var charValue = (int)match.Value[0];
                if (charValue > MaxAsciiValue)
                {
                    // Non-ASCII: leave untouched rather than emitting a bogus single-byte escape.
                    return match.Value;
                }

                return "%" + charValue.ToString("x2", System.Globalization.CultureInfo.InvariantCulture);
            });
        }
    }
}

   
     


This entry was posted in C# Network. Bookmark the permalink.