#region Copyright (c) 2004, Ryan Whitaker
/*********************************************************************************
'
' Copyright (c) 2004 Ryan Whitaker
'
' This software is provided 'as-is', without any express or implied warranty. In no
' event will the authors be held liable for any damages arising from the use of this
' software.
'
' Permission is granted to anyone to use this software for any purpose, including
' commercial applications, and to alter it and redistribute it freely, subject to the
' following restrictions:
'
' 1. The origin of this software must not be misrepresented; you must not claim that
' you wrote the original software. If you use this software in a product, an
' acknowledgment (see the following) in the product documentation is required.
'
' This product uses software written by the developers of NClassifier
' (http://nclassifier.sourceforge.net). NClassifier is a .NET port of the Nick
' Lothian's Java text classification engine, Classifier4J
' (http://classifier4j.sourceforge.net).
'
' 2. Altered source versions must be plainly marked as such, and must not be
' misrepresented as being the original software.
'
' 3. This notice may not be removed or altered from any source distribution.
'
'********************************************************************************/
#endregion

using System;
using System.Collections;
using System.Text.RegularExpressions;

namespace NClassifier
{
    /// <summary>
    /// Text helper routines.
    /// </summary>
    public class Utilities
    {
        // One or more sentence terminators (".", "!" or "?") followed by either
        // a run of whitespace or the very end of the input (\z).
        // The original Java regex was (\.|!|\?)+(\s|\z); the groups are
        // non-capturing here so that Regex.Split does not include the matched
        // delimiters in its result.
        private const string SentenceBoundaryPattern = @"(?:\.|!|\?)+(?:\s+|\z)";

        /// <summary>
        /// Gets an array of sentences.
        /// </summary>
        /// <remarks>
        /// A sentence ends at one or more of ".", "!" or "?" followed by whitespace
        /// or the end of the input. The terminating punctuation and the whitespace
        /// after it are not part of the returned sentences. A "." that is not
        /// followed by whitespace (as in "3.14") does not end a sentence.
        /// </remarks>
        /// <param name="input">A string that contains sentences. May be null.</param>
        /// <returns>
        /// An array of strings, each element containing a sentence. The array is
        /// empty when <paramref name="input"/> is null or contains no text.
        /// </returns>
        public static string[] GetSentences(string input)
        {
            if (input == null)
            {
                return new string[0];
            }

            string[] pieces = Regex.Split(input, SentenceBoundaryPattern);

            // Regex.Split yields an empty string wherever a delimiter sits at the
            // very start or end of the input (e.g. the final "." matched via \z),
            // so drop the empty entries.
            ArrayList sentences = new ArrayList(pieces.Length);
            foreach (string piece in pieces)
            {
                if (piece.Length > 0)
                {
                    sentences.Add(piece);
                }
            }

            return (string[])sentences.ToArray(typeof(string));
        }
    }
}