using System.Collections;
using System.Collections.Generic;
using System.Text;

namespace MLEM.Font {
    /// <summary>
    /// A code point source is a wrapper around a <see cref="string"/> or <see cref="StringBuilder"/> that allows retrieving UTF-32 code points at a given index using <see cref="GetCodePoint"/>.
    /// Additionally, it allows enumerating every code point in the underlying <see cref="string"/> or <see cref="StringBuilder"/>.
    /// This struct also contains <see cref="ToString(int)"/>, which converts a code point into its <see cref="string"/> representation, but caches the result to avoid allocating excess memory.
    /// </summary>
    public readonly struct CodePointSource : IEnumerable<int> {

        // Maps a UTF-32 code point to its string representation so that each one is only allocated once.
        // Note that this is a plain dictionary without any synchronization.
        private static readonly Dictionary<int, string> StringCache = new Dictionary<int, string>();

        // Exactly one of these two is expected to be set, depending on the constructor that was used.
        private readonly string strg;
        private readonly StringBuilder builder;

        // Reads a single UTF-16 character from whichever underlying text is present.
        private char this[int index] => this.strg?[index] ?? this.builder[index];

        /// <summary>
        /// The length of this code point source, in characters.
        /// Note that this is not representative of the amount of code points in this source.
        /// If this source has no underlying text (for example, a default instance), the length is 0.
        /// </summary>
        public int Length => this.strg?.Length ?? this.builder?.Length ?? 0;

        /// <summary>
        /// Creates a new code point source from the given <see cref="string"/>.
        /// </summary>
        /// <param name="strg">The <see cref="string"/> whose code points to inspect.</param>
        public CodePointSource(string strg) {
            this.strg = strg;
            this.builder = null;
        }

        /// <summary>
        /// Creates a new code point source from the given <see cref="StringBuilder"/>.
        /// </summary>
        /// <param name="builder">The <see cref="StringBuilder"/> whose code points to inspect.</param>
        public CodePointSource(StringBuilder builder) {
            this.strg = null;
            this.builder = builder;
        }

        /// <summary>
        /// Returns the code point at the given <paramref name="index"/> in this code point source's underlying string, where the index is measured in characters and not code points.
        /// The resulting code point will either be a single <see cref="char"/> cast to an <see cref="int"/>, at which point the returned length will be 1, or a UTF-32 character made up of two <see cref="char"/> values, at which point the returned length will be 2.
        /// </summary>
        /// <param name="index">The index at which to return the code point, which is measured in characters.</param>
        /// <param name="indexLowSurrogate">Whether the <paramref name="index"/> represents a low surrogate. If this is <see langword="false"/>, the <paramref name="index"/> represents a high surrogate and the low surrogate will be looked for in the following character. If this is <see langword="true"/>, the <paramref name="index"/> represents a low surrogate and the high surrogate will be looked for in the previous character.</param>
        /// <returns>The code point at the given location, as well as its length.</returns>
        public (int CodePoint, int Length) GetCodePoint(int index, bool indexLowSurrogate = false) {
            var curr = this[index];
            if (indexLowSurrogate) {
                // the index points at the second half of a pair, so the first half is the previous character
                if (index > 0) {
                    var high = this[index - 1];
                    if (char.IsSurrogatePair(high, curr))
                        return (char.ConvertToUtf32(high, curr), 2);
                }
            } else {
                // the index points at the first half of a pair, so the second half is the next character
                if (index < this.Length - 1) {
                    var low = this[index + 1];
                    if (char.IsSurrogatePair(curr, low))
                        return (char.ConvertToUtf32(curr, low), 2);
                }
            }
            // no valid pair around the index, so the character stands for itself
            return (curr, 1);
        }

        /// <summary>
        /// Returns an index in this code point source that is as close to <paramref name="index"/> as possible, but not between two members of a surrogate pair. If the <paramref name="index"/> is already not between surrogate pairs, it is returned unchanged.
        /// Note that only the character at <paramref name="index"/> is inspected: if it is a low surrogate, the index is considered to be between surrogates.
        /// </summary>
        /// <param name="index">The index to ensure is not between surrogates.</param>
        /// <param name="increase">Whether the returned index should be increased by 1 (instead of decreased by 1) when it is between surrogates.</param>
        /// <returns>An index close to <paramref name="index"/>, but not between surrogates.</returns>
        public int EnsureSurrogateBoundary(int index, bool increase) {
            if (index < this.Length && char.IsLowSurrogate(this[index])) {
                // moving backwards is not possible at the very start, so the index is increased there regardless
                return increase || index <= 0 ? index + 1 : index - 1;
            }
            return index;
        }

        /// <summary>Returns an enumerator that iterates through the code points of this source, in order.</summary>
        /// <returns>A <see cref="IEnumerator{T}"/> that can be used to iterate through the code points.</returns>
        public IEnumerator<int> GetEnumerator() {
            var index = 0;
            while (index < this.Length) {
                var (codePoint, length) = this.GetCodePoint(index);
                yield return codePoint;
                // skip both characters of a surrogate pair at once
                index += length;
            }
        }

        /// <summary>Returns an enumerator that iterates through the code points of this source, in order.</summary>
        /// <returns>An <see cref="IEnumerator"/> object that can be used to iterate through the code points.</returns>
        IEnumerator IEnumerable.GetEnumerator() {
            return this.GetEnumerator();
        }

        /// <summary>
        /// Converts the given UTF-32 <paramref name="codePoint"/> into a string using <see cref="char.ConvertFromUtf32"/>, but caches the result in a <see cref="Dictionary{TKey,TValue}"/> cache to avoid allocating excess memory.
        /// </summary>
        /// <param name="codePoint">The UTF-32 code point to convert.</param>
        /// <returns>The string representation of the code point.</returns>
        public static string ToString(int codePoint) {
            if (!CodePointSource.StringCache.TryGetValue(codePoint, out var ret)) {
                ret = char.ConvertFromUtf32(codePoint);
                CodePointSource.StringCache.Add(codePoint, ret);
            }
            return ret;
        }

    }
}