using System.Collections;
using System.Collections.Generic;
using System.Text;
namespace MLEM.Font {
/// <summary>
/// A code point source is a wrapper around a <see cref="string"/> or <see cref="StringBuilder"/> that allows retrieving UTF-32 code points at a given index using <see cref="GetCodePoint"/>.
/// Additionally, it allows enumerating every code point in the underlying <see cref="string"/> or <see cref="StringBuilder"/>.
/// This struct also contains <see cref="ToString(int)"/>, which converts a code point into its <see cref="string"/> representation, but caches the result to avoid allocating excess memory.
/// </summary>
public readonly struct CodePointSource : IEnumerable<int> {

    // Maps a UTF-32 code point to its string form so that repeated conversions reuse the same instance.
    // This is a plain dictionary with no locking, so concurrent callers of ToString(int) need their own synchronization.
    private static readonly Dictionary<int, string> StringCache = new Dictionary<int, string>();

    // At most one of these two is set, depending on which constructor was used.
    // Both are null for a default-initialized source, which is treated as empty.
    private readonly string strg;
    private readonly StringBuilder builder;

    // Reads a single UTF-16 character from whichever underlying source is set.
    private char this[int index] {
        get {
            if (this.strg != null)
                return this.strg[index];
            if (this.builder != null)
                return this.builder[index];
            // Neither source is set: index into an empty string so the access fails with the
            // same out-of-range exception a real source would throw, rather than a null reference.
            return string.Empty[index];
        }
    }

    /// <summary>
    /// The length of this code point source, in characters.
    /// Note that this is not representative of the amount of code points in this source.
    /// A source without an underlying <see cref="string"/> or <see cref="StringBuilder"/> has a length of 0.
    /// </summary>
    public int Length => this.strg?.Length ?? this.builder?.Length ?? 0;

    /// <summary>
    /// Creates a new code point source from the given <see cref="string"/>.
    /// </summary>
    /// <param name="strg">The <see cref="string"/> whose code points to inspect.</param>
    public CodePointSource(string strg) {
        this.strg = strg;
        this.builder = null;
    }

    /// <summary>
    /// Creates a new code point source from the given <see cref="StringBuilder"/>.
    /// </summary>
    /// <param name="builder">The <see cref="StringBuilder"/> whose code points to inspect.</param>
    public CodePointSource(StringBuilder builder) {
        this.strg = null;
        this.builder = builder;
    }

    /// <summary>
    /// Returns the code point at the given <paramref name="index"/> in this code point source's underlying string, where the index is measured in characters and not code points.
    /// The resulting code point will either be a single <see cref="char"/> cast to an <see cref="int"/>, at which point the returned length will be 1, or a UTF-32 character made up of two <see cref="char"/> values, at which point the returned length will be 2.
    /// </summary>
    /// <param name="index">The index at which to return the code point, which is measured in characters.</param>
    /// <param name="indexLowSurrogate">Whether the <paramref name="index"/> represents a low surrogate. If this is <see langword="false"/>, the <paramref name="index"/> represents a high surrogate and the low surrogate will be looked for in the following character. If this is <see langword="true"/>, the <paramref name="index"/> represents a low surrogate and the high surrogate will be looked for in the previous character.</param>
    /// <returns>The code point at the given location, as well as its length.</returns>
    public (int CodePoint, int Length) GetCodePoint(int index, bool indexLowSurrogate = false) {
        var curr = this[index];
        if (indexLowSurrogate) {
            // curr is treated as the trailing half, so its partner has to sit directly before it
            if (index > 0) {
                var high = this[index - 1];
                if (char.IsSurrogatePair(high, curr))
                    return (char.ConvertToUtf32(high, curr), 2);
            }
        } else {
            // curr is treated as the leading half, so its partner has to sit directly after it
            if (index < this.Length - 1) {
                var low = this[index + 1];
                if (char.IsSurrogatePair(curr, low))
                    return (char.ConvertToUtf32(curr, low), 2);
            }
        }
        // no matching partner was found, so the character stands for itself
        return (curr, 1);
    }

    /// <summary>
    /// Returns an index in this code point source that is as close to <paramref name="index"/> as possible, but not between two members of a surrogate pair. If the <paramref name="index"/> is already not between surrogate pairs, it is returned unchanged.
    /// </summary>
    /// <param name="index">The index to ensure is not between surrogates.</param>
    /// <param name="increase">Whether the returned index should be increased by 1 (instead of decreased by 1) when it is between surrogates.</param>
    /// <returns>An index close to <paramref name="index"/>, but not between surrogates.</returns>
    public int EnsureSurrogateBoundary(int index, bool increase) {
        // An index only splits a pair if a high surrogate sits right before it and a low surrogate sits at it.
        // Indices at or beyond either end of the source, as well as lone low surrogates, are left untouched.
        if (index <= 0 || index >= this.Length || !char.IsSurrogatePair(this[index - 1], this[index]))
            return index;
        return increase ? index + 1 : index - 1;
    }

    /// <summary>
    /// Returns an enumerator that iterates through every code point in this source, combining surrogate pairs into a single UTF-32 code point.
    /// </summary>
    /// <returns>An enumerator that can be used to iterate through the code points of this source.</returns>
    public IEnumerator<int> GetEnumerator() {
        var index = 0;
        while (index < this.Length) {
            var (codePoint, length) = this.GetCodePoint(index);
            yield return codePoint;
            // skip both halves when the code point was made up of a surrogate pair
            index += length;
        }
    }

    /// <summary>
    /// Returns a non-generic enumerator that iterates through every code point in this source.
    /// </summary>
    /// <returns>An enumerator that can be used to iterate through the code points of this source.</returns>
    IEnumerator IEnumerable.GetEnumerator() {
        return this.GetEnumerator();
    }

    /// <summary>
    /// Converts the given UTF-32 <paramref name="codePoint"/> into a string using <see cref="char.ConvertFromUtf32"/>, but caches the result in a <see cref="Dictionary{TKey,TValue}"/> cache to avoid allocating excess memory.
    /// </summary>
    /// <param name="codePoint">The UTF-32 code point to convert.</param>
    /// <returns>The string representation of the code point.</returns>
    public static string ToString(int codePoint) {
        if (!CodePointSource.StringCache.TryGetValue(codePoint, out var ret)) {
            // the conversion runs before the cache is touched, so a failed conversion caches nothing
            ret = char.ConvertFromUtf32(codePoint);
            CodePointSource.StringCache.Add(codePoint, ret);
        }
        return ret;
    }

}
}