diff --git a/CHANGELOG.md b/CHANGELOG.md index 4826f55..0cd7aae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ Additions Improvements - Stopped the text formatter throwing if a color can't be parsed +- Improved text formatter tokenization performance Fixes - Fixed TextInput not working correctly when using surrogate pairs diff --git a/MLEM/Formatting/Codes/Code.cs b/MLEM/Formatting/Codes/Code.cs index 8b0f75e..393b149 100644 --- a/MLEM/Formatting/Codes/Code.cs +++ b/MLEM/Formatting/Codes/Code.cs @@ -23,9 +23,9 @@ namespace MLEM.Formatting.Codes { public readonly Match Match; /// /// The tokens that this formatting code is a part of. - /// Note that this array only has multiple entries if additional tokens have to be started while this code is still applied. + /// Note that this collection only has multiple entries if additional tokens have to be started while this code is still applied. /// - public IList Tokens { get; internal set; } + public readonly List Tokens = new List(); /// /// Creates a new formatting code based on a formatting code regex and its match. diff --git a/MLEM/Formatting/TextFormatter.cs b/MLEM/Formatting/TextFormatter.cs index 1407572..f17124d 100644 --- a/MLEM/Formatting/TextFormatter.cs +++ b/MLEM/Formatting/TextFormatter.cs @@ -156,11 +156,12 @@ namespace MLEM.Formatting { // resolve macros s = this.ResolveMacros(s); var tokens = new List(); - var codes = new List(); + var applied = new List(); + var allCodes = new List(); // add the formatting code right at the start of the string var firstCode = this.GetNextCode(s, 0, 0); if (firstCode != null) - codes.Add(firstCode); + applied.Add(firstCode); var index = 0; var rawIndex = 0; while (rawIndex < s.Length) { @@ -168,24 +169,25 @@ namespace MLEM.Formatting { // if we've reached the end of the string if (next == null) { var sub = s.Substring(rawIndex, s.Length - rawIndex); - tokens.Add(new Token(codes.ToArray(), index, rawIndex, TextFormatter.StripFormatting(font, sub, codes), sub)); + tokens.Add(new Token(applied.ToArray(), index, rawIndex, TextFormatter.StripFormatting(font, sub, applied), sub)); break; } + allCodes.Add(next); // create a new token for the content up to the next code var ret = s.Substring(rawIndex, next.Match.Index - rawIndex); - var strippedRet = TextFormatter.StripFormatting(font, ret, codes); - tokens.Add(new Token(codes.ToArray(), index, rawIndex, strippedRet, ret)); + var strippedRet = TextFormatter.StripFormatting(font, ret, applied); + tokens.Add(new Token(applied.ToArray(), index, rawIndex, strippedRet, ret)); // move to the start of the next code rawIndex = next.Match.Index; index += strippedRet.Length; // remove all codes that are incompatible with the next one and apply it - codes.RemoveAll(c => c.EndsHere(next) || next.EndsOther(c)); - codes.Add(next); + applied.RemoveAll(c => c.EndsHere(next) || next.EndsOther(c)); + applied.Add(next); } - return new TokenizedString(font, alignment, s, TextFormatter.StripFormatting(font, s, tokens.SelectMany(t => t.AppliedCodes)), tokens.ToArray()); + return new TokenizedString(font, alignment, s, TextFormatter.StripFormatting(font, s, allCodes), tokens.ToArray(), allCodes.ToArray()); } /// diff --git a/MLEM/Formatting/Token.cs b/MLEM/Formatting/Token.cs index 029c8ca..6280351 100644 --- a/MLEM/Formatting/Token.cs +++ b/MLEM/Formatting/Token.cs @@ -50,6 +50,8 @@ namespace MLEM.Formatting { this.RawIndex = rawIndex; this.Substring = substring; this.RawSubstring = rawSubstring; + foreach (var code in appliedCodes) + code.Tokens.Add(this); } /// diff --git a/MLEM/Formatting/TokenizedString.cs b/MLEM/Formatting/TokenizedString.cs index db49998..f27317b 100644 --- a/MLEM/Formatting/TokenizedString.cs +++ b/MLEM/Formatting/TokenizedString.cs @@ -1,11 +1,9 @@ using System; using System.Collections.Generic; -using System.Collections.ObjectModel; using System.Linq; using System.Text; using Microsoft.Xna.Framework; using Microsoft.Xna.Framework.Graphics; -using MLEM.Extensions; using MLEM.Font; using MLEM.Formatting.Codes; using MLEM.Misc; @@ -42,17 +40,11 @@ namespace MLEM.Formatting { private float initialInnerOffset; private RectangleF area; - internal TokenizedString(GenericFont font, TextAlignment alignment, string rawString, string strg, Token[] tokens) { + internal TokenizedString(GenericFont font, TextAlignment alignment, string rawString, string strg, Token[] tokens, Code[] allCodes) { this.RawString = rawString; this.String = strg; this.Tokens = tokens; - - // since a code can be present in multiple tokens, we use Distinct here - this.AllCodes = tokens.SelectMany(t => t.AppliedCodes).Distinct().ToArray(); - // TODO this can probably be optimized by keeping track of a code's tokens while tokenizing - foreach (var code in this.AllCodes) - code.Tokens = new ReadOnlyCollection(this.Tokens.Where(t => t.AppliedCodes.Contains(code)).ToList()); - + this.AllCodes = allCodes; this.Realign(font, alignment); }