1
0
Fork 0
mirror of https://github.com/Ellpeck/MLEM.git synced 2024-11-22 04:53:29 +01:00

Improved text formatter tokenization performance

This commit is contained in:
Ell 2023-10-14 18:58:32 +02:00
parent 5e2f48db9d
commit 476e1dd2a6
5 changed files with 17 additions and 20 deletions

View file

@ -23,6 +23,7 @@ Additions
Improvements Improvements
- Stopped the text formatter throwing if a color can't be parsed - Stopped the text formatter throwing if a color can't be parsed
- Improved text formatter tokenization performance
Fixes Fixes
- Fixed TextInput not working correctly when using surrogate pairs - Fixed TextInput not working correctly when using surrogate pairs

View file

@ -23,9 +23,9 @@ namespace MLEM.Formatting.Codes {
public readonly Match Match; public readonly Match Match;
/// <summary> /// <summary>
/// The tokens that this formatting code is a part of. /// The tokens that this formatting code is a part of.
/// Note that this array only has multiple entries if additional tokens have to be started while this code is still applied. /// Note that this collection only has multiple entries if additional tokens have to be started while this code is still applied.
/// </summary> /// </summary>
public IList<Token> Tokens { get; internal set; } public readonly List<Token> Tokens = new List<Token>();
/// <summary> /// <summary>
/// Creates a new formatting code based on a formatting code regex and its match. /// Creates a new formatting code based on a formatting code regex and its match.

View file

@ -156,11 +156,12 @@ namespace MLEM.Formatting {
// resolve macros // resolve macros
s = this.ResolveMacros(s); s = this.ResolveMacros(s);
var tokens = new List<Token>(); var tokens = new List<Token>();
var codes = new List<Code>(); var applied = new List<Code>();
var allCodes = new List<Code>();
// add the formatting code right at the start of the string // add the formatting code right at the start of the string
var firstCode = this.GetNextCode(s, 0, 0); var firstCode = this.GetNextCode(s, 0, 0);
if (firstCode != null) if (firstCode != null)
codes.Add(firstCode); applied.Add(firstCode);
var index = 0; var index = 0;
var rawIndex = 0; var rawIndex = 0;
while (rawIndex < s.Length) { while (rawIndex < s.Length) {
@ -168,24 +169,25 @@ namespace MLEM.Formatting {
// if we've reached the end of the string // if we've reached the end of the string
if (next == null) { if (next == null) {
var sub = s.Substring(rawIndex, s.Length - rawIndex); var sub = s.Substring(rawIndex, s.Length - rawIndex);
tokens.Add(new Token(codes.ToArray(), index, rawIndex, TextFormatter.StripFormatting(font, sub, codes), sub)); tokens.Add(new Token(applied.ToArray(), index, rawIndex, TextFormatter.StripFormatting(font, sub, applied), sub));
break; break;
} }
allCodes.Add(next);
// create a new token for the content up to the next code // create a new token for the content up to the next code
var ret = s.Substring(rawIndex, next.Match.Index - rawIndex); var ret = s.Substring(rawIndex, next.Match.Index - rawIndex);
var strippedRet = TextFormatter.StripFormatting(font, ret, codes); var strippedRet = TextFormatter.StripFormatting(font, ret, applied);
tokens.Add(new Token(codes.ToArray(), index, rawIndex, strippedRet, ret)); tokens.Add(new Token(applied.ToArray(), index, rawIndex, strippedRet, ret));
// move to the start of the next code // move to the start of the next code
rawIndex = next.Match.Index; rawIndex = next.Match.Index;
index += strippedRet.Length; index += strippedRet.Length;
// remove all codes that are incompatible with the next one and apply it // remove all codes that are incompatible with the next one and apply it
codes.RemoveAll(c => c.EndsHere(next) || next.EndsOther(c)); applied.RemoveAll(c => c.EndsHere(next) || next.EndsOther(c));
codes.Add(next); applied.Add(next);
} }
return new TokenizedString(font, alignment, s, TextFormatter.StripFormatting(font, s, tokens.SelectMany(t => t.AppliedCodes)), tokens.ToArray()); return new TokenizedString(font, alignment, s, TextFormatter.StripFormatting(font, s, allCodes), tokens.ToArray(), allCodes.ToArray());
} }
/// <summary> /// <summary>

View file

@ -50,6 +50,8 @@ namespace MLEM.Formatting {
this.RawIndex = rawIndex; this.RawIndex = rawIndex;
this.Substring = substring; this.Substring = substring;
this.RawSubstring = rawSubstring; this.RawSubstring = rawSubstring;
foreach (var code in appliedCodes)
code.Tokens.Add(this);
} }
/// <summary> /// <summary>

View file

@ -1,11 +1,9 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.Collections.ObjectModel;
using System.Linq; using System.Linq;
using System.Text; using System.Text;
using Microsoft.Xna.Framework; using Microsoft.Xna.Framework;
using Microsoft.Xna.Framework.Graphics; using Microsoft.Xna.Framework.Graphics;
using MLEM.Extensions;
using MLEM.Font; using MLEM.Font;
using MLEM.Formatting.Codes; using MLEM.Formatting.Codes;
using MLEM.Misc; using MLEM.Misc;
@ -42,17 +40,11 @@ namespace MLEM.Formatting {
private float initialInnerOffset; private float initialInnerOffset;
private RectangleF area; private RectangleF area;
internal TokenizedString(GenericFont font, TextAlignment alignment, string rawString, string strg, Token[] tokens) { internal TokenizedString(GenericFont font, TextAlignment alignment, string rawString, string strg, Token[] tokens, Code[] allCodes) {
this.RawString = rawString; this.RawString = rawString;
this.String = strg; this.String = strg;
this.Tokens = tokens; this.Tokens = tokens;
this.AllCodes = allCodes;
// since a code can be present in multiple tokens, we use Distinct here
this.AllCodes = tokens.SelectMany(t => t.AppliedCodes).Distinct().ToArray();
// TODO this can probably be optimized by keeping track of a code's tokens while tokenizing
foreach (var code in this.AllCodes)
code.Tokens = new ReadOnlyCollection<Token>(this.Tokens.Where(t => t.AppliedCodes.Contains(code)).ToList());
this.Realign(font, alignment); this.Realign(font, alignment);
} }