mirror of
https://github.com/Ellpeck/MLEM.git
synced 2024-11-22 12:58:33 +01:00
Improved text formatter tokenization performance
This commit is contained in:
parent
5e2f48db9d
commit
476e1dd2a6
5 changed files with 17 additions and 20 deletions
|
@ -23,6 +23,7 @@ Additions
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
- Stopped the text formatter throwing if a color can't be parsed
|
- Stopped the text formatter throwing if a color can't be parsed
|
||||||
|
- Improved text formatter tokenization performance
|
||||||
|
|
||||||
Fixes
|
Fixes
|
||||||
- Fixed TextInput not working correctly when using surrogate pairs
|
- Fixed TextInput not working correctly when using surrogate pairs
|
||||||
|
|
|
@ -23,9 +23,9 @@ namespace MLEM.Formatting.Codes {
|
||||||
public readonly Match Match;
|
public readonly Match Match;
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The tokens that this formatting code is a part of.
|
/// The tokens that this formatting code is a part of.
|
||||||
/// Note that this array only has multiple entries if additional tokens have to be started while this code is still applied.
|
/// Note that this collection only has multiple entries if additional tokens have to be started while this code is still applied.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
public IList<Token> Tokens { get; internal set; }
|
public readonly List<Token> Tokens = new List<Token>();
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Creates a new formatting code based on a formatting code regex and its match.
|
/// Creates a new formatting code based on a formatting code regex and its match.
|
||||||
|
|
|
@ -156,11 +156,12 @@ namespace MLEM.Formatting {
|
||||||
// resolve macros
|
// resolve macros
|
||||||
s = this.ResolveMacros(s);
|
s = this.ResolveMacros(s);
|
||||||
var tokens = new List<Token>();
|
var tokens = new List<Token>();
|
||||||
var codes = new List<Code>();
|
var applied = new List<Code>();
|
||||||
|
var allCodes = new List<Code>();
|
||||||
// add the formatting code right at the start of the string
|
// add the formatting code right at the start of the string
|
||||||
var firstCode = this.GetNextCode(s, 0, 0);
|
var firstCode = this.GetNextCode(s, 0, 0);
|
||||||
if (firstCode != null)
|
if (firstCode != null)
|
||||||
codes.Add(firstCode);
|
applied.Add(firstCode);
|
||||||
var index = 0;
|
var index = 0;
|
||||||
var rawIndex = 0;
|
var rawIndex = 0;
|
||||||
while (rawIndex < s.Length) {
|
while (rawIndex < s.Length) {
|
||||||
|
@ -168,24 +169,25 @@ namespace MLEM.Formatting {
|
||||||
// if we've reached the end of the string
|
// if we've reached the end of the string
|
||||||
if (next == null) {
|
if (next == null) {
|
||||||
var sub = s.Substring(rawIndex, s.Length - rawIndex);
|
var sub = s.Substring(rawIndex, s.Length - rawIndex);
|
||||||
tokens.Add(new Token(codes.ToArray(), index, rawIndex, TextFormatter.StripFormatting(font, sub, codes), sub));
|
tokens.Add(new Token(applied.ToArray(), index, rawIndex, TextFormatter.StripFormatting(font, sub, applied), sub));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
allCodes.Add(next);
|
||||||
|
|
||||||
// create a new token for the content up to the next code
|
// create a new token for the content up to the next code
|
||||||
var ret = s.Substring(rawIndex, next.Match.Index - rawIndex);
|
var ret = s.Substring(rawIndex, next.Match.Index - rawIndex);
|
||||||
var strippedRet = TextFormatter.StripFormatting(font, ret, codes);
|
var strippedRet = TextFormatter.StripFormatting(font, ret, applied);
|
||||||
tokens.Add(new Token(codes.ToArray(), index, rawIndex, strippedRet, ret));
|
tokens.Add(new Token(applied.ToArray(), index, rawIndex, strippedRet, ret));
|
||||||
|
|
||||||
// move to the start of the next code
|
// move to the start of the next code
|
||||||
rawIndex = next.Match.Index;
|
rawIndex = next.Match.Index;
|
||||||
index += strippedRet.Length;
|
index += strippedRet.Length;
|
||||||
|
|
||||||
// remove all codes that are incompatible with the next one and apply it
|
// remove all codes that are incompatible with the next one and apply it
|
||||||
codes.RemoveAll(c => c.EndsHere(next) || next.EndsOther(c));
|
applied.RemoveAll(c => c.EndsHere(next) || next.EndsOther(c));
|
||||||
codes.Add(next);
|
applied.Add(next);
|
||||||
}
|
}
|
||||||
return new TokenizedString(font, alignment, s, TextFormatter.StripFormatting(font, s, tokens.SelectMany(t => t.AppliedCodes)), tokens.ToArray());
|
return new TokenizedString(font, alignment, s, TextFormatter.StripFormatting(font, s, allCodes), tokens.ToArray(), allCodes.ToArray());
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
|
@ -50,6 +50,8 @@ namespace MLEM.Formatting {
|
||||||
this.RawIndex = rawIndex;
|
this.RawIndex = rawIndex;
|
||||||
this.Substring = substring;
|
this.Substring = substring;
|
||||||
this.RawSubstring = rawSubstring;
|
this.RawSubstring = rawSubstring;
|
||||||
|
foreach (var code in appliedCodes)
|
||||||
|
code.Tokens.Add(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Collections.ObjectModel;
|
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
using Microsoft.Xna.Framework;
|
using Microsoft.Xna.Framework;
|
||||||
using Microsoft.Xna.Framework.Graphics;
|
using Microsoft.Xna.Framework.Graphics;
|
||||||
using MLEM.Extensions;
|
|
||||||
using MLEM.Font;
|
using MLEM.Font;
|
||||||
using MLEM.Formatting.Codes;
|
using MLEM.Formatting.Codes;
|
||||||
using MLEM.Misc;
|
using MLEM.Misc;
|
||||||
|
@ -42,17 +40,11 @@ namespace MLEM.Formatting {
|
||||||
private float initialInnerOffset;
|
private float initialInnerOffset;
|
||||||
private RectangleF area;
|
private RectangleF area;
|
||||||
|
|
||||||
internal TokenizedString(GenericFont font, TextAlignment alignment, string rawString, string strg, Token[] tokens) {
|
internal TokenizedString(GenericFont font, TextAlignment alignment, string rawString, string strg, Token[] tokens, Code[] allCodes) {
|
||||||
this.RawString = rawString;
|
this.RawString = rawString;
|
||||||
this.String = strg;
|
this.String = strg;
|
||||||
this.Tokens = tokens;
|
this.Tokens = tokens;
|
||||||
|
this.AllCodes = allCodes;
|
||||||
// since a code can be present in multiple tokens, we use Distinct here
|
|
||||||
this.AllCodes = tokens.SelectMany(t => t.AppliedCodes).Distinct().ToArray();
|
|
||||||
// TODO this can probably be optimized by keeping track of a code's tokens while tokenizing
|
|
||||||
foreach (var code in this.AllCodes)
|
|
||||||
code.Tokens = new ReadOnlyCollection<Token>(this.Tokens.Where(t => t.AppliedCodes.Contains(code)).ToList());
|
|
||||||
|
|
||||||
this.Realign(font, alignment);
|
this.Realign(font, alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue