Improve HTML formatter

This commit is contained in:
aringenbach
2022-06-09 17:29:14 +02:00
committed by aringenbach
parent 0015993b8e
commit f8cd8fa96c
10 changed files with 230 additions and 271 deletions
@@ -0,0 +1,111 @@
//
// Copyright 2021 New Vector Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
import Foundation
import DTCoreText
import UIKit
@objcMembers
class HTMLFormatter: NSObject {
    /// Builds an attributed string from a string containing html.
    ///
    /// The HTML is rendered through `formatHTML(_:options:)`. If the input cannot be
    /// encoded as UTF-8, or if rendering yields no result, the raw `htmlString` is
    /// returned as a plain (unstyled) attributed string instead.
    ///
    /// - Parameters:
    ///   - htmlString: The html string to use.
    ///   - allowedTags: The html tags that should be allowed.
    ///   - imageHandler: The image handler for the formatted string.
    ///   - extraOptions: Extra options to apply for the format. On a key collision these
    ///                   take precedence over the default options built by this method.
    ///   - font: The default font to use.
    ///   - postFormatOperations: Optional block to provide operations to apply to the
    ///                           mutable result, after the DTCoreText artifacts have been removed.
    /// - Returns: The built `NSAttributedString`.
    /// - Note: It is recommended to include "p" and "body" tags in `allowedTags` as these are often added when parsing.
    func formatHTML(_ htmlString: String,
                    withAllowedTags allowedTags: [String],
                    andImageHandler imageHandler: DTHTMLElement.ImageHandler? = nil,
                    extraOptions: [AnyHashable: Any] = [:],
                    font: UIFont,
                    postFormatOperations: ((NSMutableAttributedString) -> Void)? = nil) -> NSAttributedString {
        guard let data = htmlString.data(using: .utf8) else {
            return NSAttributedString(string: htmlString)
        }

        // Sanitizes each element handed to the callback against the allowed tags.
        let sanitizeCallback: DTHTMLAttributedStringBuilderWillFlushCallback = { [allowedTags, font] (element: DTHTMLElement?) in
            element?.sanitize(with: allowedTags, bodyFont: font, imageHandler: imageHandler)
        }

        var options: [AnyHashable: Any] = [
            DTUseiOS6Attributes: true,
            DTDefaultFontFamily: font.familyName,
            DTDefaultFontName: font.fontName,
            DTDefaultFontSize: font.pointSize,
            DTDefaultLinkDecoration: false,
            DTWillFlushBlockCallBack: sanitizeCallback
        ]
        // Caller-supplied options win over the defaults above.
        options.merge(extraOptions) { (_, new) in new }

        guard let string = self.formatHTML(data, options: options) else {
            return NSAttributedString(string: htmlString)
        }

        let mutableString = NSMutableAttributedString(attributedString: string)
        MXKTools.removeDTCoreTextArtifacts(mutableString)
        postFormatOperations?(mutableString)

        return mutableString
    }

    /// Builds an attributed string by replacing a `%@` placeholder with the supplied link text and URL.
    ///
    /// Only the first occurrence of `%@` is replaced. If `string` contains no `%@`
    /// placeholder, it is returned as-is, without the link.
    ///
    /// - Parameters:
    ///   - string: The string to be formatted.
    ///   - link: The link text to be inserted.
    ///   - url: The URL to be linked to.
    /// - Returns: An attributed string.
    func format(_ string: String, with link: String, using url: URL) -> NSAttributedString {
        let baseString = NSMutableAttributedString(string: string)
        let placeholderRange = (baseString.string as NSString).range(of: "%@")

        // `range(of:)` reports a missing placeholder as {NSNotFound, 0}; passing that range
        // to `replaceCharacters(in:with:)` would raise an out-of-bounds exception.
        guard placeholderRange.location != NSNotFound else {
            return baseString
        }

        let attributedLink = NSAttributedString(string: link, attributes: [.link: url])
        baseString.replaceCharacters(in: placeholderRange, with: attributedLink)

        return baseString
    }
}
extension HTMLFormatter {
    /// Swift counterpart of DTCoreText's NSAttributedString `initWithHTMLData`.
    /// The sanitize callback is assigned to the builder from Swift to avoid EXC_BAD_ACCESS crashes.
    ///
    /// - Parameters:
    ///   - data: The data in HTML format from which to create the attributed string.
    ///   - options: Specifies how the document should be loaded. When it holds a will-flush
    ///              callback under `DTWillFlushBlockCallBack`, that callback is installed on the builder.
    /// - Returns: The generated attributed string, or `nil` if `data` is empty or can't be decoded.
    @objc func formatHTML(_ data: Data,
                          options: [AnyHashable: Any]) -> NSAttributedString? {
        if data.isEmpty {
            return nil
        }

        // Document attributes are passed as nil: DTCoreText doesn't use them anyway.
        guard let builder = DTHTMLAttributedStringBuilder(html: data,
                                                          options: options,
                                                          documentAttributes: nil) else {
            return nil
        }

        let flushCallback = options[DTWillFlushBlockCallBack] as? DTHTMLAttributedStringBuilderWillFlushCallback
        if let flushCallback = flushCallback {
            builder.willFlushCallback = flushCallback
        }

        return builder.generatedAttributedString()
    }
}
@@ -28,6 +28,7 @@
#import "MXRoom+Sync.h"
#import "MXKRoomNameStringLocalizer.h"
#import "GeneratedInterface-Swift.h"
static NSString *const kHTMLATagRegexPattern = @"<a href=(?:'|\")(.*?)(?:'|\")>([^<]*)</a>";
@@ -1723,7 +1724,8 @@ static NSString *const kHTMLATagRegexPattern = @"<a href=(?:'|\")(.*?)(?:'|\")>(
{
// body font is the same with the whole string font, no need to change body font
// apply additional treatments
return [self postRenderAttributedString:str];
[self postRenderAttributedString:str];
return str;
}
NSString *body;
@@ -1740,13 +1742,14 @@ static NSString *const kHTMLATagRegexPattern = @"<a href=(?:'|\")(.*?)(?:'|\")>(
{
// body not found in the whole string
// apply additional treatments
return [self postRenderAttributedString:str];
[self postRenderAttributedString:str];
return str;
}
NSMutableAttributedString *mutableStr = [str mutableCopy];
[mutableStr addAttribute:NSFontAttributeName value:fontForBody range:bodyRange];
[str addAttribute:NSFontAttributeName value:fontForBody range:bodyRange];
// apply additional treatments
return [self postRenderAttributedString:mutableStr];
[self postRenderAttributedString:str];
return str;
}
- (NSAttributedString*)renderHTMLString:(NSString*)htmlString forEvent:(MXEvent*)event withRoomState:(MXRoomState*)roomState
@@ -1769,81 +1772,25 @@ static NSString *const kHTMLATagRegexPattern = @"<a href=(?:'|\")(.*?)(?:'|\")>(
// Apply the css style that corresponds to the event state
UIFont *fontForWholeString = [self fontForEvent:event string:htmlString];
// Do some sanitisation before finalizing the string
HTMLFormatter *htmlFormatter = [[HTMLFormatter alloc] init];
MXWeakify(self);
DTHTMLAttributedStringBuilderWillFlushCallback sanitizeCallback = ^(DTHTMLElement *element) {
NSAttributedString *str = [htmlFormatter formatHTML:html
withAllowedTags:_allowedHTMLTags
andImageHandler:_htmlImageHandler
extraOptions:@{ DTDefaultTextColor: [self textColorForEvent:event],
DTDefaultStyleSheet: dtCSS }
font:fontForWholeString
postFormatOperations:^(NSMutableAttributedString *mutableStr) {
MXStrongifyAndReturnIfNil(self);
[element sanitizeWith:self.allowedHTMLTags bodyFont:fontForWholeString imageHandler:self.htmlImageHandler];
};
[self postFormatMutableAttributedString:mutableStr
forEvent:event
andRepliedEvent:repliedEvent
defaultFont:fontForWholeString];
}];
NSDictionary *options = @{
DTUseiOS6Attributes: @(YES), // Enable it to be able to display the attributed string in a UITextView
DTDefaultFontFamily: fontForWholeString.familyName,
DTDefaultFontName: fontForWholeString.fontName,
DTDefaultFontSize: @(fontForWholeString.pointSize),
DTDefaultTextColor: [self textColorForEvent:event],
DTDefaultLinkDecoration: @(NO),
DTDefaultStyleSheet: dtCSS,
DTWillFlushBlockCallBack: sanitizeCallback
};
// Do not use the default HTML renderer of NSAttributedString because this method
// runs on the UI thread which we want to avoid because renderHTMLString is called
// most of the time from a background thread.
// Use DTCoreText HTML renderer instead.
// Using DTCoreText, which renders static string, helps to avoid code injection attacks
// that could happen with the default HTML renderer of NSAttributedString which is a
// webview.
NSAttributedString *str = [[NSAttributedString alloc] initWithHTMLData:[html dataUsingEncoding:NSUTF8StringEncoding] options:options documentAttributes:NULL];
// Apply additional treatments
str = [self postRenderAttributedString:str];
// Finalize the attributed string by removing DTCoreText artifacts (Trim trailing newlines).
str = [MXKTools removeDTCoreTextArtifacts:str];
// Finalize HTML blockquote blocks marking
str = [MXKTools removeMarkedBlockquotesArtifacts:str];
if (repliedEvent && repliedEvent.isRedactedEvent)
{
// Replace the description of an empty replied event
NSMutableAttributedString *mutableStr = [[NSMutableAttributedString alloc] initWithAttributedString:str];
NSRange nullRange = [mutableStr.string rangeOfString:@"(null)"];
if (nullRange.location != NSNotFound)
{
[mutableStr replaceCharactersInRange:nullRange withAttributedString:[self redactedMessageReplacementAttributedString]];
str = mutableStr;
}
}
UIFont *fontForBody = [self fontForEvent:event string:nil];
if ([fontForWholeString isEqual:fontForBody])
{
// body font is the same with the whole string font, no need to change body font
return str;
}
NSString *body;
if (event.content[kMXMessageContentKeyNewContent])
{
MXJSONModelSetString(body, event.content[kMXMessageContentKeyNewContent][kMXMessageBodyKey]);
}
else
{
MXJSONModelSetString(body, event.content[kMXMessageBodyKey]);
}
NSRange bodyRange = [str.string rangeOfString:body];
if (bodyRange.location == NSNotFound)
{
// body not found in the whole string
return str;
}
NSMutableAttributedString *mutableStr = [str mutableCopy];
[mutableStr addAttribute:NSFontAttributeName value:fontForBody range:bodyRange];
return mutableStr;
return str;
}
- (NSAttributedString*)redactedMessageReplacementAttributedString
@@ -2017,11 +1964,55 @@ static NSString *const kHTMLATagRegexPattern = @"<a href=(?:'|\")(.*?)(?:'|\")>(
return html;
}
- (NSAttributedString*)postRenderAttributedString:(NSAttributedString*)attributedString
- (void)postFormatMutableAttributedString:(NSMutableAttributedString*)mutableAttributedString
forEvent:(MXEvent*)event
andRepliedEvent:(MXEvent*)repliedEvent
defaultFont:(UIFont*)defaultFont
{
if (!attributedString)
[self postRenderAttributedString:mutableAttributedString];
[MXKTools removeMarkedBlockquotesArtifacts:mutableAttributedString];
if (repliedEvent && repliedEvent.isRedactedEvent)
{
return nil;
// Replace the description of an empty replied event
NSRange nullRange = [mutableAttributedString.string rangeOfString:@"(null)"];
if (nullRange.location != NSNotFound)
{
[mutableAttributedString replaceCharactersInRange:nullRange withAttributedString:[self redactedMessageReplacementAttributedString]];
}
}
UIFont *fontForBody = [self fontForEvent:event string:nil];
if ([defaultFont isEqual:fontForBody])
{
// body font is the same with the whole string font, no need to change body font
return;
}
NSString *body;
if (event.content[kMXMessageContentKeyNewContent])
{
MXJSONModelSetString(body, event.content[kMXMessageContentKeyNewContent][kMXMessageBodyKey]);
}
else
{
MXJSONModelSetString(body, event.content[kMXMessageBodyKey]);
}
NSRange bodyRange = [mutableAttributedString.string rangeOfString:body];
if (bodyRange.location == NSNotFound)
{
// body not found in the whole string
return;
}
[mutableAttributedString addAttribute:NSFontAttributeName value:fontForBody range:bodyRange];
}
- (void)postRenderAttributedString:(NSMutableAttributedString*)mutableAttributedString
{
if (!mutableAttributedString)
{
return;
}
NSInteger enabledMatrixIdsBitMask= 0;
@@ -2056,7 +2047,7 @@ static NSString *const kHTMLATagRegexPattern = @"<a href=(?:'|\")(.*?)(?:'|\")>(
enabledMatrixIdsBitMask |= MXKTOOLS_GROUP_IDENTIFIER_BITWISE;
}
return [MXKTools createLinksInAttributedString:attributedString forEnabledMatrixIds:enabledMatrixIdsBitMask];
[MXKTools createLinksInMutableAttributedString:mutableAttributedString forEnabledMatrixIds:enabledMatrixIdsBitMask];
}
- (NSAttributedString *)renderString:(NSString *)string withPrefix:(NSString *)prefix forEvent:(MXEvent *)event