Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IGNITE-24038 Slightly optimize TupleMarshallerVarlenOnlyBenchmark #4924

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ public static String parseSimpleName(String name) {
return name;
}

if (name.indexOf('"') < 0) { // Fast-path without StringBuilder for unquoted names.
if (name.indexOf('.') >= 0 || name.indexOf(' ') >= 0) {
throw new IllegalArgumentException("Fully qualified name is not expected [name=" + name + "]");
}

return name.toUpperCase();
}

var tokenizer = new Tokenizer(name);

String parsedName = tokenizer.nextToken();
Expand Down Expand Up @@ -85,11 +93,11 @@ public static String quote(String name) {
return name;
}

if (name.chars().noneMatch(cp -> cp == '\"')) {
if (name.indexOf('\"') < 0) {
return '\"' + name + '\"';
}

StringBuilder sb = new StringBuilder(name.length() + 2).append('\"');
StringBuilder sb = new StringBuilder(name.length() + 4).append('\"');
for (int currentPosition = 0; currentPosition < name.length(); currentPosition++) {
char ch = name.charAt(currentPosition);
if (ch == '\"') {
Expand Down Expand Up @@ -120,7 +128,7 @@ public static String quoteIfNeeded(String name) {
return name.equals(quote(simpleName)) ? name : quote(name);
}

if (!NAME_PATTER.matcher(name).matches()) {
if (!canonicalOrSimpleName(name)) {
return quote(name);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.time.Period;
import java.util.UUID;
import org.apache.ignite.internal.util.ByteUtils;
import org.apache.ignite.internal.util.StringIntrospection;
import org.jetbrains.annotations.Nullable;

/**
Expand Down Expand Up @@ -690,7 +691,17 @@ private void putString(String value) throws CharacterCodingException {

int begin = buffer.position();

byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
byte[] bytes;
if (StringIntrospection.supportsFastGetLatin1Bytes(value)) {
bytes = StringIntrospection.fastAsciiBytes(value);

if (StringIntrospection.hasNegatives(bytes)) {
bytes = value.getBytes(StandardCharsets.UTF_8);
}
} else {
bytes = value.getBytes(StandardCharsets.UTF_8);
}

putBytes(bytes);

// UTF-8 encoded strings should not start with 0x80 (character codes larger than 127 have a multi-byte encoding).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,35 +17,54 @@

package org.apache.ignite.internal.util;

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.invoke.VarHandle;
import java.lang.reflect.Field;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import org.jetbrains.annotations.Nullable;

/**
* Utils for introspecting a String efficiently.
*/
public class StringIntrospection {
private static final boolean USE_UNSAFE_TO_GET_LATIN1_BYTES;
private static final long STRING_CODER_FIELD_OFFSET;
private static final long STRING_VALUE_FIELD_OFFSET;

private static final byte LATIN1 = 0;
private static final VarHandle STRING_CODER;
private static final VarHandle STRING_VALUE;

private static final MethodHandle HAS_NEGATIVES;

private static final long NO_OFFSET = Long.MIN_VALUE;
private static final byte LATIN1 = 0;

static {
Optional<Boolean> maybeCompactStrings = compactStrings();
Optional<Long> maybeCoderFieldOffset = coderFieldOffset();

USE_UNSAFE_TO_GET_LATIN1_BYTES = maybeCompactStrings.isPresent() && maybeCoderFieldOffset.isPresent();
STRING_CODER_FIELD_OFFSET = maybeCoderFieldOffset.orElse(NO_OFFSET);
STRING_CODER = privateStringHandle("coder");
STRING_VALUE = privateStringHandle("value");

MethodHandle hasNegatives;
try {
Class<?> stringCodingClass = Class.forName("java.lang.StringCoding");

hasNegatives = MethodHandles.privateLookupIn(stringCodingClass, MethodHandles.lookup()).findStatic(
stringCodingClass,
"hasNegatives",
MethodType.methodType(boolean.class, new Class[]{byte[].class, int.class, int.class})
);
} catch (Exception e) {
hasNegatives = null;
}

HAS_NEGATIVES = hasNegatives;

Optional<Long> maybeValueFieldOffset = byteValueFieldOffset();
STRING_VALUE_FIELD_OFFSET = maybeValueFieldOffset.orElse(NO_OFFSET);
USE_UNSAFE_TO_GET_LATIN1_BYTES = maybeCompactStrings.orElse(false) && STRING_CODER != null && STRING_VALUE != null;
}

private static Optional<Boolean> compactStrings() {
return compactStringsField()
return stringField("COMPACT_STRINGS")
.map(field -> {
try {
return (Boolean) field.get(null);
Expand All @@ -55,32 +74,28 @@ private static Optional<Boolean> compactStrings() {
});
}

private static Optional<Field> compactStringsField() {
return stringField("COMPACT_STRINGS")
.map(field -> {
field.setAccessible(true);
return field;
});
// Resolves a VarHandle for the named field declared in java.lang.String.
// Returns null when the field cannot be found or private access to String is denied.
private static @Nullable VarHandle privateStringHandle(String name) {
    Optional<Field> maybeField = stringField(name);

    if (!maybeField.isPresent()) {
        return null;
    }

    try {
        MethodHandles.Lookup stringLookup = MethodHandles.privateLookupIn(String.class, MethodHandles.lookup());

        return stringLookup.unreflectVarHandle(maybeField.get());
    } catch (IllegalAccessException e) {
        // Access to String internals is not permitted; callers treat null as "handle unavailable".
        return null;
    }
}

private static Optional<Field> stringField(String name) {
try {
return Optional.of(String.class.getDeclaredField(name));
return Optional.of(String.class.getDeclaredField(name))
.map(field -> {
field.setAccessible(true);
return field;
});
} catch (NoSuchFieldException e) {
return Optional.empty();
}
}

private static Optional<Long> coderFieldOffset() {
return stringField("coder").map(GridUnsafe::objectFieldOffset);
}

private static Optional<Long> byteValueFieldOffset() {
return stringField("value")
.filter(field -> field.getType() == byte[].class)
.map(GridUnsafe::objectFieldOffset);
}

/**
* Returns {@code true} if the current String is represented as Latin1 internally AND we can get access to that
* representation fast.
Expand All @@ -93,7 +108,7 @@ public static boolean supportsFastGetLatin1Bytes(String str) {
if (!USE_UNSAFE_TO_GET_LATIN1_BYTES) {
return false;
}
return GridUnsafe.getByteField(str, STRING_CODER_FIELD_OFFSET) == LATIN1;
return (byte) STRING_CODER.get(str) == LATIN1;
}

/**
Expand All @@ -104,11 +119,11 @@ public static boolean supportsFastGetLatin1Bytes(String str) {
* as encoding/decoding), it just returns the internal String buffer.
*
* @param str string to work with
* @return byte represenation of an ASCII string
* @return byte representation of an ASCII string
*/
public static byte[] fastAsciiBytes(String str) {
if (STRING_VALUE_FIELD_OFFSET != NO_OFFSET) {
return (byte[]) GridUnsafe.getObjectField(str, STRING_VALUE_FIELD_OFFSET);
if (STRING_VALUE != null) {
return (byte[]) STRING_VALUE.get(str);
} else {
// Fallback: something is different, let's not fail here, just pay a performance penalty.
return str.getBytes(StandardCharsets.US_ASCII);
Expand All @@ -123,17 +138,52 @@ public static byte[] fastAsciiBytes(String str) {
* as encoding/decoding), it just returns the internal String buffer.
*
* @param str string to work with
* @return byte represenation of a Latin1 string
* @return byte representation of a Latin1 string
*/
public static byte[] fastLatin1Bytes(String str) {
if (STRING_VALUE_FIELD_OFFSET != NO_OFFSET) {
return (byte[]) GridUnsafe.getObjectField(str, STRING_VALUE_FIELD_OFFSET);
if (STRING_VALUE != null) {
return (byte[]) STRING_VALUE.get(str);
} else {
// Fallback: something is different, let's not fail here, just pay performance penalty.
return str.getBytes(StandardCharsets.ISO_8859_1);
}
}

/**
 * Tells whether the given array contains at least one negative byte, i.e. a byte with its highest bit set.
 *
 * <p>When the {@code HAS_NEGATIVES} method handle is available it is tried first; if it is missing or its
 * invocation throws, a plain Java scan is used instead.
 *
 * @param bytes Array to inspect.
 * @return {@code true} if any byte in the array is negative, {@code false} otherwise.
 */
public static boolean hasNegatives(byte[] bytes) {
    if (HAS_NEGATIVES != null) {
        try {
            // Must be "invokeExact", everything else is slow.
            return (boolean) HAS_NEGATIVES.invokeExact(bytes, 0, bytes.length);
        } catch (Throwable ignore) {
            // Best effort only: fall through to the manual scan below.
        }
    }

    int total = bytes.length;
    // Largest multiple of Long.BYTES that does not exceed the array length.
    int wordsEnd = total & -Long.BYTES;
    int pos = 0;

    // SWAR: read 8 bytes as one long and test all eight sign bits with a single mask.
    while (pos < wordsEnd) {
        long word = GridUnsafe.getLong(bytes, GridUnsafe.BYTE_ARR_OFF + pos);

        if ((word & 0x8080808080808080L) != 0L) {
            return true;
        }

        pos += Long.BYTES;
    }

    // Remaining tail (fewer than 8 bytes) is checked byte by byte.
    while (pos < total) {
        if (bytes[pos] < 0) {
            return true;
        }

        pos++;
    }

    return false;
}

// Private constructor: the members of this class visible here are all static, so no instances are needed.
private StringIntrospection() {
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,18 @@ public interface Row extends SchemaAware, BinaryRowEx, InternalTuple, BinaryTupl
* @param binaryRow Binary row.
*/
static Row wrapBinaryRow(SchemaDescriptor schema, BinaryRow binaryRow) {
return new RowImpl(false, schema, BinaryTupleSchema.createRowSchema(schema), binaryRow);
return wrapBinaryRow(schema, BinaryTupleSchema.createRowSchema(schema), binaryRow);
}

/**
 * Wraps the given {@code BinaryRow} into a {@link Row} using a caller-supplied row schema.
 *
 * <p>Intended for callers that already hold a {@code BinaryTupleSchema} for the row and want to pass it in
 * directly.
 *
 * @param schema Schema.
 * @param rowSchema Row schema.
 * @param binaryRow Binary row.
 * @return Row backed by the given binary row.
 */
static Row wrapBinaryRow(SchemaDescriptor schema, BinaryTupleSchema rowSchema, BinaryRow binaryRow) {
    boolean keyOnly = false;

    return new RowImpl(keyOnly, schema, rowSchema, binaryRow);
}

/**
Expand All @@ -47,7 +58,18 @@ static Row wrapBinaryRow(SchemaDescriptor schema, BinaryRow binaryRow) {
* @param binaryRow Binary row.
*/
static Row wrapKeyOnlyBinaryRow(SchemaDescriptor schema, BinaryRow binaryRow) {
return new RowImpl(true, schema, BinaryTupleSchema.createKeySchema(schema), binaryRow);
return wrapKeyOnlyBinaryRow(schema, BinaryTupleSchema.createKeySchema(schema), binaryRow);
}

/**
 * Wraps the given {@code BinaryRow}, which only contains the key component, into a {@link Row} using a
 * caller-supplied key schema.
 *
 * <p>Intended for callers that already hold a {@code BinaryTupleSchema} for the key and want to pass it in
 * directly.
 *
 * @param schema Schema.
 * @param keySchema Key schema.
 * @param binaryRow Binary row.
 * @return Key-only row backed by the given binary row.
 */
static Row wrapKeyOnlyBinaryRow(SchemaDescriptor schema, BinaryTupleSchema keySchema, BinaryRow binaryRow) {
    boolean keyOnly = true;

    return new RowImpl(keyOnly, schema, keySchema, binaryRow);
}

/** Short-cut method that reads decimal value with a scale from the schema. */
Expand Down
Loading