Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

Commit

Permalink
[NSE-955] Support md5/sha1/sha2 functions (#1055)
Browse files: browse the repository at this point in the history.
* Initial commit

* Change arrow branch [will revert at last]

* Fix bug

* Support binary type

* Call correct length function for BinaryType input

* Just support binary type in projection

* Support sha1/sha2

* Revert "Change arrow branch [will revert at last]"

This reverts commit 9ae86d1.

* Fix a grep error: Binary file (standard input) matches
  • Loading branch information
PHILO-HE authored Aug 5, 2022
1 parent bccca32 commit 3317844
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ case class ColumnarConditionProjectExec(
// check datatype
originalInputAttributes.toList.foreach(attr => {
try {
ConverterUtils.checkIfTypeSupported(attr.dataType)
ConverterUtils.checkIfTypeSupportedInProjection(attr.dataType)
} catch {
case e : UnsupportedOperationException =>
throw new UnsupportedOperationException(
Expand All @@ -81,7 +81,7 @@ case class ColumnarConditionProjectExec(
// check expr
if (condExpr != null) {
try {
ConverterUtils.checkIfTypeSupported(condExpr.dataType)
ConverterUtils.checkIfTypeSupportedInProjection(condExpr.dataType)
} catch {
case e : UnsupportedOperationException =>
throw new UnsupportedOperationException(
Expand All @@ -92,7 +92,7 @@ case class ColumnarConditionProjectExec(
if (projectList != null) {
for (expr <- projectList) {
try {
ConverterUtils.checkIfTypeSupported(expr.dataType)
ConverterUtils.checkIfTypeSupportedInProjection(expr.dataType)
} catch {
case e : UnsupportedOperationException =>
throw new UnsupportedOperationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,25 @@ class ColumnarFindInSet(left: Expression, right: Expression, original: Expressio
}
}

/**
 * Columnar variant of [[Sha2]].
 *
 * Both children are expected to be [[ColumnarExpression]]s; their generated
 * nodes are passed, in (left, right) order, to a function node named "sha2"
 * whose result type is Arrow Utf8.
 *
 * @param left  first argument of the "sha2" function node
 * @param right second argument of the "sha2" function node
 */
class ColumnarSha2(left: Expression, right: Expression) extends Sha2(left, right)
  with ColumnarExpression with Logging {

  // Always reports false.
  // NOTE(review): presumably this opts the expression out of the columnar
  // codegen path -- confirm against ColumnarExpression.
  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false

  /**
   * Builds the "sha2" function node on top of the nodes generated by the children.
   *
   * @param args opaque argument forwarded unchanged to each child's doColumnarCodeGen
   * @return the function node paired with its Utf8 result type
   */
  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    // Generates the node for one operand; the operand's own Arrow type is not needed here.
    def operandNode(operand: Expression): TreeNode =
      operand.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }

    // Left operand is generated before the right one, as in the argument order.
    val firstArg: TreeNode = operandNode(left)
    val secondArg: TreeNode = operandNode(right)

    val utf8Type = new ArrowType.Utf8()
    val sha2Call =
      TreeBuilder.makeFunction("sha2", Lists.newArrayList(firstArg, secondArg), utf8Type)
    (sha2Call, utf8Type)
  }
}

object ColumnarBinaryExpression {

def create(left: Expression, right: Expression, original: Expression): Expression =
Expand All @@ -182,6 +201,8 @@ object ColumnarBinaryExpression {
new ColumnarPow(left, right, pow)
case f: FindInSet =>
new ColumnarFindInSet(left, right, f)
case _: Sha2 =>
new ColumnarSha2(left, right)
case other =>
throw new UnsupportedOperationException(s"not currently supported: $other.")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class ColumnarBoundReference(ordinal: Int, dataType: DataType, nullable: Boolean
dataType match {
case at: ArrayType =>
case _ =>
ConverterUtils.checkIfTypeSupported(dataType)
ConverterUtils.checkIfTypeSupportedInProjection(dataType)
}
} catch {
case e: UnsupportedOperationException =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ class ColumnarCast(
def buildCheck(): Unit = {
if (!datatype.isInstanceOf[DecimalType]) {
try {
ConverterUtils.checkIfTypeSupported(datatype)
ConverterUtils.checkIfTypeSupportedInProjection(datatype)
} catch {
case e: UnsupportedOperationException =>
throw new UnsupportedOperationException(s"${datatype} is not supported in ColumnarCast")
Expand Down Expand Up @@ -577,8 +577,15 @@ class ColumnarCast(
throw new UnsupportedOperationException(
s"${child.dataType} is not supported in castTIMESTAMP")
}
} else if (dataType == BinaryType) {
val supported = List(StringType)
if (supported.indexOf(child.dataType) == -1) {
throw new UnsupportedOperationException(s"${child.dataType}" +
s" is not supported in casting to binary.")
}
} else {
throw new UnsupportedOperationException(s"not currently supported: ${dataType}.")
throw new UnsupportedOperationException(s"not currently supported" +
s" data type in cast: ${dataType}.")
}
}

Expand Down Expand Up @@ -788,6 +795,15 @@ class ColumnarCast(
intermediateType)
}
ConverterUtils.convertTimestampToMicro(funcNode, intermediateType)
} else if (dataType == BinaryType) {
val funcNode = child.dataType match {
case _: StringType =>
TreeBuilder.makeFunction("binary_string",
Lists.newArrayList(child_node0), new ArrowType.Binary())
case _ =>
throw new UnsupportedOperationException (s"not currently supported: ${dataType}.")
}
(funcNode, new ArrowType.Binary())
} else {
throw new UnsupportedOperationException(s"not currently supported: ${dataType}.")
}
Expand Down Expand Up @@ -957,7 +973,7 @@ class ColumnarLength(child: Expression) extends Length(child: Expression)
(TreeBuilder.makeFunction("char_length", Lists.newArrayList(child_node),
resultType), resultType)
case BinaryType =>
(TreeBuilder.makeFunction("length", Lists.newArrayList(child_node),
(TreeBuilder.makeFunction("lengthUtf8", Lists.newArrayList(child_node),
resultType), resultType)
case _ =>
throw new RuntimeException("Fix me. Either StringType or BinaryType is allowed!")
Expand Down Expand Up @@ -1003,6 +1019,40 @@ class ColumnarBin(child: Expression) extends Bin(child: Expression)
}
}

/**
 * Columnar variant of [[Md5]].
 *
 * The child is expected to be a [[ColumnarExpression]]; its generated node is
 * the single argument of a function node named "md5" whose result type is
 * Arrow Utf8.
 *
 * @param child expression providing the input of the "md5" function node
 */
class ColumnarMd5(child: Expression) extends Md5(child) with ColumnarExpression
  with Logging {

  // Always reports false.
  // NOTE(review): presumably this opts the expression out of the columnar
  // codegen path -- confirm against ColumnarExpression.
  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false

  /**
   * Builds the "md5" function node on top of the node generated by the child.
   *
   * @param args opaque argument forwarded unchanged to the child's doColumnarCodeGen
   * @return the function node paired with its Utf8 result type
   */
  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    // Only the child's node is used; its Arrow type is discarded.
    val inputNode: TreeNode =
      child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }

    val utf8Type = new ArrowType.Utf8()
    val md5Call = TreeBuilder.makeFunction("md5", Lists.newArrayList(inputNode), utf8Type)
    (md5Call, utf8Type)
  }
}

/**
 * Columnar variant of [[Sha1]].
 *
 * The child is expected to be a [[ColumnarExpression]]; its generated node is
 * the single argument of a function node named "sha1" whose result type is
 * Arrow Utf8.
 *
 * @param child expression providing the input of the "sha1" function node
 */
class ColumnarSha1(child: Expression) extends Sha1(child) with ColumnarExpression
  with Logging {

  // Always reports false.
  // NOTE(review): presumably this opts the expression out of the columnar
  // codegen path -- confirm against ColumnarExpression.
  override def supportColumnarCodegen(args: java.lang.Object): Boolean = false

  /**
   * Builds the "sha1" function node on top of the node generated by the child.
   *
   * @param args opaque argument forwarded unchanged to the child's doColumnarCodeGen
   * @return the function node paired with its Utf8 result type
   */
  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    // Only the child's node is used; its Arrow type is discarded.
    val inputNode: TreeNode =
      child.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args) match {
        case (node, _) => node
      }

    val utf8Type = new ArrowType.Utf8()
    val sha1Call = TreeBuilder.makeFunction("sha1", Lists.newArrayList(inputNode), utf8Type)
    (sha1Call, utf8Type)
  }
}

object ColumnarUnaryOperator {

def create(child: Expression, original: Expression): Expression = original match {
Expand Down Expand Up @@ -1078,6 +1128,10 @@ object ColumnarUnaryOperator {
new ColumnarHex(child)
case _: Bin =>
new ColumnarBin(child)
case _: Md5 =>
new ColumnarMd5(child)
case _: Sha1 =>
new ColumnarSha1(child)
case other =>
child.dataType match {
case _: DateType => other match {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,14 @@ object ConverterUtils extends Logging {
builder.build.toByteArray
}

/**
 * Type check used by projection: BinaryType is accepted as-is, and every
 * other type is delegated to [[checkIfTypeSupported]].
 *
 * Projection is currently the operator enabled to handle BinaryType.
 * TODO: support BinaryType in all other operators.
 *
 * @param dt data type to validate
 */
def checkIfTypeSupportedInProjection(dt: DataType): Unit = {
  // Anything that is not BinaryType falls back to the general check.
  if (!dt.isInstanceOf[BinaryType]) {
    checkIfTypeSupported(dt)
  }
}

def checkIfTypeSupported(dt: DataType): Unit = dt match {
case d: BooleanType =>
case d: ByteType =>
Expand Down
8 changes: 4 additions & 4 deletions native-sql-engine/tools/run_ut.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@ tests_total=0
module_tested=0
module_should_test=7
while read -r line ; do
num=$(echo "$line" | grep -o -E '[0-9]+')
num=$(echo "$line" | grep -a -o -E '[0-9]+')
tests_total=$((tests_total+num))
done <<<"$(grep "Total number of tests run:" log-file.log)"
done <<<"$(grep -a "Total number of tests run:" log-file.log)"

succeed_total=0
while read -r line ; do
[[ $line =~ [^0-9]*([0-9]+)\, ]]
num=${BASH_REMATCH[1]}
succeed_total=$((succeed_total+num))
let module_tested++
done <<<"$(grep "succeeded" log-file.log)"
done <<<"$(grep -a "succeeded" log-file.log)"
failed_count=$((tests_total-succeed_total))
echo "Tests total: $tests_total, Succeed Total: $succeed_total, Known Fails: $known_fails, Actual Fails: $failed_count."

cat log-file.log | grep "\*** FAILED \***" | grep -v "TESTS FAILED ***" | grep -v "TEST FAILED ***" &> new_failed_list.log
cat log-file.log | grep -a "\*** FAILED \***" | grep -v -a "TESTS FAILED ***" | grep -v -a "TEST FAILED ***" &> new_failed_list.log
comm -1 -3 <(sort failed_ut_list.log) <(sort new_failed_list.log) &> newly_failed_tests.log
comm -2 -3 <(sort failed_ut_list.log) <(sort new_failed_list.log) &> fixed_tests.log
if [ -s newly_failed_tests.log ]
Expand Down

0 comments on commit 3317844

Please sign in to comment.