Skip to content

Commit

Permalink
Compatibility: Solr v9.1.0 & Lucene v9.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
azagniotov committed Sep 15, 2023
1 parent 1f6e7b5 commit 7fd9886
Show file tree
Hide file tree
Showing 13 changed files with 1,081 additions and 12 deletions.
2 changes: 1 addition & 1 deletion conf/gradle/dependencies.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ dependencies {
// Relying on the Solr directly would ensure that the correct Lucene version
// is pulled down, instead of trying to guess which Lucene version to use.
api "org.apache.solr:solr-core:${solrVersion}"
api "org.apache.lucene:lucene-analysis-kuromoji:${solrVersion}"
api "org.apache.lucene:lucene-analysis-kuromoji:${luceneVersion}"

// See conf/tests.gradle for test related dependencies
}
3 changes: 2 additions & 1 deletion conf/gradle/tests.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ testing {
dependencies {
implementation project()
implementation "commons-io:commons-io:2.8.0"
implementation "org.apache.lucene:lucene-queryparser:${solrVersion}"
implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}"
implementation "org.apache.solr:solr-test-framework:${solrVersion}"
implementation "junit:junit:4.13.2"
}
Expand All @@ -72,6 +72,7 @@ testing {
testTask.configure {
systemProperty "test.solr.allowed.securerandom", "NativePRNG"
systemProperty "solr.log.dir", "/var/solr/logs"
systemProperty "solr.install.dir", "."
shouldRunAfter(test)
}
}
Expand Down
5 changes: 3 additions & 2 deletions gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@ solrLuceneAnalyzerSudachiProjectGroup=io.github.azagniotov
solrLuceneAnalyzerSudachiProjectVersion=1.0.0-SNAPSHOT

sudachiVersion=0.7.3
solrVersion=9.0.0
log4j2Version=2.17.1
solrVersion=9.1.0
luceneVersion=9.3.0
slf4jVersion=1.7.36
log4j2Version=2.17.1

systemProp.file.encoding=UTF-8

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import java.io.File;
import java.nio.file.Files;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
Expand All @@ -38,6 +37,7 @@
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.junit.Test;

public class LuceneQueryIndexTest extends BaseTokenStreamTestCase {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.junit.Ignore;
import org.junit.Test;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.ja.JapaneseKatakanaStemFilter;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.AttributeFactory;
import org.junit.Test;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import io.github.azagniotov.lucene.analysis.ja.sudachi.test.TestUtils;
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.junit.Test;

public class SudachiBaseFormFilterTest extends BaseTokenStreamTestCase {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import io.github.azagniotov.lucene.analysis.ja.sudachi.test.TestUtils;
import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.junit.Test;

public class SudachiNormalizedFormFilterTest extends BaseTokenStreamTestCase {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import io.github.azagniotov.lucene.analysis.ja.sudachi.util.StringResourceLoader;
import java.io.IOException;
import java.util.HashMap;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.ClasspathResourceLoader;
import org.junit.Test;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

import com.worksap.nlp.sudachi.Config;
import io.github.azagniotov.lucene.analysis.ja.sudachi.test.TestUtils;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.junit.Before;

public class ImportedLuceneJapaneseTokenizerTest extends BaseTokenStreamTestCase {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
Expand Down
64 changes: 64 additions & 0 deletions src/smokeTest/solr_9.x.x/solr_9_1_0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# A few useful Docker commands to build an image and run the Solr container.
#
# Build (run with '--no-cache' to ensure that Git repo new tags will be pulled down, as Docker caches RUN layers):
# docker build -t solr-sudachi . --progress=plain --no-cache
#
# Run:
# docker run -p 8983:8983 --rm solr-sudachi:latest solr-precreate example
#
# cURL requests:
# 1. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=ちいかわ' | jq '.analysis.field_types.text_ja.index[1][].text'
# 2. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=すもももももももものうち' | jq '.analysis.field_types.text_ja.index[1][].text'
# 3. curl -sS 'http://localhost:8983/solr/example/analysis/field?analysis.fieldtype=text_ja' --get --data-urlencode 'analysis.fieldvalue=聖川真斗' | jq '.analysis.field_types.text_ja.index[1][].text'
#
# See: https://github.com/apache/lucene/pull/12517
#

########################################################################################
# Stage 1 : Solr Lucene Analyzer Sudachi JAR
########################################################################################
# Lowercase stage name: Docker BuildKit flags mixed/upper-case stage names
# with a StageNameCasing warning; the convention is all-lowercase.
FROM gradle:8.1.1-jdk11@sha256:681c18e70745546bf66949861d18019b979810ac151a5e0933d4ff83c76b4f5f AS build_jar_stage

ARG PLUGIN_GIT_TAG=9.1.0

ENV GRADLE_USER_HOME=/home/gradle
WORKDIR $GRADLE_USER_HOME

# Shallow-clone the exact release tag; git identity is configured because the
# plugin build invokes git during assembly.
RUN git clone -b v$PLUGIN_GIT_TAG https://github.com/azagniotov/solr-lucene-analyzer-sudachi.git --depth 1 && \
    git config --global user.name "Alexander Zagniotov" && \
    git config --global user.email "[email protected]"

# Download the dictionary and assemble the JAR to be placed under Solr /opt/solr/server/solr-webapp/webapp/WEB-INF/lib/
RUN cd solr-lucene-analyzer-sudachi && \
    gradle configureDictionariesLocally && \
    gradle -PsolrVersion=$PLUGIN_GIT_TAG assemble && \
    ls -al ./build/libs/


########################################################################################
# Stage 2 : Run Solr
########################################################################################
FROM solr:9.1.0@sha256:2377b0acf16c68b1c223cf685350584debc49dbc5950e0ddff82317be566cb79

# MAINTAINER has been deprecated since Docker 1.13; LABEL is the supported replacement.
LABEL maintainer="Alexander Zagniotov <[email protected]>"

ENV SOLR_JAVA_MEM="-Xms2g -Xmx2g"
ENV SOLR_SERVER_HOME=/opt/solr/server
ENV SUDACHI_DICT_HOME=/tmp/sudachi
ENV SOLR_WEB_INF_LIB_HOME=$SOLR_SERVER_HOME/solr-webapp/webapp/WEB-INF/lib

USER root

# Removing existing Lucene Kuromoji JAR to avoid polluting the classpath with
# Japanese analysis-related classes. Also, Lucene team has renamed the packages
# from analyzers => analysis at some point, thus using a wildcard.
# RUN rm $SOLR_WEB_INF_LIB_HOME/lucene-*-kuromoji-*.jar

COPY --from=build_jar_stage $SUDACHI_DICT_HOME/system-dict/system_core.dic $SUDACHI_DICT_HOME/system-dict/system_core.dic
COPY --from=build_jar_stage $SUDACHI_DICT_HOME/user_lexicon.dic $SUDACHI_DICT_HOME/user_lexicon.dic
COPY --from=build_jar_stage /home/gradle/solr-lucene-analyzer-sudachi/build/libs/solr-lucene-analyzer-sudachi*.jar $SOLR_WEB_INF_LIB_HOME/
COPY schema.xml $SOLR_SERVER_HOME/solr/configsets/_default/conf/managed-schema.xml

# The dictionaries were copied in as root; hand them to the solr user so the
# analyzer can read them at runtime. Use the variable rather than the literal
# path so the location is defined in exactly one place ($SUDACHI_DICT_HOME).
RUN chown -R solr:solr $SUDACHI_DICT_HOME

USER solr
Loading

0 comments on commit 7fd9886

Please sign in to comment.