diff --git a/plugins/search/indexer/indexer-java/src/cc/search/indexer/Context.java b/plugins/search/indexer/indexer-java/src/cc/search/indexer/Context.java index 55558d61d..6bdaf5ee1 100644 --- a/plugins/search/indexer/indexer-java/src/cc/search/indexer/Context.java +++ b/plugins/search/indexer/indexer-java/src/cc/search/indexer/Context.java @@ -14,6 +14,8 @@ import java.util.Map; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; +import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; /** * Analysis context. @@ -60,7 +62,7 @@ public Context(String fileId_, File file_, String fileMimeType_) // Read content from a file stream try (FileInputStream stream = new FileInputStream(file_)) { String fileContent = IOHelper.readFullContent( - IOHelper.getReaderForInput(stream)); + new InputStreamReader(stream, StandardCharsets.UTF_8)); // Get line informations try (Reader reader = new StringReader(fileContent)) { diff --git a/plugins/search/indexer/indexer-java/src/cc/search/indexer/util/IOHelper.java b/plugins/search/indexer/indexer-java/src/cc/search/indexer/util/IOHelper.java index 7b50e7dea..cde1f5405 100644 --- a/plugins/search/indexer/indexer-java/src/cc/search/indexer/util/IOHelper.java +++ b/plugins/search/indexer/indexer-java/src/cc/search/indexer/util/IOHelper.java @@ -57,14 +57,12 @@ public static InputStreamReader getReaderForInput(InputStream input_) throws IOE * @throws IOException */ public static String readFullContent(InputStreamReader reader_) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - - int b = reader_.read(); - while (b != -1) { - out.write(b); - b = reader_.read(); + StringBuilder out = new StringBuilder(); + char[] buffer = new char[4096]; + int read; + while ((read = reader_.read(buffer)) != -1) { + out.append(buffer, 0, read); } - - return out.toString(reader_.getEncoding()); + return out.toString(); } } diff --git a/plugins/search/indexer/src/indexerprocess.cpp b/plugins/search/indexer/src/indexerprocess.cpp index c2064f62d..1559720c3 100644 --- a/plugins/search/indexer/src/indexerprocess.cpp +++ b/plugins/search/indexer/src/indexerprocess.cpp @@ -59,6 +59,7 @@ IndexerProcess::IndexerProcess( std::vector execArguments { "java", JAVAMEMORYAMOUNT, + "-Dfile.encoding=UTF-8", "-classpath", classpath.c_str(), "-Djava.util.logging.config.class=cc.search.common.config.LogConfigurator", "-Djava.util.logging.SimpleFormatter.format=%1$tY-%1$tm-%1$td %1$tT [%4$s] %5$s%6$s%n", diff --git a/plugins/search/service/include/service/serviceprocess.h b/plugins/search/service/include/service/serviceprocess.h index 319788aba..6bf5451b9 100644 --- a/plugins/search/service/include/service/serviceprocess.h +++ b/plugins/search/service/include/service/serviceprocess.h @@ -61,6 +61,7 @@ class ServiceProcess : public SearchServiceIf, public util::PipedProcess std::string classpath = compassRoot_ + "/lib/java/*"; ::execlp("java", "java", "-server", + "-Dfile.encoding=UTF-8", "-classpath", classpath.c_str(), //"-Xdebug", "-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8666", "-Djava.util.logging.config.class=cc.search.common.config.LogConfigurator",