diff --git a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java index 8dcda6517..889b58edc 100644 --- a/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java +++ b/src/main/java/org/apache/commons/csv/ExtendedBufferedReader.java @@ -37,26 +37,30 @@ /** * A special buffered reader which supports sophisticated read access. *
- * In particular the reader supports a look-ahead option, which allows you to see the next char returned by - * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}. + * In particular the reader supports a look-ahead option, which allows you to see the next char returned by {@link #read()}. This reader also tracks how many + * characters have been read with {@link #getPosition()}. *
*/ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { /** The last char returned */ private int lastChar = UNDEFINED; + private int lastCharMark = UNDEFINED; /** The count of EOLs (CR/LF/CRLF) seen so far */ private long lineNumber; + private long lineNumberMark; /** The position, which is the number of characters read so far */ private long position; + private long positionMark; /** The number of bytes read so far. */ private long bytesRead; + private long bytesReadMark; /** Encoder for calculating the number of bytes for each character read. */ @@ -70,12 +74,11 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { } /** - * Constructs a new instance with the specified reader, character set, - * and byte tracking option. Initializes an encoder if byte tracking is enabled - * and a character set is provided. + * Constructs a new instance with the specified reader, character set, and byte tracking option. Initializes an encoder if byte tracking is enabled and a + * character set is provided. * - * @param reader the reader supports a look-ahead option. - * @param charset the character set for encoding, or {@code null} if not applicable. + * @param reader the reader supports a look-ahead option. + * @param charset the character set for encoding, or {@code null} if not applicable. * @param trackBytes {@code true} to enable byte tracking; {@code false} to disable it. */ ExtendedBufferedReader(final Reader reader, final Charset charset, final boolean trackBytes) { @@ -86,8 +89,7 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader { /** * Closes the stream. * - * @throws IOException - * If an I/O error occurs + * @throws IOException If an I/O error occurs */ @Override public void close() throws IOException { @@ -105,26 +107,33 @@ long getBytesRead() { return this.bytesRead; } + private long getEncodedCharLength(final char[] buf, final int offset, final int length) throws CharacterCodingException { + int len = 0; + for (int i = offset; i < length; i++) { + len += getEncodedCharLength(buf[i]); + } + return len; + } + /** - * Gets the byte length of the given character based on the original Unicode - * specification, which defined characters as fixed-width 16-bit entities. + * Gets the byte length of the given character based on the original Unicode specification, which defined characters as fixed-width 16-bit entities. ** The Unicode characters are divided into two main ranges: *
* Increments {@link #lineNumber} and updates {@link #position}. *
@@ -272,5 +281,4 @@ public void reset() throws IOException { bytesRead = bytesReadMark; super.reset(); } - } diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index d9dd4e545..e18eee026 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -648,6 +648,24 @@ void testForEach() throws Exception { } } + @Test + void testGetBytePositionMultiCharacterDelimiter() throws IOException { + final String code = "aa[|]bb\ncc[|]dd\n"; + final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter("[|]").get(); + try (CSVParser parser = CSVParser.builder() + .setReader(new StringReader(code)) + .setFormat(format) + .setCharset(StandardCharsets.UTF_8) + .setTrackBytes(true) + .get()) { + final Iterator