diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb
index 26d601926f9b8f..f8dc4ccc9ed0dd 100644
--- a/ext/json/lib/json.rb
+++ b/ext/json/lib/json.rb
@@ -145,11 +145,11 @@
# # warning: detected duplicate keys in JSON object.
# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
#
-# When set to `+true+`
+# When set to +true+:
# # The last value is used.
# JSON.parse('{"a": 1, "a":2}') => {"a" => 2}
#
-# When set to `+false+`, the future default:
+# When set to +false+, the future default:
# JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError)
#
# ---
@@ -184,6 +184,20 @@
#
# ---
#
+# Option +allow_comments+ (boolean) specifies whether to allow
+# JavaScript style comments (either // comment or /* comment */);
+# +false+ is not yet the default, but will be in json 3.0.
+#
+# When not specified, comments are ignored but a deprecation warning is emitted when one is encountered.
+#
+# When set to +true+, comments are ignored:
+#   JSON.parse('/* comment */ {"a": 1}') # => {"a" => 1}
+#
+# When set to +false+, the future default:
+#   JSON.parse('/* comment */ {"a": 1}') # raises JSON::ParserError (unexpected token at line 1 column 1)
+#
+# ---
+#
# Option +allow_control_characters+ (boolean) specifies whether to allow
# unescaped ASCII control characters, such as newlines, in strings;
# defaults to +false+.
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 6b8164c062daca..5559561e26004f 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -2,14 +2,14 @@
#include "../vendor/ryu.h"
#include "../simd/simd.h"
-static VALUE mJSON, eNestingError, Encoding_UTF_8;
+static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8;
static VALUE CNaN, CInfinity, CMinusInfinity;
-static ID i_new, i_try_convert, i_uminus, i_encode;
+static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column;
-static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
- sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
- sym_allow_duplicate_key;
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments,
+ sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names,
+ sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
static int binary_encindex;
static int utf8_encindex;
@@ -382,7 +382,7 @@ typedef struct json_frame_stack_struct {
json_frame *ptr;
} json_frame_stack;
-enum duplicate_key_action {
+enum deprecatable_action {
JSON_DEPRECATED = 0,
JSON_IGNORE,
JSON_RAISE,
@@ -392,7 +392,8 @@ typedef struct JSON_ParserStruct {
VALUE on_load_proc;
VALUE decimal_class;
ID decimal_method_id;
- enum duplicate_key_action on_duplicate_key;
+ enum deprecatable_action on_duplicate_key;
+ enum deprecatable_action on_comment;
int max_nesting;
bool allow_nan;
bool allow_trailing_comma;
@@ -590,6 +591,8 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
*column_out = column;
}
+static const unsigned int MAX_DEPRECATIONS = 5;
+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
{
long line, column;
@@ -642,9 +645,9 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta
static VALUE parse_error_new(VALUE message, long line, long column)
{
- VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
- rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
- rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
+ VALUE exc = rb_exc_new_str(eParserError, message);
+ rb_ivar_set(exc, i_at_line, LONG2NUM(line));
+ rb_ivar_set(exc, i_at_column, LONG2NUM(column));
return exc;
}
@@ -707,9 +710,14 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const
static const rb_data_type_t JSON_ParserConfig_type;
+// Warning text emitted by json_eat_comments() when a comment is consumed while
+// `allow_comments` was left unspecified (on_comment == JSON_DEPRECATED).
+// File-local and fully const: nothing outside this translation unit uses it.
+static const char *const COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`";
NOINLINE(static) void
-json_eat_comments(JSON_ParserState *state)
+json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
{
+ if (config->on_comment == JSON_RAISE) {
+ raise_parse_error("unexpected token %s", state);
+ }
+
const char *start = state->cursor;
state->cursor++;
@@ -744,10 +752,15 @@ json_eat_comments(JSON_ParserState *state)
raise_parse_error_at("unexpected token %s", state, start);
break;
}
+
+ if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) {
+ state->emitted_deprecations++;
+ emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state);
+ }
}
ALWAYS_INLINE(static) void
-json_eat_whitespace(JSON_ParserState *state)
+json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config)
{
while (true) {
switch (peek(state)) {
@@ -778,7 +791,7 @@ json_eat_whitespace(JSON_ParserState *state)
state->cursor++;
break;
case '/':
- json_eat_comments(state);
+ json_eat_comments(state, config);
break;
default:
@@ -1127,9 +1140,9 @@ NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_Parser
case JSON_DEPRECATED:
// Only emit the first few deprecations to avoid spamming.
- if (state->emitted_deprecations < 5) {
- emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
+ if (state->emitted_deprecations < MAX_DEPRECATIONS) {
state->emitted_deprecations++;
+ emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
}
return;
@@ -1498,7 +1511,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}
JSON_PHASE_VALUE: {
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
VALUE value;
switch (peek(state)) {
@@ -1559,7 +1572,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
case '[': {
state->cursor++;
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == ']') {
state->cursor++;
@@ -1585,7 +1598,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
const char *object_start_cursor = state->cursor;
state->cursor++;
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == '}') {
state->cursor++;
@@ -1632,7 +1645,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_OBJECT_KEY: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (RB_LIKELY(peek(state) == '"')) {
json_push_value(state, config, json_parse_string(state, config, true));
@@ -1654,7 +1667,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_OBJECT_COLON: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (RB_LIKELY(peek(state) == ':')) {
state->cursor++;
@@ -1675,14 +1688,14 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_ARRAY_COMMA: {
JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
const char next_char = peek(state);
if (RB_LIKELY(next_char == ',')) {
state->cursor++;
if (config->allow_trailing_comma) {
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == ']') {
// Trailing comma: stay in COMMA to close on the next iteration.
goto JSON_PHASE_ARRAY_COMMA;
@@ -1717,14 +1730,14 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_OBJECT_COMMA: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
const char next_char = peek(state);
if (RB_LIKELY(next_char == ',')) {
state->cursor++;
if (config->allow_trailing_comma) {
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (peek(state) == '}') {
// Trailing comma: stay in COMMA to close on the next iteration.
goto JSON_PHASE_OBJECT_COMMA;
@@ -1766,9 +1779,9 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_UNREACHABLE_RETURN(Qundef);
}
-static void json_ensure_eof(JSON_ParserState *state)
+static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
{
- json_eat_whitespace(state);
+ json_eat_whitespace(state, config);
if (!eos(state)) {
raise_parse_error("unexpected token at end of stream %s", state);
}
@@ -1825,6 +1838,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
+ else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
@@ -1977,7 +1991,7 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
RB_GC_GUARD(value_stack_handle);
RB_GC_GUARD(frame_stack_handle);
RB_GC_GUARD(Vsource);
- json_ensure_eof(state);
+ json_ensure_eof(state, config);
return result;
}
@@ -2055,8 +2069,13 @@ void Init_parser(void)
mJSON = rb_define_module("JSON");
VALUE mExt = rb_define_module_under(mJSON, "Ext");
VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);
+
+ rb_global_variable(&eParserError);
+ eParserError = rb_path2class("JSON::ParserError");
+
+ rb_global_variable(&eNestingError);
eNestingError = rb_path2class("JSON::NestingError");
- rb_gc_register_mark_object(eNestingError);
+
rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);
@@ -2064,14 +2083,14 @@ void Init_parser(void)
VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
+ rb_global_variable(&CNaN);
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
- rb_gc_register_mark_object(CNaN);
+ rb_global_variable(&CInfinity);
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
- rb_gc_register_mark_object(CInfinity);
+ rb_global_variable(&CMinusInfinity);
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
- rb_gc_register_mark_object(CMinusInfinity);
rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
@@ -2079,6 +2098,7 @@ void Init_parser(void)
sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
@@ -2091,6 +2111,8 @@ void Init_parser(void)
i_try_convert = rb_intern("try_convert");
i_uminus = rb_intern("-@");
i_encode = rb_intern("encode");
+ i_at_line = rb_intern("@line");
+ i_at_column = rb_intern("@column");
binary_encindex = rb_ascii8bit_encindex();
utf8_encindex = rb_utf8_encindex();
diff --git a/gc.c b/gc.c
index 0219fa6e78cbee..72c3f2d24720b6 100644
--- a/gc.c
+++ b/gc.c
@@ -1806,18 +1806,21 @@ os_obj_of(VALUE of)
/*
* call-seq:
- * ObjectSpace.each_object([module]) {|obj| ... } -> integer
- * ObjectSpace.each_object([module]) -> an_enumerator
+ * ObjectSpace.each_object {|obj| ... } -> integer
+ * ObjectSpace.each_object(module) {|obj| ... } -> integer
+ * ObjectSpace.each_object -> enumerator
+ * ObjectSpace.each_object(module) -> enumerator
*
- * Calls the block once for each living, nonimmediate object in this
- * Ruby process. If module is specified, calls the block
- * for only those classes or modules that match (or are a subclass of)
- * module. Returns the number of objects found. Immediate
- * objects (such as Fixnums, static Symbols
- * true, false and nil) are
- * never returned.
+ * Calls the block once for each living, non-immediate object in this Ruby
+ * process, and returns the number of objects found.
*
- * If no block is given, an enumerator is returned instead.
+ * If +module+ is given, calls the block only for objects that are an instance
+ * of +module+ or one of its subclasses.
+ *
+ * Immediate objects (such as small integers, static symbols, +true+, +false+,
+ * and +nil+) are never yielded.
+ *
+ * With no block given, returns a new Enumerator.
*
* Job = Class.new
* jobs = [Job.new, Job.new]
@@ -1828,18 +1831,21 @@ os_obj_of(VALUE of)
*
*     #<Job:0x000000011d6cbbf0>
*     #<Job:0x000000011d6cbc68>
- * Total count: 2
+ * Total count: 2
+ *
+ * Because every live object is visited, this method is mainly useful for
+ * debugging, profiling, and introspecting a running process.
*
* Due to a current Ractor implementation issue, this method does not yield
- * Ractor-unshareable objects when the process is in multi-Ractor mode. Multi-ractor
- * mode is enabled when Ractor.new has been called for the first time.
- * See https://bugs.ruby-lang.org/issues/19387 for more information.
+ * Ractor-unshareable objects when the process is in multi-Ractor mode.
+ * Multi-Ractor mode is enabled when Ractor.new has been called for the first
+ * time. See https://bugs.ruby-lang.org/issues/19387 for more information.
*
* a = 12345678987654321 # shareable
- * b = [].freeze # shareable
- * c = {} # not shareable
+ * b = [].freeze # shareable
+ * c = {} # not shareable
* ObjectSpace.each_object {|x| x } # yields a, b, and c
- * Ractor.new {} # enter multi-Ractor mode
+ * Ractor.new {} # enter multi-Ractor mode
* ObjectSpace.each_object {|x| x } # does not yield c
*
*/
diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
index 292ca1a6701147..943d932851bdd3 100644
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@@ -489,7 +489,7 @@ def test_parse_comments
JSON
assert_equal(
{ "key1" => "value1", "key2" => "value2", "key3" => "value3" },
- parse(json))
+ parse(json, allow_comments: true))
json = <<~JSON
{
"key1":"value1" /* multi line
@@ -498,7 +498,7 @@ def test_parse_comments
* comment */
}
JSON
- assert_raise(ParserError) { parse(json) }
+ assert_raise(ParserError) { parse(json, allow_comments: true) }
json = <<~JSON
{
"key1":"value1" /* multi line
@@ -506,7 +506,7 @@ def test_parse_comments
/* legal nested multi line comment start sequence */
}
JSON
- assert_equal({ "key1" => "value1" }, parse(json))
+ assert_equal({ "key1" => "value1" }, parse(json, allow_comments: true))
json = <<~JSON
{
"key1":"value1" /* multi line
@@ -515,18 +515,28 @@ def test_parse_comments
and again, throw an Error */
}
JSON
- assert_raise(ParserError) { parse(json) }
+ assert_raise(ParserError) { parse(json, allow_comments: true) }
json = <<~JSON
{
"key1":"value1" /*/*/
}
JSON
- assert_equal({ "key1" => "value1" }, parse(json))
- assert_equal({}, parse('{} /**/'))
- assert_raise(ParserError) { parse('{} /* comment not closed') }
- assert_raise(ParserError) { parse('{} /*/') }
- assert_raise(ParserError) { parse('{} /x wrong comment') }
- assert_raise(ParserError) { parse('{} /') }
+ assert_equal({ "key1" => "value1" }, parse(json, allow_comments: true))
+ assert_equal({}, parse('{} /**/', allow_comments: true))
+ assert_raise(ParserError) { parse('{} /* comment not closed', allow_comments: true) }
+ assert_raise(ParserError) { parse('{} /*/', allow_comments: true) }
+ assert_raise(ParserError) { parse('{} /x wrong comment', allow_comments: true) }
+ assert_raise(ParserError) { parse('{} /', allow_comments: true) }
+ end
+
+ def test_parse_comments_deprecation
+ assert_equal({}, parse('/**/ {}', allow_comments: true))
+ assert_raise(ParserError) { parse('/**/ {}', allow_comments: false) }
+ if RUBY_ENGINE == 'ruby'
+ assert_deprecated_warning(/Encountered comment in JSON/) do
+ parse('/**/ {}')
+ end
+ end
end
def test_nesting