diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb index 26d601926f9b8f..f8dc4ccc9ed0dd 100644 --- a/ext/json/lib/json.rb +++ b/ext/json/lib/json.rb @@ -145,11 +145,11 @@ # # warning: detected duplicate keys in JSON object. # # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true` # -# When set to `+true+` +# When set to +true+: # # The last value is used. # JSON.parse('{"a": 1, "a":2}') => {"a" => 2} # -# When set to `+false+`, the future default: +# When set to +false+, the future default: # JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError) # # --- @@ -184,6 +184,20 @@ # # --- # +# Option +allow_comments+ (boolean) specifies whether to allow +# JavaScript style comments (either // comment or /* comment */); +# will default to +false+ in json 3.0. +# +# When not specified, comments are ignored, but a deprecation warning is emitted if one is encountered. +# +# When set to +true+, comments are ignored: +# JSON.parse('/* comment */ {"a": 1}', allow_comments: true) # => {"a" => 1} +# +# When set to +false+, the future default: +# JSON.parse('/* comment */ {"a": 1}', allow_comments: false) # unexpected token ... (JSON::ParserError) +# +# --- +# # Option +allow_control_characters+ (boolean) specifies whether to allow # unescaped ASCII control characters, such as newlines, in strings; # defaults to +false+. 
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 6b8164c062daca..5559561e26004f 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -2,14 +2,14 @@ #include "../vendor/ryu.h" #include "../simd/simd.h" -static VALUE mJSON, eNestingError, Encoding_UTF_8; +static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8; static VALUE CNaN, CInfinity, CMinusInfinity; -static ID i_new, i_try_convert, i_uminus, i_encode; +static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column; -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, - sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load, - sym_allow_duplicate_key; +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments, + sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names, + sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key; static int binary_encindex; static int utf8_encindex; @@ -382,7 +382,7 @@ typedef struct json_frame_stack_struct { json_frame *ptr; } json_frame_stack; -enum duplicate_key_action { +enum deprecatable_action { JSON_DEPRECATED = 0, JSON_IGNORE, JSON_RAISE, @@ -392,7 +392,8 @@ typedef struct JSON_ParserStruct { VALUE on_load_proc; VALUE decimal_class; ID decimal_method_id; - enum duplicate_key_action on_duplicate_key; + enum deprecatable_action on_duplicate_key; + enum deprecatable_action on_comment; int max_nesting; bool allow_nan; bool allow_trailing_comma; @@ -590,6 +591,8 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum *column_out = column; } +static const unsigned int MAX_DEPRECATIONS = 5; + static void emit_parse_warning(const char *message, JSON_ParserState *state) { long line, column; @@ -642,9 +645,9 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta static VALUE parse_error_new(VALUE message, long line, long 
column) { - VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message); - rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line)); - rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column)); + VALUE exc = rb_exc_new_str(eParserError, message); + rb_ivar_set(exc, i_at_line, LONG2NUM(line)); + rb_ivar_set(exc, i_at_column, LONG2NUM(column)); return exc; } @@ -707,9 +710,14 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const static const rb_data_type_t JSON_ParserConfig_type; +const char *COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`"; NOINLINE(static) void -json_eat_comments(JSON_ParserState *state) +json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config) { + if (config->on_comment == JSON_RAISE) { + raise_parse_error("unexpected token %s", state); + } + const char *start = state->cursor; state->cursor++; @@ -744,10 +752,15 @@ json_eat_comments(JSON_ParserState *state) raise_parse_error_at("unexpected token %s", state, start); break; } + + if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) { + state->emitted_deprecations++; + emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state); + } } ALWAYS_INLINE(static) void -json_eat_whitespace(JSON_ParserState *state) +json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config) { while (true) { switch (peek(state)) { @@ -778,7 +791,7 @@ json_eat_whitespace(JSON_ParserState *state) state->cursor++; break; case '/': - json_eat_comments(state); + json_eat_comments(state, config); break; default: @@ -1127,9 +1140,9 @@ NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_Parser case JSON_DEPRECATED: // Only emit the first few deprecations to avoid spamming. 
- if (state->emitted_deprecations < 5) { - emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); + if (state->emitted_deprecations < MAX_DEPRECATIONS) { state->emitted_deprecations++; + emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs)); } return; @@ -1498,7 +1511,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) } JSON_PHASE_VALUE: { - json_eat_whitespace(state); + json_eat_whitespace(state, config); VALUE value; switch (peek(state)) { @@ -1559,7 +1572,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) case '[': { state->cursor++; - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (peek(state) == ']') { state->cursor++; @@ -1585,7 +1598,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) const char *object_start_cursor = state->cursor; state->cursor++; - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (peek(state) == '}') { state->cursor++; @@ -1632,7 +1645,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) JSON_PHASE_OBJECT_KEY: { JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (RB_LIKELY(peek(state) == '"')) { json_push_value(state, config, json_parse_string(state, config, true)); @@ -1654,7 +1667,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) JSON_PHASE_OBJECT_COLON: { JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (RB_LIKELY(peek(state) == ':')) { state->cursor++; @@ -1675,14 +1688,14 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) JSON_PHASE_ARRAY_COMMA: { JSON_ASSERT(frame->type == JSON_FRAME_ARRAY); - json_eat_whitespace(state); + json_eat_whitespace(state, config); const char next_char = peek(state); if (RB_LIKELY(next_char == 
',')) { state->cursor++; if (config->allow_trailing_comma) { - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (peek(state) == ']') { // Trailing comma: stay in COMMA to close on the next iteration. goto JSON_PHASE_ARRAY_COMMA; @@ -1717,14 +1730,14 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) JSON_PHASE_OBJECT_COMMA: { JSON_ASSERT(frame->type == JSON_FRAME_OBJECT); - json_eat_whitespace(state); + json_eat_whitespace(state, config); const char next_char = peek(state); if (RB_LIKELY(next_char == ',')) { state->cursor++; if (config->allow_trailing_comma) { - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (peek(state) == '}') { // Trailing comma: stay in COMMA to close on the next iteration. goto JSON_PHASE_OBJECT_COMMA; @@ -1766,9 +1779,9 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) JSON_UNREACHABLE_RETURN(Qundef); } -static void json_ensure_eof(JSON_ParserState *state) +static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config) { - json_eat_whitespace(state); + json_eat_whitespace(state, config); if (!eos(state)) { raise_parse_error("unexpected token at end of stream %s", state); } @@ -1825,6 +1838,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? 
JSON_IGNORE : JSON_RAISE; } else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); } else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } @@ -1977,7 +1991,7 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src) RB_GC_GUARD(value_stack_handle); RB_GC_GUARD(frame_stack_handle); RB_GC_GUARD(Vsource); - json_ensure_eof(state); + json_ensure_eof(state, config); return result; } @@ -2055,8 +2069,13 @@ void Init_parser(void) mJSON = rb_define_module("JSON"); VALUE mExt = rb_define_module_under(mJSON, "Ext"); VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject); + + rb_global_variable(&eParserError); + eParserError = rb_path2class("JSON::ParserError"); + + rb_global_variable(&eNestingError); eNestingError = rb_path2class("JSON::NestingError"); - rb_gc_register_mark_object(eNestingError); + rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate); rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1); rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1); @@ -2064,14 +2083,14 @@ void Init_parser(void) VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject); rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + rb_global_variable(&CNaN); CNaN = rb_const_get(mJSON, rb_intern("NaN")); - rb_gc_register_mark_object(CNaN); + rb_global_variable(&CInfinity); CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); - rb_gc_register_mark_object(CInfinity); + rb_global_variable(&CMinusInfinity); CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); - rb_gc_register_mark_object(CMinusInfinity); rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); @@ -2079,6 +2098,7 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = 
ID2SYM(rb_intern("allow_nan")); sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_comments = ID2SYM(rb_intern("allow_comments")); sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); @@ -2091,6 +2111,8 @@ void Init_parser(void) i_try_convert = rb_intern("try_convert"); i_uminus = rb_intern("-@"); i_encode = rb_intern("encode"); + i_at_line = rb_intern("@line"); + i_at_column = rb_intern("@column"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); diff --git a/gc.c b/gc.c index 0219fa6e78cbee..72c3f2d24720b6 100644 --- a/gc.c +++ b/gc.c @@ -1806,18 +1806,21 @@ os_obj_of(VALUE of) /* * call-seq: - * ObjectSpace.each_object([module]) {|obj| ... } -> integer - * ObjectSpace.each_object([module]) -> an_enumerator + * ObjectSpace.each_object {|obj| ... } -> integer + * ObjectSpace.each_object(module) {|obj| ... } -> integer + * ObjectSpace.each_object -> enumerator + * ObjectSpace.each_object(module) -> enumerator * - * Calls the block once for each living, nonimmediate object in this - * Ruby process. If module is specified, calls the block - * for only those classes or modules that match (or are a subclass of) - * module. Returns the number of objects found. Immediate - * objects (such as Fixnums, static Symbols - * true, false and nil) are - * never returned. + * Calls the block once for each living, non-immediate object in this Ruby + * process, and returns the number of objects found. * - * If no block is given, an enumerator is returned instead. + * If +module+ is given, calls the block only for objects that are an instance + * of +module+ or one of its subclasses. + * + * Immediate objects (such as small integers, static symbols, +true+, +false+, + * and +nil+) are never yielded. + * + * With no block given, returns a new Enumerator. 
* * Job = Class.new * jobs = [Job.new, Job.new] @@ -1828,18 +1831,21 @@ os_obj_of(VALUE of) * * # * # - * Total count: 2 + * Total count: 2 + * + * Because every live object is visited, this method is mainly useful for + * debugging, profiling, and introspecting a running process. * * Due to a current Ractor implementation issue, this method does not yield - * Ractor-unshareable objects when the process is in multi-Ractor mode. Multi-ractor - * mode is enabled when Ractor.new has been called for the first time. - * See https://bugs.ruby-lang.org/issues/19387 for more information. + * Ractor-unshareable objects when the process is in multi-Ractor mode. + * Multi-Ractor mode is enabled when Ractor.new has been called for the first + * time. See https://bugs.ruby-lang.org/issues/19387 for more information. * * a = 12345678987654321 # shareable - * b = [].freeze # shareable - * c = {} # not shareable + * b = [].freeze # shareable + * c = {} # not shareable * ObjectSpace.each_object {|x| x } # yields a, b, and c - * Ractor.new {} # enter multi-Ractor mode + * Ractor.new {} # enter multi-Ractor mode * ObjectSpace.each_object {|x| x } # does not yield c * */ diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 292ca1a6701147..943d932851bdd3 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -489,7 +489,7 @@ def test_parse_comments JSON assert_equal( { "key1" => "value1", "key2" => "value2", "key3" => "value3" }, - parse(json)) + parse(json, allow_comments: true)) json = <<~JSON { "key1":"value1" /* multi line @@ -498,7 +498,7 @@ def test_parse_comments * comment */ } JSON - assert_raise(ParserError) { parse(json) } + assert_raise(ParserError) { parse(json, allow_comments: true) } json = <<~JSON { "key1":"value1" /* multi line @@ -506,7 +506,7 @@ def test_parse_comments /* legal nested multi line comment start sequence */ } JSON - assert_equal({ "key1" => "value1" }, parse(json)) + assert_equal({ "key1" => "value1" 
}, parse(json, allow_comments: true)) json = <<~JSON { "key1":"value1" /* multi line @@ -515,18 +515,28 @@ def test_parse_comments and again, throw an Error */ } JSON - assert_raise(ParserError) { parse(json) } + assert_raise(ParserError) { parse(json, allow_comments: true) } json = <<~JSON { "key1":"value1" /*/*/ } JSON - assert_equal({ "key1" => "value1" }, parse(json)) - assert_equal({}, parse('{} /**/')) - assert_raise(ParserError) { parse('{} /* comment not closed') } - assert_raise(ParserError) { parse('{} /*/') } - assert_raise(ParserError) { parse('{} /x wrong comment') } - assert_raise(ParserError) { parse('{} /') } + assert_equal({ "key1" => "value1" }, parse(json, allow_comments: true)) + assert_equal({}, parse('{} /**/', allow_comments: true)) + assert_raise(ParserError) { parse('{} /* comment not closed', allow_comments: true) } + assert_raise(ParserError) { parse('{} /*/', allow_comments: true) } + assert_raise(ParserError) { parse('{} /x wrong comment', allow_comments: true) } + assert_raise(ParserError) { parse('{} /', allow_comments: true) } + end + + def test_parse_comments_deprecation + assert_equal({}, parse('/**/ {}', allow_comments: true)) + assert_raise(ParserError) { parse('/**/ {}', allow_comments: false) } + if RUBY_ENGINE == 'ruby' + assert_deprecated_warning(/Encountered comment in JSON/) do + parse('/**/ {}') + end + end end def test_nesting