Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions ext/json/lib/json.rb
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,11 @@
# # warning: detected duplicate keys in JSON object.
# # This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
#
# When set to `+true+`
# When set to +true+:
# # The last value is used.
# JSON.parse('{"a": 1, "a":2}') => {"a" => 2}
#
# When set to `+false+`, the future default:
# When set to +false+, the future default:
# JSON.parse('{"a": 1, "a":2}') => duplicate key at line 1 column 1 (JSON::ParserError)
#
# ---
Expand Down Expand Up @@ -184,6 +184,20 @@
#
# ---
#
# Option +allow_comments+ (boolean) specifies whether to allow
# JavaScript-style comments (either <tt>// comment</tt> or <tt>/* comment */</tt>);
# defaults to +false+.
#
# When not specified, comments are still accepted, but a deprecation warning is emitted when one is encountered.
#
# When set to +true+, comments are ignored:
# JSON.parse('/* comment */ {"a": 1, "a":2}') # => {"a" => 2}
#
# When set to +false+, the future default:
# JSON.parse('/* comment */ {"a": 1, "a":2}') # unexpected character: '/' at line 1 column 1 (JSON::ParserError)
#
# ---
#
# Option +allow_control_characters+ (boolean) specifies whether to allow
# unescaped ASCII control characters, such as newlines, in strings;
# defaults to +false+.
Expand Down
84 changes: 53 additions & 31 deletions ext/json/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
#include "../vendor/ryu.h"
#include "../simd/simd.h"

static VALUE mJSON, eNestingError, Encoding_UTF_8;
static VALUE mJSON, eNestingError, eParserError, Encoding_UTF_8;
static VALUE CNaN, CInfinity, CMinusInfinity;

static ID i_new, i_try_convert, i_uminus, i_encode;
static ID i_new, i_try_convert, i_uminus, i_encode, i_at_line, i_at_column;

static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters,
sym_allow_invalid_escape, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load,
sym_allow_duplicate_key;
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_comments,
sym_allow_control_characters, sym_allow_invalid_escape, sym_symbolize_names,
sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key;

static int binary_encindex;
static int utf8_encindex;
Expand Down Expand Up @@ -382,7 +382,7 @@ typedef struct json_frame_stack_struct {
json_frame *ptr;
} json_frame_stack;

enum duplicate_key_action {
enum deprecatable_action {
JSON_DEPRECATED = 0,
JSON_IGNORE,
JSON_RAISE,
Expand All @@ -392,7 +392,8 @@ typedef struct JSON_ParserStruct {
VALUE on_load_proc;
VALUE decimal_class;
ID decimal_method_id;
enum duplicate_key_action on_duplicate_key;
enum deprecatable_action on_duplicate_key;
enum deprecatable_action on_comment;
int max_nesting;
bool allow_nan;
bool allow_trailing_comma;
Expand Down Expand Up @@ -590,6 +591,8 @@ static void cursor_position(JSON_ParserState *state, long *line_out, long *colum
*column_out = column;
}

static const unsigned int MAX_DEPRECATIONS = 5;

static void emit_parse_warning(const char *message, JSON_ParserState *state)
{
long line, column;
Expand Down Expand Up @@ -642,9 +645,9 @@ static VALUE build_parse_error_message(const char *format, JSON_ParserState *sta

static VALUE parse_error_new(VALUE message, long line, long column)
{
VALUE exc = rb_exc_new_str(rb_path2class("JSON::ParserError"), message);
rb_ivar_set(exc, rb_intern("@line"), LONG2NUM(line));
rb_ivar_set(exc, rb_intern("@column"), LONG2NUM(column));
VALUE exc = rb_exc_new_str(eParserError, message);
rb_ivar_set(exc, i_at_line, LONG2NUM(line));
rb_ivar_set(exc, i_at_column, LONG2NUM(column));
return exc;
}

Expand Down Expand Up @@ -707,9 +710,14 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const

static const rb_data_type_t JSON_ParserConfig_type;

const char *COMMENT_DEPRECATION_MESSAGE = "Encountered comment in JSON. This will raise an error in json 3.0 unless enabled via `allow_comments: true`";
NOINLINE(static) void
json_eat_comments(JSON_ParserState *state)
json_eat_comments(JSON_ParserState *state, JSON_ParserConfig *config)
{
if (config->on_comment == JSON_RAISE) {
raise_parse_error("unexpected token %s", state);
}

const char *start = state->cursor;
state->cursor++;

Expand Down Expand Up @@ -744,10 +752,15 @@ json_eat_comments(JSON_ParserState *state)
raise_parse_error_at("unexpected token %s", state, start);
break;
}

if (config->on_comment == JSON_DEPRECATED && state->emitted_deprecations < MAX_DEPRECATIONS) {
state->emitted_deprecations++;
emit_parse_warning(COMMENT_DEPRECATION_MESSAGE, state);
}
}

ALWAYS_INLINE(static) void
json_eat_whitespace(JSON_ParserState *state)
json_eat_whitespace(JSON_ParserState *state, JSON_ParserConfig *config)
{
while (true) {
switch (peek(state)) {
Expand Down Expand Up @@ -778,7 +791,7 @@ json_eat_whitespace(JSON_ParserState *state)
state->cursor++;
break;
case '/':
json_eat_comments(state);
json_eat_comments(state, config);
break;

default:
Expand Down Expand Up @@ -1127,9 +1140,9 @@ NOINLINE(static) void json_on_duplicate_key(JSON_ParserState *state, JSON_Parser

case JSON_DEPRECATED:
// Only emit the first few deprecations to avoid spamming.
if (state->emitted_deprecations < 5) {
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
if (state->emitted_deprecations < MAX_DEPRECATIONS) {
state->emitted_deprecations++;
emit_duplicate_key_warning(state, json_find_duplicated_key(count, pairs));
}
return;

Expand Down Expand Up @@ -1498,7 +1511,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}

JSON_PHASE_VALUE: {
json_eat_whitespace(state);
json_eat_whitespace(state, config);

VALUE value;
switch (peek(state)) {
Expand Down Expand Up @@ -1559,7 +1572,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)

case '[': {
state->cursor++;
json_eat_whitespace(state);
json_eat_whitespace(state, config);

if (peek(state) == ']') {
state->cursor++;
Expand All @@ -1585,7 +1598,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
const char *object_start_cursor = state->cursor;

state->cursor++;
json_eat_whitespace(state);
json_eat_whitespace(state, config);

if (peek(state) == '}') {
state->cursor++;
Expand Down Expand Up @@ -1632,7 +1645,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_OBJECT_KEY: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);

json_eat_whitespace(state);
json_eat_whitespace(state, config);

if (RB_LIKELY(peek(state) == '"')) {
json_push_value(state, config, json_parse_string(state, config, true));
Expand All @@ -1654,7 +1667,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_OBJECT_COLON: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);

json_eat_whitespace(state);
json_eat_whitespace(state, config);

if (RB_LIKELY(peek(state) == ':')) {
state->cursor++;
Expand All @@ -1675,14 +1688,14 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_ARRAY_COMMA: {
JSON_ASSERT(frame->type == JSON_FRAME_ARRAY);

json_eat_whitespace(state);
json_eat_whitespace(state, config);

const char next_char = peek(state);

if (RB_LIKELY(next_char == ',')) {
state->cursor++;
if (config->allow_trailing_comma) {
json_eat_whitespace(state);
json_eat_whitespace(state, config);
if (peek(state) == ']') {
// Trailing comma: stay in COMMA to close on the next iteration.
goto JSON_PHASE_ARRAY_COMMA;
Expand Down Expand Up @@ -1717,14 +1730,14 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_PHASE_OBJECT_COMMA: {
JSON_ASSERT(frame->type == JSON_FRAME_OBJECT);

json_eat_whitespace(state);
json_eat_whitespace(state, config);
const char next_char = peek(state);

if (RB_LIKELY(next_char == ',')) {
state->cursor++;

if (config->allow_trailing_comma) {
json_eat_whitespace(state);
json_eat_whitespace(state, config);
if (peek(state) == '}') {
// Trailing comma: stay in COMMA to close on the next iteration.
goto JSON_PHASE_OBJECT_COMMA;
Expand Down Expand Up @@ -1766,9 +1779,9 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
JSON_UNREACHABLE_RETURN(Qundef);
}

static void json_ensure_eof(JSON_ParserState *state)
static void json_ensure_eof(JSON_ParserState *state, JSON_ParserConfig *config)
{
json_eat_whitespace(state);
json_eat_whitespace(state, config);
if (!eos(state)) {
raise_parse_error("unexpected token at end of stream %s", state);
}
Expand Down Expand Up @@ -1825,6 +1838,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; }
else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); }
else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); }
else if (key == sym_allow_comments) { config->on_comment = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); }
else if (key == sym_allow_invalid_escape) { config->allow_invalid_escape = RTEST(val); }
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
Expand Down Expand Up @@ -1977,7 +1991,7 @@ static VALUE cParser_parse(JSON_ParserConfig *config, VALUE src)
RB_GC_GUARD(value_stack_handle);
RB_GC_GUARD(frame_stack_handle);
RB_GC_GUARD(Vsource);
json_ensure_eof(state);
json_ensure_eof(state, config);

return result;
}
Expand Down Expand Up @@ -2055,30 +2069,36 @@ void Init_parser(void)
mJSON = rb_define_module("JSON");
VALUE mExt = rb_define_module_under(mJSON, "Ext");
VALUE cParserConfig = rb_define_class_under(mExt, "ParserConfig", rb_cObject);

rb_global_variable(&eParserError);
eParserError = rb_path2class("JSON::ParserError");

rb_global_variable(&eNestingError);
eNestingError = rb_path2class("JSON::NestingError");
rb_gc_register_mark_object(eNestingError);

rb_define_alloc_func(cParserConfig, cJSON_parser_s_allocate);
rb_define_method(cParserConfig, "initialize", cParserConfig_initialize, 1);
rb_define_method(cParserConfig, "parse", cParserConfig_parse, 1);

VALUE cParser = rb_define_class_under(mExt, "Parser", rb_cObject);
rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);

rb_global_variable(&CNaN);
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
rb_gc_register_mark_object(CNaN);

rb_global_variable(&CInfinity);
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
rb_gc_register_mark_object(CInfinity);

rb_global_variable(&CMinusInfinity);
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
rb_gc_register_mark_object(CMinusInfinity);

rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));

sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
sym_allow_comments = ID2SYM(rb_intern("allow_comments"));
sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters"));
sym_allow_invalid_escape = ID2SYM(rb_intern("allow_invalid_escape"));
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
Expand All @@ -2091,6 +2111,8 @@ void Init_parser(void)
i_try_convert = rb_intern("try_convert");
i_uminus = rb_intern("-@");
i_encode = rb_intern("encode");
i_at_line = rb_intern("@line");
i_at_column = rb_intern("@column");

binary_encindex = rb_ascii8bit_encindex();
utf8_encindex = rb_utf8_encindex();
Expand Down
40 changes: 23 additions & 17 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1806,18 +1806,21 @@ os_obj_of(VALUE of)

/*
* call-seq:
* ObjectSpace.each_object([module]) {|obj| ... } -> integer
* ObjectSpace.each_object([module]) -> an_enumerator
* ObjectSpace.each_object {|obj| ... } -> integer
* ObjectSpace.each_object(module) {|obj| ... } -> integer
* ObjectSpace.each_object -> enumerator
* ObjectSpace.each_object(module) -> enumerator
*
* Calls the block once for each living, nonimmediate object in this
* Ruby process. If <i>module</i> is specified, calls the block
* for only those classes or modules that match (or are a subclass of)
* <i>module</i>. Returns the number of objects found. Immediate
* objects (such as <code>Fixnum</code>s, static <code>Symbol</code>s
* <code>true</code>, <code>false</code> and <code>nil</code>) are
* never returned.
* Calls the block once for each living, non-immediate object in this Ruby
* process, and returns the number of objects found.
*
* If no block is given, an enumerator is returned instead.
* If +module+ is given, calls the block only for objects that are an instance
* of +module+ or one of its subclasses.
*
* Immediate objects (such as small integers, static symbols, +true+, +false+,
* and +nil+) are never yielded.
*
* With no block given, returns a new Enumerator.
*
* Job = Class.new
* jobs = [Job.new, Job.new]
Expand All @@ -1828,18 +1831,21 @@ os_obj_of(VALUE of)
*
* #<Job:0x000000011d6cbbf0>
* #<Job:0x000000011d6cbc68>
* Total count: 2
* Total count: 2
*
* Because every live object is visited, this method is mainly useful for
* debugging, profiling, and introspecting a running process.
*
* Due to a current Ractor implementation issue, this method does not yield
* Ractor-unshareable objects when the process is in multi-Ractor mode. Multi-ractor
* mode is enabled when <code>Ractor.new</code> has been called for the first time.
* See https://bugs.ruby-lang.org/issues/19387 for more information.
* Ractor-unshareable objects when the process is in multi-Ractor mode.
* Multi-Ractor mode is enabled when Ractor.new has been called for the first
* time. See https://bugs.ruby-lang.org/issues/19387 for more information.
*
* a = 12345678987654321 # shareable
* b = [].freeze # shareable
* c = {} # not shareable
* b = [].freeze # shareable
* c = {} # not shareable
* ObjectSpace.each_object {|x| x } # yields a, b, and c
* Ractor.new {} # enter multi-Ractor mode
* Ractor.new {} # enter multi-Ractor mode
* ObjectSpace.each_object {|x| x } # does not yield c
*
*/
Expand Down
Loading