From 2a5394cee70570f722ed063d5db59f2bf7d4835d Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sun, 11 Jul 2021 15:19:30 +0900 Subject: [PATCH 1/5] Dump FreeBSD socket credential data wholely --- ext/socket/ancdata.c | 69 ++++++++++++++++++++++--------------- test/socket/test_ancdata.rb | 18 ++++++++++ 2 files changed, 60 insertions(+), 27 deletions(-) diff --git a/ext/socket/ancdata.c b/ext/socket/ancdata.c index f1e9e425248d3e..0e17d9e87383ce 100644 --- a/ext/socket/ancdata.c +++ b/ext/socket/ancdata.c @@ -711,6 +711,9 @@ anc_inspect_passcred_credentials(int level, int type, VALUE data, VALUE ret) static int anc_inspect_socket_creds(int level, int type, VALUE data, VALUE ret) { + long len; + const char *ptr; + if (level != SOL_SOCKET && type != SCM_CREDS) return 0; @@ -725,48 +728,59 @@ anc_inspect_socket_creds(int level, int type, VALUE data, VALUE ret) * This heuristics works well except when sc_ngroups == CMGROUP_MAX. */ + RSTRING_GETMEM(data, ptr, len); #if defined(HAVE_TYPE_STRUCT_CMSGCRED) /* FreeBSD */ - if (RSTRING_LEN(data) == sizeof(struct cmsgcred)) { + if (len == sizeof(struct cmsgcred)) { struct cmsgcred cred; - memcpy(&cred, RSTRING_PTR(data), sizeof(struct cmsgcred)); + int ngroups; + memcpy(&cred, ptr, sizeof(struct cmsgcred)); rb_str_catf(ret, " pid=%u", cred.cmcred_pid); rb_str_catf(ret, " uid=%u", cred.cmcred_uid); rb_str_catf(ret, " euid=%u", cred.cmcred_euid); rb_str_catf(ret, " gid=%u", cred.cmcred_gid); - if (cred.cmcred_ngroups) { + rb_str_catf(ret, " groups[%d]=[", cred.cmcred_ngroups); + ngroups = cred.cmcred_ngroups; + if (ngroups > 0) { int i; - const char *sep = " groups="; - for (i = 0; i < cred.cmcred_ngroups; i++) { - rb_str_catf(ret, "%s%u", sep, cred.cmcred_groups[i]); - sep = ","; + rb_str_catf(ret, "%u", cred.cmcred_groups[0]); + if (ngroups > CMGROUP_MAX) ngroups = CMGROUP_MAX; + for (i = 1; i < ngroups; i++) { + rb_str_catf(ret, ",%u", cred.cmcred_groups[i]); } } - rb_str_cat2(ret, " (cmsgcred)"); + rb_str_cat2(ret, "] (cmsgcred)"); return 1; } #endif #if defined(HAVE_TYPE_STRUCT_SOCKCRED) /* FreeBSD, NetBSD */ - if ((size_t)RSTRING_LEN(data) >= SOCKCREDSIZE(0)) { - struct sockcred cred0, *cred; - memcpy(&cred0, RSTRING_PTR(data), SOCKCREDSIZE(0)); - if ((size_t)RSTRING_LEN(data) == SOCKCREDSIZE(cred0.sc_ngroups)) { - cred = (struct sockcred *)ALLOCA_N(char, SOCKCREDSIZE(cred0.sc_ngroups)); - memcpy(cred, RSTRING_PTR(data), SOCKCREDSIZE(cred0.sc_ngroups)); - rb_str_catf(ret, " uid=%u", cred->sc_uid); - rb_str_catf(ret, " euid=%u", cred->sc_euid); - rb_str_catf(ret, " gid=%u", cred->sc_gid); - rb_str_catf(ret, " egid=%u", cred->sc_egid); - if (cred0.sc_ngroups) { - int i; - const char *sep = " groups="; - for (i = 0; i < cred0.sc_ngroups; i++) { - rb_str_catf(ret, "%s%u", sep, cred->sc_groups[i]); - sep = ","; - } + if ((size_t)len >= SOCKCREDSIZE(0)) { + struct sockcred cred; + int ngroups; + memcpy(&cred, ptr, SOCKCREDSIZE(0)); + rb_str_catf(ret, " uid=%u", cred.sc_uid); + rb_str_catf(ret, " euid=%u", cred.sc_euid); + rb_str_catf(ret, " gid=%u", cred.sc_gid); + rb_str_catf(ret, " egid=%u", cred.sc_egid); + rb_str_catf(ret, " groups[%d]=[", cred.sc_ngroups); + ngroups = cred.sc_ngroups; + if (ngroups <= 0) { + ngroups = 0; + } + else { + size_t max = ((size_t)len - SOCKCREDSIZE(0)) / sizeof(gid_t); + if ((size_t)ngroups > max) ngroups = (int)max; + } + if (ngroups > 0) { + int i; + const void *gp = ptr + offsetof(struct sockcred, sc_groups); + const gid_t *groups = MEMCPY(ALLOCA_N(gid_t, ngroups), gp, gid_t, ngroups); + rb_str_catf(ret, "%u", groups[0]); + for (i = 1; i < ngroups; i++) { + rb_str_catf(ret, ",%u", groups[i]); } - rb_str_cat2(ret, " (sockcred)"); - return 1; } + rb_str_cat2(ret, "] (sockcred)"); + return 1; } #endif return 0; @@ -1000,6 +1014,7 @@ ancillary_inspect(VALUE self) rb_str_catf(ret, " %"PRIsVALUE, rb_sym2str(vtype)); else rb_str_catf(ret, " cmsg_type:%d", type); + RB_GC_GUARD(vtype); } else { rb_str_catf(ret, " cmsg_level:%d", level); diff --git a/test/socket/test_ancdata.rb b/test/socket/test_ancdata.rb index b2f86a0bb1e076..387be6080ff15c 100644 --- a/test/socket/test_ancdata.rb +++ b/test/socket/test_ancdata.rb @@ -65,4 +65,22 @@ def test_unix_rights } end end + + if /freebsd/ =~ RUBY_PLATFORM + def test_cmsgcred_inspect + cred = [0, 0, 0, 0, 9999, *Array.new(16, 0)].pack('L4I!L*') + s = Socket::AncillaryData.new(:UNIX, :SOCKET, :SCM_CREDS, cred).inspect + assert_include(s, 'groups[9999]') + assert_include(s, '(cmsgcred)') + end + end + + if /netbsd|freebsd/ =~ RUBY_PLATFORM + def test_sockcred_inspect + cred = [0, 0, 0, 0, 9999, 0].pack('L4S!L') + s = Socket::AncillaryData.new(:UNIX, :SOCKET, :SCM_CREDS, cred).inspect + assert_include(s, 'groups[9999]') + assert_include(s, '(sockcred)') + end + end end if defined? Socket::AncillaryData From 24ce2563d771301e3b52c82c97f6a76fc6cfa3d4 Mon Sep 17 00:00:00 2001 From: Scott Myron Date: Fri, 5 Jun 2026 09:04:22 -0500 Subject: [PATCH 2/5] [ruby/json] Reorder the json_frame_type and json_frame_phase enum to simplify the transition from a JSON_PHASE_VALUE to the next phase. https://github.com/ruby/json/commit/887274e642 --- ext/json/parser/parser.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index ca082812e65bd2..b820066c596492 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -345,22 +345,25 @@ static void rvalue_stack_eagerly_release(VALUE handle) #define JSON_FRAME_STACK_INITIAL_CAPA 32 enum json_frame_type { - JSON_FRAME_ROOT, - JSON_FRAME_ARRAY, - JSON_FRAME_OBJECT, + JSON_FRAME_ROOT, // == JSON_PHASE_DONE + JSON_FRAME_ARRAY, // == JSON_PHASE_ARRAY_COMMA + JSON_FRAME_OBJECT, // = JSON_PHASE_OBJECT_COMMA }; // Where a frame is within its container's grammar. This is the entirety of the // parser's "what to do next" state: json_parse_any dispatches on the top // frame's phase and holds no resume state in C locals, so a parse can stop at // any value boundary and be resumed purely from the (persistable) frame stack. +// +// The first three phases are deliberately equal to the corresponding json_frame_type +// to simplify the transition of phase in json_value_completed. enum json_frame_phase { + JSON_PHASE_DONE = JSON_FRAME_ROOT, // root only: the document value has been parsed + JSON_PHASE_ARRAY_COMMA = JSON_FRAME_ARRAY, // after a value: expecting ',' or the closing ']' + JSON_PHASE_OBJECT_COMMA = JSON_FRAME_OBJECT, // after a value: expecting ',' or the closing '}' JSON_PHASE_VALUE, // expecting a value (document root, array element, or object value after ':') - JSON_PHASE_ARRAY_COMMA, // after a value: expecting ',' or the closing ']' JSON_PHASE_OBJECT_KEY, // expecting a '"' key (after '{' or ',') - JSON_PHASE_OBJECT_COMMA, // after a value: expecting ',' or the closing '}' JSON_PHASE_OBJECT_COLON, // object only: after a key, expecting ':' - JSON_PHASE_DONE, // root only: the document value has been parsed }; typedef struct json_frame_struct { @@ -1442,18 +1445,11 @@ static inline long json_frame_entry_count(const json_frame *frame, const rvalue_ // after a container close is the freshly re-exposed parent. static inline void json_value_completed(json_frame *frame) { - switch (frame->type) { - case JSON_FRAME_ROOT: - frame->phase = JSON_PHASE_DONE; - return; - case JSON_FRAME_ARRAY: - frame->phase = JSON_PHASE_ARRAY_COMMA; - return; - case JSON_FRAME_OBJECT: - frame->phase = JSON_PHASE_OBJECT_COMMA; - return; - } - UNREACHABLE; + JSON_ASSERT((int)JSON_PHASE_DONE == (int)JSON_FRAME_ROOT); + JSON_ASSERT((int)JSON_PHASE_ARRAY_COMMA == (int)JSON_FRAME_ARRAY); + JSON_ASSERT((int)JSON_PHASE_OBJECT_COMMA == (int)JSON_FRAME_OBJECT); + + frame->phase = (enum json_frame_phase) frame->type; } // Parse an arbitrary JSON value iteratively. This is a state machine driven From dba570e575ff67096b44b60831a34a7eac79d7c1 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Fri, 5 Jun 2026 18:23:15 +0200 Subject: [PATCH 3/5] [ruby/json] parser.c: Extract json_match_keyword Extracted from: https://github.com/ruby/json/pull/994 Modern compilers shouldn't have problem computing `strlen` at compile time and generating the same code. https://github.com/ruby/json/commit/b07f74bd73 --- ext/json/parser/parser.c | 52 +++++++++++++++++-------------- test/json/json_ext_parser_test.rb | 2 +- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index b820066c596492..a61f997a112db1 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1425,9 +1425,7 @@ static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_Par static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config) { - const char *start = state->cursor; - state->cursor++; - return json_parse_number(state, config, true, start); + return json_parse_number(state, config, true, state->cursor - 1); } // How many values (array elements, or interleaved object keys+values) have been @@ -1452,6 +1450,22 @@ static inline void json_value_completed(json_frame *frame) frame->phase = (enum json_frame_phase) frame->type; } +static inline bool json_match_keyword(JSON_ParserState *state, const char *keyword, size_t offset) +{ + // It is assumed that since `keyword` is always a literal, the compiler is able to constantize this + // `strlen` and several other computations in that routine, such as eliminating the `if (resumable)` branch. + + size_t len = strlen(keyword); + + // Note: memcmp with a small power of two and a literal string compile to an integer comparison / + // That's why we sometime compare starting from the first byte and sometimes from the second. + if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) { + state->cursor += len; + return true; + } + return false; +} + // Parse an arbitrary JSON value iteratively. This is a state machine driven // entirely by the top frame's phase so it can stop at any value boundary and // resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the @@ -1475,8 +1489,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) switch (peek(state)) { case 'n': - if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) { - state->cursor += 4; + if (json_match_keyword(state, "null", 0)) { json_push_value(state, config, Qnil); json_value_completed(frame); break; @@ -1484,8 +1497,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); case 't': - if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) { - state->cursor += 4; + if (json_match_keyword(state, "true", 0)) { json_push_value(state, config, Qtrue); json_value_completed(frame); break; @@ -1493,9 +1505,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); case 'f': - // Note: memcmp with a small power of two compile to an integer comparison - if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) { - state->cursor += 5; + if (json_match_keyword(state, "false", 1)) { json_push_value(state, config, Qfalse); json_value_completed(frame); break; @@ -1504,8 +1514,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); case 'N': // Note: memcmp with a small power of two compile to an integer comparison - if (config->allow_nan && rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) { - state->cursor += 3; + if (config->allow_nan && json_match_keyword(state, "NaN", 1)) { json_push_value(state, config, CNaN); json_value_completed(frame); break; @@ -1513,8 +1522,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); case 'I': - if (config->allow_nan && rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) { - state->cursor += 8; + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { json_push_value(state, config, CInfinity); json_value_completed(frame); break; @@ -1522,17 +1530,13 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config) raise_parse_error("unexpected token %s", state); case '-': { - // Note: memcmp with a small power of two compile to an integer comparison - if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) { - if (config->allow_nan) { - state->cursor += 9; - json_push_value(state, config, CMinusInfinity); - json_value_completed(frame); - break; - } else { - raise_parse_error("unexpected token %s", state); - } + state->cursor++; + if (config->allow_nan && json_match_keyword(state, "Infinity", 0)) { + json_push_value(state, config, CMinusInfinity); + json_value_completed(frame); + break; } + json_push_value(state, config, json_parse_negative_number(state, config)); json_value_completed(frame); break; diff --git a/test/json/json_ext_parser_test.rb b/test/json/json_ext_parser_test.rb index e610f642f199a6..d585b8d0dcd4ae 100644 --- a/test/json/json_ext_parser_test.rb +++ b/test/json/json_ext_parser_test.rb @@ -26,7 +26,7 @@ def test_error_messages ex = assert_raise(ParserError) { parse('-Infinity something') } unless RUBY_PLATFORM =~ /java/ - assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message + assert_equal "invalid number: '-Infinity' at line 1 column 1", ex.message end ex = assert_raise(ParserError) { parse('NaN something') } From a2433947b767b7387bd70c71221df4e22f38985e Mon Sep 17 00:00:00 2001 From: Kevin Menard Date: Fri, 5 Jun 2026 12:39:52 -0400 Subject: [PATCH 4/5] ZJIT: Avoid type checker mismatch when forwarding `LoadField` (GH-17185) `LoadField` has an associated `return_type` field that's set depending on the type of load. When an object's shape transitions its storage strategy (embedded->heap), the return type may change (e.g., `BasicObject` to `CPtr`). If that shape layout transition is sandwiched between two `LoadField` inside the same block, it's possible we will see two different return types when reading from the same offset of the same object. Load-store forwarding will rightfully eliminate the second `LoadField`. However, had it not been removed, we'd have two resulting bit-for-bit equal values with two different associated types. That confuses the type checker and so semantically valid code will panic with a `MismatchedOperandType`. I ran across this while working on GH-16966. I've pulled it out here to make review easier and because the fix doesn't touch any inlining code. We haven't run into this thus far because a same-block shape storage transition emits a `SetIvar`, which has a broad enough effect that load-store forwarding reset its cache. If anything along that chain changes then we could get into this state without inlining. It's a lot to keep track of so I think we should tackle this broadly. I chatted with @tekknolagi about the general problem. A simple or obvious solution did not present itself. This PR does not fix the broader problem; it temporarily patches over the `MismatchedOperandType` by preserving duplicate loads if the return value would be different. This is not ideal and will need to be reverted when a comprehensive solution is implemented. But, it's enough to satisfy the type checker. --- zjit/src/hir.rs | 99 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs index 82e50cdd36c817..805503b82bd2d6 100644 --- a/zjit/src/hir.rs +++ b/zjit/src/hir.rs @@ -4987,14 +4987,28 @@ impl Function { compile_time_heap.insert(key, val); insn_id }, - Insn::LoadField { recv, offset, .. } => { + Insn::LoadField { recv, offset, return_type, .. } => { let key = (self.chase_insn(recv), offset); match compile_time_heap.entry(key) { std::collections::hash_map::Entry::Occupied(entry) => { - // If the value is stored already, we should short circuit. - // However, we need to replace insn_id with its representative in the SSA union. - self.make_equal_to(insn_id, *entry.get()); - continue + let cached_insn = *entry.get(); + + // TODO (nirvdrum 2026-06-04): Remove the return type guard and supporting code when the type checker becomes more accurate. + // If there's an an embedded<=>heap shape storage transition, it's possible for this `LoadField` to have a different return + // type than the cached entry (`CPtr` vs `BasicObject`). While the loaded value would be the same in either case, the + // difference in associated type causes type checking to fail. Consequently, we conservatively retain the duplicate `LoadField`. + // The `optimize_load_store_does_not_alias_loads_with_incompatible_return_types` test checks the problematic case. + let can_forward_cached_insn = match self.find(cached_insn) { + Insn::LoadField { return_type : cached_return_type,.. } => cached_return_type.is_subtype(return_type), + _ => true + }; + + if can_forward_cached_insn { + // If the value is stored already, we should short circuit. + // However, we need to replace insn_id with its representative in the SSA union. + self.make_equal_to(insn_id, cached_insn); + continue + } } std::collections::hash_map::Entry::Vacant(_) => { // If the value has not been accessed, cache a copy to optimize future loads or stores. @@ -9307,6 +9321,81 @@ mod validation_tests { function.seal_entries(); assert_matches_err(function.validate(), ValidationError::DuplicateInstruction(exit, val)); } + + // The heap-fields pointer (`as_heap`, a CPtr) and the first embedded + // instance variable both live at ROBJECT_OFFSET_AS_HEAP_FIELDS == + // ROBJECT_OFFSET_AS_ARY == 0x10 on a Ruby object. They are distinct fields + // with incompatible value types that happen to share a base and an offset. + // Since we could end up with two `LoadField` on different shape types + // (e.g., as the result of inlining), `optimize_load_store` must not satisfy + // one load from another cached load with a different return type. The fault + // surfaces here as the forwarded value flowing into a `Return` with the + // wrong type (`CPtr` rather than `BasicObject`). + #[test] + fn optimize_load_store_does_not_alias_loads_with_incompatible_return_types() { + assert_eq!(ROBJECT_OFFSET_AS_HEAP_FIELDS, ROBJECT_OFFSET_AS_ARY, + "Conflicting field offsets changed, rendering the rest of this test incorrect"); + + let mut function = Function::new(std::ptr::null()); + let entry = function.entry_block; + let recv = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); + function.push_insn(entry, Insn::LoadField { + recv, + id: FieldName::as_heap, + offset: ROBJECT_OFFSET_AS_HEAP_FIELDS as i32, + return_type: types::CPtr, + }); + let ivar = function.push_insn(entry, Insn::LoadField { + recv, + id: FieldName::Id(ID(1)), + offset: ROBJECT_OFFSET_AS_ARY as i32, + return_type: types::BasicObject, + }); + function.push_insn(entry, Insn::Return { val: ivar }); + function.seal_entries(); + + function.infer_types(); + function.optimize_load_store(); + + assert!( + function.validate().is_ok(), + "optimize_load_store aliased two loads with different return types: {:?}", + function.validate(), + ); + } + + #[test] + fn optimize_load_store_does_not_alias_loads_with_compatible_return_types() { + assert_eq!(ROBJECT_OFFSET_AS_HEAP_FIELDS, ROBJECT_OFFSET_AS_ARY, + "Conflicting field offsets changed, rendering the rest of this test incorrect"); + + let mut function = Function::new(std::ptr::null()); + let entry = function.entry_block; + let recv = function.push_insn(entry, Insn::Const { val: Const::Value(Qnil) }); + function.push_insn(entry, Insn::LoadField { + recv, + id: FieldName::as_heap, + offset: ROBJECT_OFFSET_AS_HEAP_FIELDS as i32, + return_type: types::BasicObject, + }); + let ivar = function.push_insn(entry, Insn::LoadField { + recv, + id: FieldName::Id(ID(1)), + offset: ROBJECT_OFFSET_AS_ARY as i32, + return_type: types::Array, + }); + function.push_insn(entry, Insn::Return { val: ivar }); + function.seal_entries(); + + function.infer_types(); + function.optimize_load_store(); + + assert!( + function.validate().is_ok(), + "optimize_load_store failed to alias two loads with different, but compatible, return types: {:?}", + function.validate(), + ); + } } #[cfg(test)] From dd9213c41de9447383708aed32d64211f0697506 Mon Sep 17 00:00:00 2001 From: John Hawthorn Date: Wed, 3 Jun 2026 17:39:07 -0700 Subject: [PATCH 5/5] Convert object_tracing to use weak references Object tracing listens to the NEWOBJ hook to see all objects allocated while it is active. Previously it also enabled a FREEOBJ tracepoint to drop each object's record as the object was freed. However the FREEOBJ tracepoint only fires while tracing is active. An object recorded during tracing can be freed after tracing stops, and that free was missed, leaving a stale record in object_table keyed by a freed object. These dangling keys are unsafe during compaction, which was previously mitigated with rb_gc_pointer_to_heap_p (which I'd like to stop using). These dangling keys could also become incorrectly associated with a new object allocated in its place. Instead we can declare object_table's keys as weak references. The GC then invokes our weak reference callback on every collection, whether or not tracing is running, letting us reap the records of objects about to be freed. This callback runs before the FREEOBJ hook, which makes the FREEOBJ tracepoint unnecessary. --- ext/objspace/object_tracing.c | 92 ++++++++++++++++------------------ internal/gc.h | 5 +- test/objspace/test_objspace.rb | 15 ++++++ 3 files changed, 60 insertions(+), 52 deletions(-) diff --git a/ext/objspace/object_tracing.c b/ext/objspace/object_tracing.c index 1c18bf02eeaf0b..74d793a6e232c1 100644 --- a/ext/objspace/object_tracing.c +++ b/ext/objspace/object_tracing.c @@ -22,7 +22,6 @@ struct traceobj_arg { int running; int keep_remains; VALUE newobj_trace; - VALUE freeobj_trace; st_table *object_table; /* obj (VALUE) -> allocation_info */ st_table *str_table; /* cstr -> refcount */ struct traceobj_arg *prev_traceobj_arg; @@ -96,13 +95,11 @@ newobj_i(VALUE tpval, void *data) st_data_t v; if (st_lookup(arg->object_table, (st_data_t)obj, &v)) { + /* keep_remains kept this slot's entry after its object was freed. The + * allocator has now reused that address, so recycle the dead entry's + * info. A living entry here would mean two live objects at one address. */ info = (struct allocation_info *)v; - if (arg->keep_remains) { - if (info->living) { - /* do nothing. there is possibility to keep living if FREEOBJ events while suppressing tracing */ - } - } - /* reuse info */ + assert(!info->living); delete_unique_str(arg->str_table, info->path); delete_unique_str(arg->str_table, info->class_path); } @@ -121,37 +118,6 @@ newobj_i(VALUE tpval, void *data) st_insert(arg->object_table, (st_data_t)obj, (st_data_t)info); } -static void -freeobj_i(VALUE tpval, void *data) -{ - struct traceobj_arg *arg = (struct traceobj_arg *)data; - rb_trace_arg_t *tparg = rb_tracearg_from_tracepoint(tpval); - st_data_t obj = (st_data_t)rb_tracearg_object(tparg); - st_data_t v; - struct allocation_info *info; - - /* Modifying the st table can cause allocations, which can trigger GC. - * Since freeobj_i is called during GC, it must not trigger another GC. */ - VALUE gc_disabled = rb_gc_disable_no_rest(); - - if (arg->keep_remains) { - if (st_lookup(arg->object_table, obj, &v)) { - info = (struct allocation_info *)v; - info->living = 0; - } - } - else { - if (st_delete(arg->object_table, &obj, &v)) { - info = (struct allocation_info *)v; - delete_unique_str(arg->str_table, info->path); - delete_unique_str(arg->str_table, info->class_path); - ruby_xfree(info); - } - } - - if (gc_disabled == Qfalse) rb_gc_enable(); -} - static int free_keys_i(st_data_t key, st_data_t value, st_data_t data) { @@ -171,7 +137,6 @@ allocation_info_tracer_mark(void *ptr) { struct traceobj_arg *trace_arg = (struct traceobj_arg *)ptr; rb_gc_mark(trace_arg->newobj_trace); - rb_gc_mark(trace_arg->freeobj_trace); } static void @@ -197,15 +162,47 @@ allocation_info_tracer_memsize(const void *ptr) return size; } +static int +allocation_info_tracer_weak_reference_i(st_data_t key, st_data_t value, st_data_t data) +{ + struct traceobj_arg *arg = (struct traceobj_arg *)data; + struct allocation_info *info = (struct allocation_info *)value; + + if (rb_gc_handle_weak_references_alive_p((VALUE)key)) { + return ST_CONTINUE; + } + + /* Object was collected. keep_remains keeps the dead entry for reporting. */ + if (arg->keep_remains) { + info->living = 0; + return ST_CONTINUE; + } + else { + delete_unique_str(arg->str_table, info->path); + delete_unique_str(arg->str_table, info->class_path); + ruby_xfree(info); + return ST_DELETE; + } +} + +static void +allocation_info_tracer_weak_reference(void *ptr) +{ + struct traceobj_arg *arg = (struct traceobj_arg *)ptr; + + st_foreach(arg->object_table, allocation_info_tracer_weak_reference_i, (st_data_t)arg); +} + static int allocation_info_tracer_compact_update_object_table_i(st_data_t key, st_data_t value, st_data_t data) { st_table *table = (st_table *)data; + struct allocation_info *info = (struct allocation_info *)value; - if (!rb_gc_pointer_to_heap_p(key)) { - struct allocation_info *info = (struct allocation_info *)value; - xfree(info); - return ST_DELETE; + /* In keep_remains mode the table keeps entries for freed objects. Their keys + * are dangling, so skip them instead of passing them to rb_gc_location. */ + if (!info->living) { + return ST_CONTINUE; } if (key != rb_gc_location(key)) { @@ -242,6 +239,7 @@ static const rb_data_type_t allocation_info_tracer_type = { allocation_info_tracer_free, /* Never called because global */ allocation_info_tracer_memsize, allocation_info_tracer_compact, + allocation_info_tracer_weak_reference, }, 0, 0, RUBY_TYPED_FREE_IMMEDIATELY }; @@ -260,9 +258,10 @@ get_traceobj_arg(void) tmp_trace_arg->running = 0; tmp_trace_arg->keep_remains = tmp_keep_remains; tmp_trace_arg->newobj_trace = 0; - tmp_trace_arg->freeobj_trace = 0; tmp_trace_arg->object_table = st_init_numtable(); tmp_trace_arg->str_table = st_init_strtable(); + + rb_gc_declare_weak_references(obj); } return tmp_trace_arg; } @@ -284,10 +283,8 @@ trace_object_allocations_start(VALUE self) else { if (arg->newobj_trace == 0) { arg->newobj_trace = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_NEWOBJ, newobj_i, arg); - arg->freeobj_trace = rb_tracepoint_new(0, RUBY_INTERNAL_EVENT_FREEOBJ, freeobj_i, arg); } rb_tracepoint_enable(arg->newobj_trace); - rb_tracepoint_enable(arg->freeobj_trace); } return Qnil; @@ -315,9 +312,6 @@ trace_object_allocations_stop(VALUE self) if (arg->newobj_trace != 0) { rb_tracepoint_disable(arg->newobj_trace); } - if (arg->freeobj_trace != 0) { - rb_tracepoint_disable(arg->freeobj_trace); - } } return Qnil; diff --git a/internal/gc.h b/internal/gc.h index 8b136e6572022c..e21fb892676f61 100644 --- a/internal/gc.h +++ b/internal/gc.h @@ -210,9 +210,6 @@ size_t rb_gc_heap_id_for_size(size_t size); void rb_gc_mark_and_move(VALUE *ptr); -void rb_gc_declare_weak_references(VALUE obj); -bool rb_gc_handle_weak_references_alive_p(VALUE obj); - void rb_gc_ref_update_table_values_only(st_table *tbl); void rb_gc_initial_stress_set(VALUE flag); @@ -233,6 +230,8 @@ void rb_objspace_reachable_objects_from_root(void (func)(const char *category, V int rb_objspace_internal_object_p(VALUE obj); int rb_objspace_garbage_object_p(VALUE obj); bool rb_gc_pointer_to_heap_p(VALUE obj); +void rb_gc_declare_weak_references(VALUE obj); +bool rb_gc_handle_weak_references_alive_p(VALUE obj); void rb_objspace_each_objects( int (*callback)(void *start, void *end, size_t stride, void *data), diff --git a/test/objspace/test_objspace.rb b/test/objspace/test_objspace.rb index faa22f1424f90a..a9b902ed458d44 100644 --- a/test/objspace/test_objspace.rb +++ b/test/objspace/test_objspace.rb @@ -350,6 +350,21 @@ def test_trace_object_allocations_compaction_freed_pages RUBY end + def test_trace_object_allocations_does_not_reuse_freed_allocation_info + assert_separately(%w(-robjspace), <<~RUBY) + ObjectSpace.trace_object_allocations do + 1_000_000.times.map { Object.new } + end + + GC.start + + objs = 1_000_000.times.map { Object.new } + + leaked = objs.count { |obj| ObjectSpace.allocation_sourcefile(obj) } + assert_equal 0, leaked + RUBY + end + def test_dump_flags # Ensure that the fstring is promoted to old generation 4.times { GC.start }