From 6633fde16618c5723edd3fd0345e3fcbcab85a1c Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Thu, 19 Mar 2026 11:01:44 -0500 Subject: [PATCH 01/16] Add WASM backend for Ruby API --- lib/prism.rb | 7 +- lib/prism/wasm.rb | 377 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 383 insertions(+), 1 deletion(-) create mode 100644 lib/prism/wasm.rb diff --git a/lib/prism.rb b/lib/prism.rb index 8f0342724a..14c79c4501 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -141,5 +141,10 @@ def self.find(callable, rubyvm: !!defined?(RubyVM)) # The FFI backend is used on other Ruby implementations. Prism::BACKEND = :FFI - require_relative "prism/ffi" + begin + require_relative "prism/ffi" + rescue LoadError + raise $! unless RUBY_ENGINE == "jruby" + require_relative "prism/wasm" + end end diff --git a/lib/prism/wasm.rb b/lib/prism/wasm.rb new file mode 100644 index 0000000000..e57c6bb375 --- /dev/null +++ b/lib/prism/wasm.rb @@ -0,0 +1,377 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +# We want to eagerly load this file if there are Ractors so that it does not get +# autoloaded from within a non-main Ractor. +require "prism/serialize" if defined?(Ractor) + +# Load the prism-parser-wasm jar +require 'jar-dependencies' +require_jar('org.ruby-lang', 'prism-parser-wasm', '0.0.1-SNAPSHOT') +require_jar('com.dylibso.chicory', 'runtime', '1.6.1') +require_jar('com.dylibso.chicory', 'wasi', '1.6.1') +require_jar('com.dylibso.chicory', 'wasm', '1.6.1') +require_jar('com.dylibso.chicory', 'log', '1.6.1') + +module Prism # :nodoc: + module WASM + java_import org.ruby_lang.prism.wasm.Prism + + # TODO: concurrency + PRISM = org.ruby_lang.prism.wasm.Prism.new + end + private_constant :WASM + + # The version constant is set by reading the result of calling pm_version. + VERSION = WASM::PRISM.version + + class << self + # Mirror the Prism.dump API by using the serialization API. + def dump(source, **options) + parsed = WASM::PRISM.parse(source.to_java_bytes, dump_options(options).to_java_bytes) + String.from_java_bytes(parsed) + end + + # Mirror the Prism.dump_file API by using the serialization API. + def dump_file(filepath, **options) + dump_file(File.read(filepath), filepath: filepath, **options) + end + + # Mirror the Prism.lex API by using the serialization API. + def lex(source, **options) + lexed = WASM::PRISM.lex(source.to_java_bytes, dump_options(options).to_java_bytes) + Serialize.load_lex(source, lexed, options.fetch(:freeze, false)) + end + + # Mirror the Prism.lex_file API by using the serialization API. + def lex_file(filepath, **options) + lex_file(File.read(filepath), filepath: filepath, **options) + end + + # Mirror the Prism.parse API by using the serialization API. + def parse(source, **options) + serialized = dump(source, **options) + Serialize.load_parse(source, serialized, options.fetch(:freeze, false)) + end + + # Mirror the Prism.parse_file API by using the serialization API. This uses + # native strings instead of Ruby strings because it allows us to use mmap + # when it is available. + def parse_file(filepath, **options) + parse(File.read(filepath), filepath: filepath, **options) + end + + # Mirror the Prism.parse_stream API by using the serialization API. + def parse_stream(stream, **options) + LibRubyParser::PrismBuffer.with do |buffer| + source = +"" + callback = -> (string, size, _) { + raise "Expected size to be >= 0, got: #{size}" if size <= 0 + + if !(line = stream.gets(size - 1)).nil? + source << line + string.write_string("#{line}\x00", line.bytesize + 1) + end + } + + eof_callback = -> (_) { stream.eof? } + + # In the pm_serialize_parse_stream function it accepts a pointer to the + # IO object as a void* and then passes it through to the callback as the + # third argument, but it never touches it itself. As such, since we have + # access to the IO object already through the closure of the lambda, we + # can pass a null pointer here and not worry. + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options)) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) + end + end + + # Mirror the Prism.parse_comments API by using the serialization API. + def parse_comments(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } + end + + # Mirror the Prism.parse_file_comments API by using the serialization + # API. This uses native strings instead of Ruby strings because it allows us + # to use mmap when it is available. + def parse_file_comments(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + end + + # Mirror the Prism.parse_lex API by using the serialization API. + def parse_lex(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } + end + + # Mirror the Prism.parse_lex_file API by using the serialization API. + def parse_lex_file(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + end + + # Mirror the Prism.parse_success? API by using the serialization API. + def parse_success?(code, **options) + LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_failure? API by using the serialization API. + def parse_failure?(code, **options) + !parse_success?(code, **options) + end + + # Mirror the Prism.parse_file_success? API by using the serialization API. + def parse_file_success?(filepath, **options) + options[:filepath] = filepath + LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } + end + + # Mirror the Prism.parse_file_failure? API by using the serialization API. + def parse_file_failure?(filepath, **options) + !parse_file_success?(filepath, **options) + end + + # Mirror the Prism.profile API by using the serialization API. + def profile(source, **options) + LibRubyParser::PrismString.with_string(source) do |string| + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + nil + end + end + end + + # Mirror the Prism.profile_file API by using the serialization API. + def profile_file(filepath, **options) + LibRubyParser::PrismString.with_file(filepath) do |string| + LibRubyParser::PrismBuffer.with do |buffer| + options[:filepath] = filepath + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + nil + end + end + end + + private + + def lex_common(string, code, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_common(string, code, options) # :nodoc: + serialized = dump_common(string, options) + Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) + end + + def parse_comments_common(string, code, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_lex_common(string, code, options) # :nodoc: + LibRubyParser::PrismBuffer.with do |buffer| + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_file_success_common(string, options) # :nodoc: + LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) + end + + # Return the value that should be dumped for the command_line option. + def dump_options_command_line(options) + command_line = options.fetch(:command_line, "") + raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) + + command_line.each_char.inject(0) do |value, char| + case char + when "a" then value | 0b000001 + when "e" then value | 0b000010 + when "l" then value | 0b000100 + when "n" then value | 0b001000 + when "p" then value | 0b010000 + when "x" then value | 0b100000 + else raise ArgumentError, "invalid command_line option: #{char}" + end + end + end + + # Return the value that should be dumped for the version option. + def dump_options_version(version) + case version + when "current" + version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) + when "latest", nil + 0 # Handled in pm_parser_init + when "nearest" + dump = version_string_to_number(RUBY_VERSION) + return dump if dump + if RUBY_VERSION < "3.3" + version_string_to_number("3.3") + else + 0 # Handled in pm_parser_init + end + else + version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") + end + end + + # Converts a version string like "4.0.0" or "4.0" into a number. + # Returns nil if the version is unknown. + def version_string_to_number(version) + case version + when /\A3\.3(\.\d+)?\z/ + 1 + when /\A3\.4(\.\d+)?\z/ + 2 + when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ + 3 + when /\A4\.1(\.\d+)?\z/ + 4 + end + end + + # Convert the given options into a serialized options string. + def dump_options(options) + template = +"" + values = [] + + template << "L" + if (filepath = options[:filepath]) + values.push(filepath.bytesize, filepath.b) + template << "A*" + else + values << 0 + end + + template << "l" + values << options.fetch(:line, 1) + + template << "L" + if (encoding = options[:encoding]) + name = encoding.is_a?(Encoding) ? encoding.name : encoding + values.push(name.bytesize, name.b) + template << "A*" + else + values << 0 + end + + template << "C" + values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) + + template << "C" + values << dump_options_command_line(options) + + template << "C" + values << dump_options_version(options[:version]) + + template << "C" + values << (options[:encoding] == false ? 1 : 0) + + template << "C" + values << (options.fetch(:main_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:partial_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:freeze, false) ? 1 : 0) + + template << "L" + if (scopes = options[:scopes]) + values << scopes.length + + scopes.each do |scope| + locals = nil + forwarding = 0 + + case scope + when Array + locals = scope + when Scope + locals = scope.locals + + scope.forwarding.each do |forward| + case forward + when :* then forwarding |= 0x1 + when :** then forwarding |= 0x2 + when :& then forwarding |= 0x4 + when :"..." then forwarding |= 0x8 + else raise ArgumentError, "invalid forwarding value: #{forward}" + end + end + else + raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" + end + + template << "L" + values << locals.length + + template << "C" + values << forwarding + + locals.each do |local| + name = local.name + template << "L" + values << name.bytesize + + template << "A*" + values << name.b + end + end + else + values << 0 + end + + values.pack(template) + end + end + + # Here we are going to patch StringQuery to put in the class-level methods so + # that it can maintain a consistent interface + class StringQuery # :nodoc: + class << self + # Mirrors the C extension's StringQuery::local? method. + def local?(string) + query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)) + end + + # Mirrors the C extension's StringQuery::constant? method. + def constant?(string) + query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)) + end + + # Mirrors the C extension's StringQuery::method_name? method. + def method_name?(string) + query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)) + end + + private + + # Parse the enum result and return an appropriate boolean. + def query(result) + case result + when :PM_STRING_QUERY_ERROR + raise ArgumentError, "Invalid or non ascii-compatible encoding" + when :PM_STRING_QUERY_FALSE + false + when :PM_STRING_QUERY_TRUE + true + end + end + end + end +end From a1dab667cc9b76fc2d52fb1d08eeabe8a46f2f90 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Fri, 20 Mar 2026 13:00:12 -0500 Subject: [PATCH 02/16] Separate common parts of FFI for WASM use --- lib/prism.rb | 7 +- lib/prism/ffi.rb | 502 +++--------------------------------- lib/prism/ffi/common.rb | 229 ++++++++++++++++ lib/prism/ffi/native_ffi.rb | 325 +++++++++++++++++++++++ lib/prism/ffi/wasm_ffi.rb | 104 ++++++++ lib/prism/wasm.rb | 377 --------------------------- 6 files changed, 694 insertions(+), 850 deletions(-) create mode 100644 lib/prism/ffi/common.rb create mode 100644 lib/prism/ffi/native_ffi.rb create mode 100644 lib/prism/ffi/wasm_ffi.rb delete mode 100644 lib/prism/wasm.rb diff --git a/lib/prism.rb b/lib/prism.rb index 14c79c4501..8f0342724a 100644 --- a/lib/prism.rb +++ b/lib/prism.rb @@ -141,10 +141,5 @@ def self.find(callable, rubyvm: !!defined?(RubyVM)) # The FFI backend is used on other Ruby implementations. Prism::BACKEND = :FFI - begin - require_relative "prism/ffi" - rescue LoadError - raise $! unless RUBY_ENGINE == "jruby" - require_relative "prism/wasm" - end + require_relative "prism/ffi" end diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 6b9bde51ea..fb5ca055b7 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -2,285 +2,49 @@ # :markup: markdown # typed: ignore -# This file is responsible for mirroring the API provided by the C extension by -# using FFI to call into the shared library. - -require "rbconfig" -require "ffi" - -# We want to eagerly load this file if there are Ractors so that it does not get -# autoloaded from within a non-main Ractor. -require "prism/serialize" if defined?(Ractor) - -module Prism # :nodoc: - module LibRubyParser # :nodoc: - extend FFI::Library - - # Define the library that we will be pulling functions from. Note that this - # must align with the build shared library from make/rake. - libprism_in_build = File.expand_path("../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) - libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}" - - if File.exist?(libprism_in_build) - INCLUDE_DIR = File.expand_path("../../include", __dir__) - ffi_lib libprism_in_build - else - INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include" - ffi_lib libprism_in_libdir - end - - # Convert a native C type declaration into a symbol that FFI understands. - # For example: - # - # const char * -> :pointer - # bool -> :bool - # size_t -> :size_t - # void -> :void - # - def self.resolve_type(type, callbacks) - type = type.strip - - if !type.end_with?("*") - type.delete_prefix("const ").to_sym - else - type = type.delete_suffix("*").rstrip - callbacks.include?(type.to_sym) ? type.to_sym : :pointer - end - end - - # Read through the given header file and find the declaration of each of the - # given functions. For each one, define a function with the same name and - # signature as the C function. - def self.load_exported_functions_from(header, *functions, callbacks) - File.foreach("#{INCLUDE_DIR}/#{header}") do |line| - # We only want to attempt to load exported functions. - next unless line.start_with?("PRISM_EXPORTED_FUNCTION ") - - # We only want to load the functions that we are interested in. - next unless functions.any? { |function| line.include?(function) } - - # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) - line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") - - # Parse the function declaration. - unless /^PRISM_EXPORTED_FUNCTION (?.+) (?\w+)\((?.+)\);$/ =~ line - raise "Could not parse #{line}" - end - - # Delete the function from the list of functions we are looking for to - # mark it as having been found. - functions.delete(name) - - # Split up the argument types into an array, ensure we handle the case - # where there are no arguments (by explicit void). - arg_types = arg_types.split(",").map(&:strip) - arg_types = [] if arg_types == %w[void] - - # Resolve the type of the argument by dropping the name of the argument - # first if it is present. - arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) } - - # Attach the function using the FFI library. - attach_function name, arg_types, resolve_type(return_type, []) - end - - # If we didn't find all of the functions, raise an error. - raise "Could not find functions #{functions.inspect}" unless functions.empty? - end - - callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer - callback :pm_source_stream_feof_t, [:pointer], :int - pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] - enum :pm_source_init_result_t, pm_source_init_result_values - enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] - - # Ractor-safe lookup table for pm_source_init_result_t, since FFI's - # enum_type accesses module instance variables that are not shareable. - SOURCE_INIT_RESULT = pm_source_init_result_values.freeze - - load_exported_functions_from( - "prism/version.h", - "pm_version", - [] - ) - - load_exported_functions_from( - "prism/serialize.h", - "pm_serialize_parse", - "pm_serialize_parse_stream", - "pm_serialize_parse_comments", - "pm_serialize_lex", - "pm_serialize_parse_lex", - "pm_serialize_parse_success_p", - [] - ) - - load_exported_functions_from( - "prism/string_query.h", - "pm_string_query_local", - "pm_string_query_constant", - "pm_string_query_method_name", - [] - ) - - load_exported_functions_from( - "prism/buffer.h", - "pm_buffer_new", - "pm_buffer_value", - "pm_buffer_length", - "pm_buffer_free", - [] - ) - - load_exported_functions_from( - "prism/source.h", - "pm_source_file_new", - "pm_source_mapped_new", - "pm_source_stream_new", - "pm_source_free", - "pm_source_source", - "pm_source_length", - [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] - ) - - # This object represents a pm_buffer_t. We only use it as an opaque pointer, - # so it doesn't need to know the fields of pm_buffer_t. - class PrismBuffer # :nodoc: - attr_reader :pointer - - def initialize(pointer) - @pointer = pointer - end - - def value - LibRubyParser.pm_buffer_value(pointer) - end - - def length - LibRubyParser.pm_buffer_length(pointer) - end - - def read - value.read_string(length) - end - - # Initialize a new buffer and yield it to the block. The buffer will be - # automatically freed when the block returns. - def self.with - buffer = LibRubyParser.pm_buffer_new - raise unless buffer - - begin - yield new(buffer) - ensure - LibRubyParser.pm_buffer_free(buffer) - end - end - end - - # This object represents source code to be parsed. For strings it wraps a - # pointer directly; for files it uses a pm_source_t under the hood. - class PrismSource # :nodoc: - PLATFORM_EXPECTS_UTF8 = - RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) - - attr_reader :pointer, :length - - def initialize(pointer, length, from_string) - @pointer = pointer - @length = length - @from_string = from_string - end - - def read - raise "should use the original String instead" if @from_string - @pointer.read_string(@length) - end - - # Yields a PrismSource backed by the given string to the block. - def self.with_string(string) - raise TypeError unless string.is_a?(String) - - length = string.bytesize - # + 1 to never get an address of 0, which pm_parser_init() asserts - FFI::MemoryPointer.new(:char, length + 1, false) do |pointer| - pointer.write_string(string) - # since we have the extra byte we might as well \0-terminate - pointer.put_char(length, 0) - return yield new(pointer, length, true) - end - end - - # Yields a PrismSource to the given block, backed by a pm_source_t. - def self.with_file(filepath) - raise TypeError unless filepath.is_a?(String) - - # On Windows and Mac, it's expected that filepaths will be encoded in - # UTF-8. If they are not, we need to convert them to UTF-8 before - # passing them into pm_source_mapped_new. - if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 - filepath = filepath.encode(Encoding::UTF_8) - end - - FFI::MemoryPointer.new(:int) do |result_ptr| - pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) - - case SOURCE_INIT_RESULT[result_ptr.read_int] - when :PM_SOURCE_INIT_SUCCESS - pointer = LibRubyParser.pm_source_source(pm_source) - length = LibRubyParser.pm_source_length(pm_source) - return yield new(pointer, length, false) - when :PM_SOURCE_INIT_ERROR_GENERIC - raise SystemCallError.new(filepath, FFI.errno) - when :PM_SOURCE_INIT_ERROR_DIRECTORY - raise Errno::EISDIR.new(filepath) - when :PM_SOURCE_INIT_ERROR_NON_REGULAR - # Fall back to reading the file through Ruby IO for non-regular - # files (pipes, character devices, etc.) - return with_string(File.read(filepath)) { |string| yield string } - else - raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" - end - ensure - LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? - end - end - end +require_relative "ffi/common" + +begin + require_relative "ffi/native_ffi.rb" +rescue LoadError + if RUBY_ENGINE == "jruby" + require_relative "ffi/wasm_ffi.rb" + else + raise end +end - # Mark the LibRubyParser module as private as it should only be called through - # the prism module. - private_constant :LibRubyParser +module Prism # :nodoc: # The version constant is set by reading the result of calling pm_version. - VERSION = LibRubyParser.pm_version.read_string.freeze + VERSION = FFICommon.version class << self # Mirror the Prism.dump API by using the serialization API. def dump(source, **options) - LibRubyParser::PrismSource.with_string(source) { |string| dump_common(string, options) } + FFICommon.with_string(source) { |string| FFICommon.dump(string, options) } end # Mirror the Prism.dump_file API by using the serialization API. def dump_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| dump_common(string, options) } + FFICommon.with_file(filepath) { |string| FFICommon.dump(string, options) } end # Mirror the Prism.lex API by using the serialization API. def lex(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| lex_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.lex(string, code, options) } end # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| lex_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.lex(string, string.read, options) } end # Mirror the Prism.parse API by using the serialization API. def parse(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.parse(string, code, options) } end # Mirror the Prism.parse_file API by using the serialization API. This uses @@ -288,12 +52,12 @@ def parse(code, **options) # when it is available. def parse_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse(string, string.read, options) } end # Mirror the Prism.parse_stream API by using the serialization API. def parse_stream(stream, **options) - LibRubyParser::PrismBuffer.with do |buffer| + FFICommon.with_buffer do |buffer| source = +"" callback = -> (string, size, _) { raise "Expected size to be >= 0, got: #{size}" if size <= 0 @@ -306,19 +70,13 @@ def parse_stream(stream, **options) eof_callback = -> (_) { stream.eof? } - pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) - begin - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) - Prism.load(source, buffer.read, options.fetch(:freeze, false)) - ensure - LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? - end + FFICommon.parse_stream(buffer, callback, eof_callback, options, source) end end # Mirror the Prism.parse_comments API by using the serialization API. def parse_comments(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_comments_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_comments(string, code, options) } end # Mirror the Prism.parse_file_comments API by using the serialization @@ -326,23 +84,23 @@ def parse_comments(code, **options) # to use mmap when it is available. def parse_file_comments(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse_comments(string, string.read, options) } end # Mirror the Prism.parse_lex API by using the serialization API. def parse_lex(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_lex_common(string, code, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_lex(string, code, options) } end # Mirror the Prism.parse_lex_file API by using the serialization API. def parse_lex_file(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse_lex(string, string.read, options) } end # Mirror the Prism.parse_success? API by using the serialization API. def parse_success?(code, **options) - LibRubyParser::PrismSource.with_string(code) { |string| parse_file_success_common(string, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_file_success(string, options) } end # Mirror the Prism.parse_failure? API by using the serialization API. @@ -353,7 +111,7 @@ def parse_failure?(code, **options) # Mirror the Prism.parse_file_success? API by using the serialization API. def parse_file_success?(filepath, **options) options[:filepath] = filepath - LibRubyParser::PrismSource.with_file(filepath) { |string| parse_file_success_common(string, options) } + FFICommon.with_file(filepath) { |string| FFICommon.parse_file_success(string, options) } end # Mirror the Prism.parse_file_failure? API by using the serialization API. @@ -363,9 +121,9 @@ def parse_file_failure?(filepath, **options) # Mirror the Prism.profile API by using the serialization API. def profile(source, **options) - LibRubyParser::PrismSource.with_string(source) do |string| - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + FFICommon.with_string(source) do |string| + FFICommon.with_buffer do |buffer| + FFICommon.parse_only(buffer, string, options) nil end end @@ -373,205 +131,15 @@ def profile(source, **options) # Mirror the Prism.profile_file API by using the serialization API. def profile_file(filepath, **options) - LibRubyParser::PrismSource.with_file(filepath) do |string| - LibRubyParser::PrismBuffer.with do |buffer| + FFICommon.with_file(filepath) do |string| + FFICommon.with_buffer do |buffer| options[:filepath] = filepath - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + FFICommon.parse_only(buffer, string, options) nil end end end - private - - def dump_common(string, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) - - dumped = buffer.read - dumped.freeze if options.fetch(:freeze, false) - - dumped - end - end - - def lex_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_common(string, code, options) # :nodoc: - serialized = dump_common(string, options) - Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) - end - - def parse_comments_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_lex_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_file_success_common(string, options) # :nodoc: - LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) - end - - # Return the value that should be dumped for the command_line option. - def dump_options_command_line(options) - command_line = options.fetch(:command_line, "") - raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) - - command_line.each_char.inject(0) do |value, char| - case char - when "a" then value | 0b000001 - when "e" then value | 0b000010 - when "l" then value | 0b000100 - when "n" then value | 0b001000 - when "p" then value | 0b010000 - when "x" then value | 0b100000 - else raise ArgumentError, "invalid command_line option: #{char}" - end - end - end - - # Return the value that should be dumped for the version option. - def dump_options_version(version) - case version - when "current" - version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) - when "latest", nil - 0 # Handled in pm_parser_init - when "nearest" - dump = version_string_to_number(RUBY_VERSION) - return dump if dump - if RUBY_VERSION < "3.3" - version_string_to_number("3.3") - else - 0 # Handled in pm_parser_init - end - else - version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") - end - end - - # Converts a version string like "4.0.0" or "4.0" into a number. - # Returns nil if the version is unknown. - def version_string_to_number(version) - case version - when /\A3\.3(\.\d+)?\z/ - 1 - when /\A3\.4(\.\d+)?\z/ - 2 - when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ - 3 - when /\A4\.1(\.\d+)?\z/ - 4 - end - end - - # Convert the given options into a serialized options string. - def dump_options(options) - template = +"" - values = [] - - template << "L" - if (filepath = options[:filepath]) - values.push(filepath.bytesize, filepath.b) - template << "A*" - else - values << 0 - end - - template << "l" - values << options.fetch(:line, 1) - - template << "L" - if (encoding = options[:encoding]) - name = encoding.is_a?(Encoding) ? encoding.name : encoding - values.push(name.bytesize, name.b) - template << "A*" - else - values << 0 - end - - template << "C" - values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) - - template << "C" - values << dump_options_command_line(options) - - template << "C" - values << dump_options_version(options[:version]) - - template << "C" - values << (options[:encoding] == false ? 1 : 0) - - template << "C" - values << (options.fetch(:main_script, false) ? 1 : 0) - - template << "C" - values << (options.fetch(:partial_script, false) ? 1 : 0) - - template << "C" - values << (options.fetch(:freeze, false) ? 1 : 0) - - template << "L" - if (scopes = options[:scopes]) - values << scopes.length - - scopes.each do |scope| - locals = nil - forwarding = 0 - - case scope - when Array - locals = scope - when Scope - locals = scope.locals - - scope.forwarding.each do |forward| - case forward - when :* then forwarding |= 0x1 - when :** then forwarding |= 0x2 - when :& then forwarding |= 0x4 - when :"..." then forwarding |= 0x8 - else raise ArgumentError, "invalid forwarding value: #{forward}" - end - end - else - raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" - end - - template << "L" - values << locals.length - - template << "C" - values << forwarding - - locals.each do |local| - name = local.name - template << "L" - values << name.bytesize - - template << "A*" - values << name.b - end - end - else - values << 0 - end - - values.pack(template) - end end # Here we are going to patch StringQuery to put in the class-level methods so @@ -580,17 +148,17 @@ class StringQuery # :nodoc: class << self # Mirrors the C extension's StringQuery::local? method. def local?(string) - query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)) + query(FFICommon.string_query_local(string)) end # Mirrors the C extension's StringQuery::constant? method. def constant?(string) - query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)) + query(FFICommon.string_query_constant(string)) end # Mirrors the C extension's StringQuery::method_name? method. def method_name?(string) - query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)) + query(FFICommon.string_query_method_name(string)) end private diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb new file mode 100644 index 0000000000..2600273318 --- /dev/null +++ b/lib/prism/ffi/common.rb @@ -0,0 +1,229 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +module Prism + + class Common + def dump(string, options) # :nodoc: + with_buffer do |buffer| + parse_only(buffer, string, options) + + dumped = buffer.read + dumped.freeze if options.fetch(:freeze, false) + + dumped + end + end + + def parse(string, code, options) # :nodoc: + serialized = dump(string, options) + Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) + end + + def lex(string, code, options) # :nodoc: + with_buffer do |buffer| + lex_only(buffer, string, options) + Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + + # Return the value that should be dumped for the command_line option. + def dump_options_command_line(options) + command_line = options.fetch(:command_line, "") + raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) + + command_line.each_char.inject(0) do |value, char| + case char + when "a" then value | 0b000001 + when "e" then value | 0b000010 + when "l" then value | 0b000100 + when "n" then value | 0b001000 + when "p" then value | 0b010000 + when "x" then value | 0b100000 + else raise ArgumentError, "invalid command_line option: #{char}" + end + end + end + + # Return the value that should be dumped for the version option. + def dump_options_version(version) + case version + when "current" + version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) + when "latest", nil + 0 # Handled in pm_parser_init + when "nearest" + dump = version_string_to_number(RUBY_VERSION) + return dump if dump + if RUBY_VERSION < "3.3" + version_string_to_number("3.3") + else + 0 # Handled in pm_parser_init + end + else + version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") + end + end + + # Converts a version string like "4.0.0" or "4.0" into a number. + # Returns nil if the version is unknown. + def version_string_to_number(version) + case version + when /\A3\.3(\.\d+)?\z/ + 1 + when /\A3\.4(\.\d+)?\z/ + 2 + when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ + 3 + when /\A4\.1(\.\d+)?\z/ + 4 + end + end + + # Convert the given options into a serialized options string. + def dump_options(options) + template = +"" + values = [] + + template << "L" + if (filepath = options[:filepath]) + values.push(filepath.bytesize, filepath.b) + template << "A*" + else + values << 0 + end + + template << "l" + values << options.fetch(:line, 1) + + template << "L" + if (encoding = options[:encoding]) + name = encoding.is_a?(Encoding) ? encoding.name : encoding + values.push(name.bytesize, name.b) + template << "A*" + else + values << 0 + end + + template << "C" + values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) + + template << "C" + values << dump_options_command_line(options) + + template << "C" + values << dump_options_version(options[:version]) + + template << "C" + values << (options[:encoding] == false ? 1 : 0) + + template << "C" + values << (options.fetch(:main_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:partial_script, false) ? 1 : 0) + + template << "C" + values << (options.fetch(:freeze, false) ? 1 : 0) + + template << "L" + if (scopes = options[:scopes]) + values << scopes.length + + scopes.each do |scope| + locals = nil + forwarding = 0 + + case scope + when Array + locals = scope + when Scope + locals = scope.locals + + scope.forwarding.each do |forward| + case forward + when :* then forwarding |= 0x1 + when :** then forwarding |= 0x2 + when :& then forwarding |= 0x4 + when :"..." then forwarding |= 0x8 + else raise ArgumentError, "invalid forwarding value: #{forward}" + end + end + else + raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" + end + + template << "L" + values << locals.length + + template << "C" + values << forwarding + + locals.each do |local| + name = local.name + template << "L" + values << name.bytesize + + template << "A*" + values << name.b + end + end + else + values << 0 + end + + values.pack(template) + end + + # Required APIs below + + def with_buffer(&b) + raise NotImplementedError + end + + def with_string(string, &b) + raise NotImplementedError + end + + def with_file(string, &b) + raise NotImplementedError + end + + def lex_only(buffer, string, options) + raise NotImplementedError + end + + def parse_only(buffer, string, options) + raise NotImplementedError + end + + def parse_stream(buffer, callback, eof_callback, options, source) + raise NotImplementedError + end + + def parse_comments(string, code, options) # :nodoc: + raise NotImplementedError + end + + def parse_lex(string, code, options) # :nodoc: + raise NotImplementedError + end + + def parse_file_success(string, options) # :nodoc: + raise NotImplementedError + end + + def string_query_method_name(string) + raise NotImplementedError + end + + def string_query_constant(string) + raise NotImplementedError + end + + def string_query_local(string) + raise NotImplementedError + end + end +end diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb new file mode 100644 index 0000000000..600e4236ec --- /dev/null +++ b/lib/prism/ffi/native_ffi.rb @@ -0,0 +1,325 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +# We want to eagerly load this file if there are Ractors so that it does not get +# autoloaded from within a non-main Ractor. +require "prism/serialize" if defined?(Ractor) + +module Prism # :nodoc: + module LibRubyParser # :nodoc: + extend FFI::Library + + # Define the library that we will be pulling functions from. Note that this + # must align with the build shared library from make/rake. + libprism_in_build = File.expand_path("../../../build/libprism.#{RbConfig::CONFIG["SOEXT"]}", __dir__) + libprism_in_libdir = "#{RbConfig::CONFIG["libdir"]}/prism/libprism.#{RbConfig::CONFIG["SOEXT"]}" + + if File.exist?(libprism_in_build) + INCLUDE_DIR = File.expand_path("../../../include", __dir__) + ffi_lib libprism_in_build + else + INCLUDE_DIR = "#{RbConfig::CONFIG["libdir"]}/prism/include" + ffi_lib libprism_in_libdir + end + + # Convert a native C type declaration into a symbol that FFI understands. + # For example: + # + # const char * -> :pointer + # bool -> :bool + # size_t -> :size_t + # void -> :void + # + def self.resolve_type(type, callbacks) + type = type.strip + + if !type.end_with?("*") + type.delete_prefix("const ").to_sym + else + type = type.delete_suffix("*").rstrip + callbacks.include?(type.to_sym) ? type.to_sym : :pointer + end + end + + # Read through the given header file and find the declaration of each of the + # given functions. For each one, define a function with the same name and + # signature as the C function. + def self.load_exported_functions_from(header, *functions, callbacks) + File.foreach("#{INCLUDE_DIR}/#{header}") do |line| + # We only want to attempt to load exported functions. + next unless line.start_with?("PRISM_EXPORTED_FUNCTION ") + + # We only want to load the functions that we are interested in. + next unless functions.any? { |function| line.include?(function) } + + # Strip trailing attributes (PRISM_NODISCARD, PRISM_NONNULL(...), etc.) + line = line.sub(/\)(\s+PRISM_\w+(?:\([^)]*\))?)+\s*;/, ");") + + # Parse the function declaration. + unless /^PRISM_EXPORTED_FUNCTION (?.+) (?\w+)\((?.+)\);$/ =~ line + raise "Could not parse #{line}" + end + + # Delete the function from the list of functions we are looking for to + # mark it as having been found. + functions.delete(name) + + # Split up the argument types into an array, ensure we handle the case + # where there are no arguments (by explicit void). + arg_types = arg_types.split(",").map(&:strip) + arg_types = [] if arg_types == %w[void] + + # Resolve the type of the argument by dropping the name of the argument + # first if it is present. + arg_types.map! { |type| resolve_type(type.sub(/\w+$/, ""), callbacks) } + + # Attach the function using the FFI library. + attach_function name, arg_types, resolve_type(return_type, []) + end + + # If we didn't find all of the functions, raise an error. + raise "Could not find functions #{functions.inspect}" unless functions.empty? + end + + callback :pm_source_stream_fgets_t, [:pointer, :int, :pointer], :pointer + callback :pm_source_stream_feof_t, [:pointer], :int + pm_source_init_result_values = %i[PM_SOURCE_INIT_SUCCESS PM_SOURCE_INIT_ERROR_GENERIC PM_SOURCE_INIT_ERROR_DIRECTORY PM_SOURCE_INIT_ERROR_NON_REGULAR] + enum :pm_source_init_result_t, pm_source_init_result_values + enum :pm_string_query_t, [:PM_STRING_QUERY_ERROR, -1, :PM_STRING_QUERY_FALSE, :PM_STRING_QUERY_TRUE] + + # Ractor-safe lookup table for pm_source_init_result_t, since FFI's + # enum_type accesses module instance variables that are not shareable. + SOURCE_INIT_RESULT = pm_source_init_result_values.freeze + + load_exported_functions_from( + "prism/version.h", + "pm_version", + [] + ) + + load_exported_functions_from( + "prism/serialize.h", + "pm_serialize_parse", + "pm_serialize_parse_stream", + "pm_serialize_parse_comments", + "pm_serialize_lex", + "pm_serialize_parse_lex", + "pm_serialize_parse_success_p", + [] + ) + + load_exported_functions_from( + "prism/string_query.h", + "pm_string_query_local", + "pm_string_query_constant", + "pm_string_query_method_name", + [] + ) + + load_exported_functions_from( + "prism/buffer.h", + "pm_buffer_new", + "pm_buffer_value", + "pm_buffer_length", + "pm_buffer_free", + [] + ) + + load_exported_functions_from( + "prism/source.h", + "pm_source_file_new", + "pm_source_mapped_new", + "pm_source_stream_new", + "pm_source_free", + "pm_source_source", + "pm_source_length", + [:pm_source_stream_fgets_t, :pm_source_stream_feof_t] + ) + + # This object represents a pm_buffer_t. We only use it as an opaque pointer, + # so it doesn't need to know the fields of pm_buffer_t. + class NativeBuffer # :nodoc: + attr_reader :pointer + + def initialize(pointer) + @pointer = pointer + end + + def value + LibRubyParser.pm_buffer_value(pointer) + end + + def length + LibRubyParser.pm_buffer_length(pointer) + end + + def read + value.read_string(length) + end + + # Initialize a new buffer and yield it to the block. The buffer will be + # automatically freed when the block returns. + def self.with + buffer = LibRubyParser.pm_buffer_new + raise unless buffer + + begin + yield new(buffer) + ensure + LibRubyParser.pm_buffer_free(buffer) + end + end + end + + # This object represents source code to be parsed. For strings it wraps a + # pointer directly; for files it uses a pm_source_t under the hood. + class NativeSource # :nodoc: + PLATFORM_EXPECTS_UTF8 = + RbConfig::CONFIG["host_os"].match?(/bccwin|cygwin|djgpp|mingw|mswin|wince|darwin/i) + + attr_reader :pointer, :length + + def initialize(pointer, length, from_string) + @pointer = pointer + @length = length + @from_string = from_string + end + + def read + raise "should use the original String instead" if @from_string + @pointer.read_string(@length) + end + + # Yields a PrismSource backed by the given string to the block. + def self.with_string(string) + raise TypeError unless string.is_a?(String) + + length = string.bytesize + # + 1 to never get an address of 0, which pm_parser_init() asserts + FFI::MemoryPointer.new(:char, length + 1, false) do |pointer| + pointer.write_string(string) + # since we have the extra byte we might as well \0-terminate + pointer.put_char(length, 0) + return yield new(pointer, length, true) + end + end + + # Yields a PrismSource to the given block, backed by a pm_source_t. + def self.with_file(filepath) + raise TypeError unless filepath.is_a?(String) + + # On Windows and Mac, it's expected that filepaths will be encoded in + # UTF-8. If they are not, we need to convert them to UTF-8 before + # passing them into pm_source_mapped_new. + if PLATFORM_EXPECTS_UTF8 && (encoding = filepath.encoding) != Encoding::ASCII_8BIT && encoding != Encoding::UTF_8 + filepath = filepath.encode(Encoding::UTF_8) + end + + FFI::MemoryPointer.new(:int) do |result_ptr| + pm_source = LibRubyParser.pm_source_mapped_new(filepath, 0, result_ptr) + + case SOURCE_INIT_RESULT[result_ptr.read_int] + when :PM_SOURCE_INIT_SUCCESS + pointer = LibRubyParser.pm_source_source(pm_source) + length = LibRubyParser.pm_source_length(pm_source) + return yield new(pointer, length, false) + when :PM_SOURCE_INIT_ERROR_GENERIC + raise SystemCallError.new(filepath, FFI.errno) + when :PM_SOURCE_INIT_ERROR_DIRECTORY + raise Errno::EISDIR.new(filepath) + when :PM_SOURCE_INIT_ERROR_NON_REGULAR + # Fall back to reading the file through Ruby IO for non-regular + # files (pipes, character devices, etc.) + return with_string(File.read(filepath)) { |string| yield string } + else + raise "Unknown error initializing pm_source_t: #{result_ptr.read_int}" + end + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? + end + end + end + end + + # Mark the LibRubyParser module as private as it should only be called through + # the prism module. + private_constant :LibRubyParser + + class NativeCommon < Common + + # The version constant is set by reading the result of calling pm_version. + def version + LibRubyParser.pm_version.read_string.freeze + end + + def with_buffer(&b) + LibRubyParser::NativeBuffer.with(&b) + end + + def with_string(string, &b) + LibRubyParser::NativeSource.with_string(string, &b) + end + + def with_file(string, &b) + LibRubyParser::NativeSource.with_file(string, &b) + end + + def lex_only(buffer, string, options) + LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + end + + def parse_only(buffer, string, options) + LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) + end + + def parse_stream(buffer, callback, eof_callback, options, source) + pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) + begin + LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) + Prism.load(source, buffer.read, options.fetch(:freeze, false)) + ensure + LibRubyParser.pm_source_free(pm_source) if pm_source && !pm_source.null? + end + end + + def parse_comments(string, code, options) # :nodoc: + with_buffer do |buffer| + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_lex(string, code, options) # :nodoc: + with_buffer do |buffer| + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) + Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + + def parse_file_success(string, options) # :nodoc: + LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) + end + + def string_query_method_name(string) + LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name) + end + + def string_query_constant(string) + LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name) + end + + def string_query_local(string) + LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name) + end + end + + FFICommon = NativeCommon.new + private_constant(:FFICommon) +end diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb new file mode 100644 index 0000000000..37ad421474 --- /dev/null +++ b/lib/prism/ffi/wasm_ffi.rb @@ -0,0 +1,104 @@ +# frozen_string_literal: true +# :markup: markdown +# typed: ignore + +# This file is responsible for mirroring the API provided by the C extension by +# using FFI to call into the shared library. + +require "rbconfig" +require "ffi" + +# We want to eagerly load this file if there are Ractors so that it does not get +# autoloaded from within a non-main Ractor. +require "prism/serialize" if defined?(Ractor) + +# Load the prism-parser-wasm jar +require 'jar-dependencies' +require_jar('org.ruby-lang', 'prism-parser-wasm', '0.0.1-SNAPSHOT') +require_jar('com.dylibso.chicory', 'runtime', '1.6.1') +require_jar('com.dylibso.chicory', 'wasi', '1.6.1') +require_jar('com.dylibso.chicory', 'wasm', '1.6.1') +require_jar('com.dylibso.chicory', 'log', '1.6.1') + +module Prism # :nodoc: + class WASMCommon < Common + java_import org.ruby_lang.prism.wasm.Prism + + # TODO: concurrency + PRISM = org.ruby_lang.prism.wasm.Prism.new + + def version + # The version constant is set by reading the result of calling pm_version. + WASM::PRISM.version + end + + # Prototype WASM code + # def dump(source, **options) + # parsed = WASM::PRISM.parse(source.to_java_bytes, dump_options(options).to_java_bytes) + # end + # + # # Mirror the Prism.dump_file API by using the serialization API. + # def dump_file(filepath, **options) + # dump_file(File.read(filepath), filepath: filepath, **options) + # end + # + # # Mirror the Prism.lex API by using the serialization API. + # def lex(source, **options) + # lexed = WASM::PRISM.lex(source.to_java_bytes, dump_options(options).to_java_bytes) + # Serialize.load_lex(source, lexed, options.fetch(:freeze, false)) + # end + # + # # Mirror the Prism.lex_file API by using the serialization API. + # def lex_file(filepath, **options) + # lex_file(File.read(filepath), filepath: filepath, **options) + # end + + def with_buffer(&b) + raise NotImplementedError + end + + def with_string(string, &b) + raise NotImplementedError + end + + def with_file(string, &b) + raise NotImplementedError + end + + def lex_only(buffer, string, options) + raise NotImplementedError + end + + def parse_only(buffer, string, options) + raise NotImplementedError + end + + def parse_stream(buffer, callback, eof_callback, options, source) + raise NotImplementedError + end + + def parse_comments(string, code, options) # :nodoc: + raise NotImplementedError + end + + def parse_lex(string, code, options) # :nodoc: + raise NotImplementedError + end + + def parse_file_success(string, options) # :nodoc: + raise NotImplementedError + end + + def string_query_method_name(string) + raise NotImplementedError + end + + def string_query_constant(string) + raise NotImplementedError + end + + def string_query_local(string) + raise NotImplementedError + end + end +end diff --git a/lib/prism/wasm.rb b/lib/prism/wasm.rb deleted file mode 100644 index e57c6bb375..0000000000 --- a/lib/prism/wasm.rb +++ /dev/null @@ -1,377 +0,0 @@ -# frozen_string_literal: true -# :markup: markdown -# typed: ignore - -# This file is responsible for mirroring the API provided by the C extension by -# using FFI to call into the shared library. - -require "rbconfig" -require "ffi" - -# We want to eagerly load this file if there are Ractors so that it does not get -# autoloaded from within a non-main Ractor. -require "prism/serialize" if defined?(Ractor) - -# Load the prism-parser-wasm jar -require 'jar-dependencies' -require_jar('org.ruby-lang', 'prism-parser-wasm', '0.0.1-SNAPSHOT') -require_jar('com.dylibso.chicory', 'runtime', '1.6.1') -require_jar('com.dylibso.chicory', 'wasi', '1.6.1') -require_jar('com.dylibso.chicory', 'wasm', '1.6.1') -require_jar('com.dylibso.chicory', 'log', '1.6.1') - -module Prism # :nodoc: - module WASM - java_import org.ruby_lang.prism.wasm.Prism - - # TODO: concurrency - PRISM = org.ruby_lang.prism.wasm.Prism.new - end - private_constant :WASM - - # The version constant is set by reading the result of calling pm_version. - VERSION = WASM::PRISM.version - - class << self - # Mirror the Prism.dump API by using the serialization API. - def dump(source, **options) - parsed = WASM::PRISM.parse(source.to_java_bytes, dump_options(options).to_java_bytes) - String.from_java_bytes(parsed) - end - - # Mirror the Prism.dump_file API by using the serialization API. - def dump_file(filepath, **options) - dump_file(File.read(filepath), filepath: filepath, **options) - end - - # Mirror the Prism.lex API by using the serialization API. - def lex(source, **options) - lexed = WASM::PRISM.lex(source.to_java_bytes, dump_options(options).to_java_bytes) - Serialize.load_lex(source, lexed, options.fetch(:freeze, false)) - end - - # Mirror the Prism.lex_file API by using the serialization API. - def lex_file(filepath, **options) - lex_file(File.read(filepath), filepath: filepath, **options) - end - - # Mirror the Prism.parse API by using the serialization API. - def parse(source, **options) - serialized = dump(source, **options) - Serialize.load_parse(source, serialized, options.fetch(:freeze, false)) - end - - # Mirror the Prism.parse_file API by using the serialization API. This uses - # native strings instead of Ruby strings because it allows us to use mmap - # when it is available. - def parse_file(filepath, **options) - parse(File.read(filepath), filepath: filepath, **options) - end - - # Mirror the Prism.parse_stream API by using the serialization API. - def parse_stream(stream, **options) - LibRubyParser::PrismBuffer.with do |buffer| - source = +"" - callback = -> (string, size, _) { - raise "Expected size to be >= 0, got: #{size}" if size <= 0 - - if !(line = stream.gets(size - 1)).nil? - source << line - string.write_string("#{line}\x00", line.bytesize + 1) - end - } - - eof_callback = -> (_) { stream.eof? } - - # In the pm_serialize_parse_stream function it accepts a pointer to the - # IO object as a void* and then passes it through to the callback as the - # third argument, but it never touches it itself. As such, since we have - # access to the IO object already through the closure of the lambda, we - # can pass a null pointer here and not worry. - LibRubyParser.pm_serialize_parse_stream(buffer.pointer, nil, callback, eof_callback, dump_options(options)) - Prism.load(source, buffer.read, options.fetch(:freeze, false)) - end - end - - # Mirror the Prism.parse_comments API by using the serialization API. - def parse_comments(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_comments_common(string, code, options) } - end - - # Mirror the Prism.parse_file_comments API by using the serialization - # API. This uses native strings instead of Ruby strings because it allows us - # to use mmap when it is available. - def parse_file_comments(filepath, **options) - options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_comments_common(string, string.read, options) } - end - - # Mirror the Prism.parse_lex API by using the serialization API. - def parse_lex(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_lex_common(string, code, options) } - end - - # Mirror the Prism.parse_lex_file API by using the serialization API. - def parse_lex_file(filepath, **options) - options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_lex_common(string, string.read, options) } - end - - # Mirror the Prism.parse_success? API by using the serialization API. - def parse_success?(code, **options) - LibRubyParser::PrismString.with_string(code) { |string| parse_file_success_common(string, options) } - end - - # Mirror the Prism.parse_failure? API by using the serialization API. - def parse_failure?(code, **options) - !parse_success?(code, **options) - end - - # Mirror the Prism.parse_file_success? API by using the serialization API. - def parse_file_success?(filepath, **options) - options[:filepath] = filepath - LibRubyParser::PrismString.with_file(filepath) { |string| parse_file_success_common(string, options) } - end - - # Mirror the Prism.parse_file_failure? API by using the serialization API. - def parse_file_failure?(filepath, **options) - !parse_file_success?(filepath, **options) - end - - # Mirror the Prism.profile API by using the serialization API. - def profile(source, **options) - LibRubyParser::PrismString.with_string(source) do |string| - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) - nil - end - end - end - - # Mirror the Prism.profile_file API by using the serialization API. - def profile_file(filepath, **options) - LibRubyParser::PrismString.with_file(filepath) do |string| - LibRubyParser::PrismBuffer.with do |buffer| - options[:filepath] = filepath - LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) - nil - end - end - end - - private - - def lex_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_lex(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_common(string, code, options) # :nodoc: - serialized = dump_common(string, options) - Serialize.load_parse(code, serialized, options.fetch(:freeze, false)) - end - - def parse_comments_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_lex_common(string, code, options) # :nodoc: - LibRubyParser::PrismBuffer.with do |buffer| - LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) - end - end - - def parse_file_success_common(string, options) # :nodoc: - LibRubyParser.pm_parse_success_p(string.pointer, string.length, dump_options(options)) - end - - # Return the value that should be dumped for the command_line option. - def dump_options_command_line(options) - command_line = options.fetch(:command_line, "") - raise ArgumentError, "command_line must be a string" unless command_line.is_a?(String) - - command_line.each_char.inject(0) do |value, char| - case char - when "a" then value | 0b000001 - when "e" then value | 0b000010 - when "l" then value | 0b000100 - when "n" then value | 0b001000 - when "p" then value | 0b010000 - when "x" then value | 0b100000 - else raise ArgumentError, "invalid command_line option: #{char}" - end - end - end - - # Return the value that should be dumped for the version option. - def dump_options_version(version) - case version - when "current" - version_string_to_number(RUBY_VERSION) || raise(CurrentVersionError, RUBY_VERSION) - when "latest", nil - 0 # Handled in pm_parser_init - when "nearest" - dump = version_string_to_number(RUBY_VERSION) - return dump if dump - if RUBY_VERSION < "3.3" - version_string_to_number("3.3") - else - 0 # Handled in pm_parser_init - end - else - version_string_to_number(version) || raise(ArgumentError, "invalid version: #{version}") - end - end - - # Converts a version string like "4.0.0" or "4.0" into a number. - # Returns nil if the version is unknown. - def version_string_to_number(version) - case version - when /\A3\.3(\.\d+)?\z/ - 1 - when /\A3\.4(\.\d+)?\z/ - 2 - when /\A3\.5(\.\d+)?\z/, /\A4\.0(\.\d+)?\z/ - 3 - when /\A4\.1(\.\d+)?\z/ - 4 - end - end - - # Convert the given options into a serialized options string. - def dump_options(options) - template = +"" - values = [] - - template << "L" - if (filepath = options[:filepath]) - values.push(filepath.bytesize, filepath.b) - template << "A*" - else - values << 0 - end - - template << "l" - values << options.fetch(:line, 1) - - template << "L" - if (encoding = options[:encoding]) - name = encoding.is_a?(Encoding) ? encoding.name : encoding - values.push(name.bytesize, name.b) - template << "A*" - else - values << 0 - end - - template << "C" - values << (options.fetch(:frozen_string_literal, false) ? 1 : 0) - - template << "C" - values << dump_options_command_line(options) - - template << "C" - values << dump_options_version(options[:version]) - - template << "C" - values << (options[:encoding] == false ? 1 : 0) - - template << "C" - values << (options.fetch(:main_script, false) ? 1 : 0) - - template << "C" - values << (options.fetch(:partial_script, false) ? 1 : 0) - - template << "C" - values << (options.fetch(:freeze, false) ? 1 : 0) - - template << "L" - if (scopes = options[:scopes]) - values << scopes.length - - scopes.each do |scope| - locals = nil - forwarding = 0 - - case scope - when Array - locals = scope - when Scope - locals = scope.locals - - scope.forwarding.each do |forward| - case forward - when :* then forwarding |= 0x1 - when :** then forwarding |= 0x2 - when :& then forwarding |= 0x4 - when :"..." then forwarding |= 0x8 - else raise ArgumentError, "invalid forwarding value: #{forward}" - end - end - else - raise TypeError, "wrong argument type #{scope.class.inspect} (expected Array or Prism::Scope)" - end - - template << "L" - values << locals.length - - template << "C" - values << forwarding - - locals.each do |local| - name = local.name - template << "L" - values << name.bytesize - - template << "A*" - values << name.b - end - end - else - values << 0 - end - - values.pack(template) - end - end - - # Here we are going to patch StringQuery to put in the class-level methods so - # that it can maintain a consistent interface - class StringQuery # :nodoc: - class << self - # Mirrors the C extension's StringQuery::local? method. - def local?(string) - query(LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name)) - end - - # Mirrors the C extension's StringQuery::constant? method. - def constant?(string) - query(LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name)) - end - - # Mirrors the C extension's StringQuery::method_name? method. - def method_name?(string) - query(LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name)) - end - - private - - # Parse the enum result and return an appropriate boolean. - def query(result) - case result - when :PM_STRING_QUERY_ERROR - raise ArgumentError, "Invalid or non ascii-compatible encoding" - when :PM_STRING_QUERY_FALSE - false - when :PM_STRING_QUERY_TRUE - true - end - end - end - end -end From 937be344e1cd58909b2ab75fd1e7c97737f2db3d Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 1 Apr 2026 19:09:05 -0500 Subject: [PATCH 03/16] Move semantic field flag into C sources Rather than templating two versions of sources with and without non-semantic fields, we can make that determination at build time. Passing -DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 to `make` will force that variable to be literally true, and the compiler will eliminate `if` blocks that use it to conditionally serialize non- semantic data. This simplifies the templates by removing that variation and allows building both forms of the library from a single generated set of sources. --- .github/workflows/java-wasm-bindings.yml | 2 +- Makefile | 2 +- java/README.md | 2 +- rakelib/serialization.rake | 7 ++--- src/prism.c | 2 +- templates/include/prism/ast.h.erb | 14 +++++----- .../org/ruby_lang/prism/Loader.java.erb | 6 ++--- .../org/ruby_lang/prism/Nodes.java.erb | 10 +++---- templates/src/serialize.c.erb | 26 ++++++++++++++----- templates/template.rb | 7 +---- 10 files changed, 42 insertions(+), 36 deletions(-) diff --git a/.github/workflows/java-wasm-bindings.yml b/.github/workflows/java-wasm-bindings.yml index 173b69ba1e..042784f532 100644 --- a/.github/workflows/java-wasm-bindings.yml +++ b/.github/workflows/java-wasm-bindings.yml @@ -28,7 +28,7 @@ jobs: bundler-cache: true - name: rake templates - run: PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake templates + run: PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_OMIT_NODE_ID=1 bundle exec rake templates - name: Set up WASI-SDK run: | diff --git a/Makefile b/Makefile index 0f6f5264d1..2aaec05478 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ java/wasm/src/main/wasm/prism.wasm: Makefile $(SOURCES) $(HEADERS) $(Q) $(MAKEDIRS) $(@D) $(Q) $(WASI_SDK_PATH)/bin/clang \ $(DEBUG_FLAGS) \ - -DPRISM_EXCLUDE_PRETTYPRINT -DPRISM_EXPORT_SYMBOLS -D_WASI_EMULATED_MMAN \ + -DPRISM_EXCLUDE_PRETTYPRINT -DPRISM_EXPORT_SYMBOLS -D_WASI_EMULATED_MMAN -DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 \ -lwasi-emulated-mman $(CPPFLAGS) $(JAVA_WASM_CFLAGS) \ -Wl,--export-all -Wl,--no-entry -mexec-model=reactor -lc++ -lc++abi \ -o $@ $(SOURCES) diff --git a/java/README.md b/java/README.md index 0dd4215777..bd06639f67 100644 --- a/java/README.md +++ b/java/README.md @@ -15,7 +15,7 @@ Some files need to be generated before the Maven artifacts can build: Sources under `api` are generated from templates in `../templates`. Those sources are generated using the follow command line: ``` -$ PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 bundle exec rake templates +$ PRISM_EXCLUDE_PRETTYPRINT=1 bundle exec rake templates ``` The files are generated under `api/src/main/java-templates` and will not be removed with `mvn clean`. diff --git a/rakelib/serialization.rake b/rakelib/serialization.rake index 516e8fe5ba..65c382fcc3 100644 --- a/rakelib/serialization.rake +++ b/rakelib/serialization.rake @@ -1,12 +1,9 @@ # frozen_string_literal: true task "test:java_loader" do - # Recompile with PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 - # Due to some JRuby bug this does not get propagated to the compile task, so require the caller to set the env var - # ENV["PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS"] = "1" - raise "this task requires $SERIALIZE_ONLY_SEMANTICS_FIELDS to be set" unless ENV["PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS"] - Rake::Task["clobber"].invoke + # All Java API consumers want semantic-only build + ENV["CFLAGS"] = "-DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1" Rake::Task["test:java_loader:internal"].invoke end diff --git a/src/prism.c b/src/prism.c index ffc617e93f..e813fddcd9 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22862,7 +22862,7 @@ pm_serialize_header(pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR); pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR); pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH); - pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0); + pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS); } /** diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 3b3be25e76..44e2385d7c 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -268,11 +268,13 @@ PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_are <%- end -%> /** - * When we're serializing to Java, we want to skip serializing the location - * fields as they won't be used by JRuby or TruffleRuby. This boolean allows us - * to specify that through the environment. It will never be true except for in - * those build systems. - */ -#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0 %> +* When we're serializing to Java, we want to skip serializing the location +* fields as they won't be used by JRuby or TruffleRuby. This boolean allows us +* to specify that through the environment. It will never be true except for in +* those build systems. +*/ +#ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS +#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS 0 +#endif #endif diff --git a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb index f1785e5d30..a4eec51a8f 100644 --- a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb @@ -324,12 +324,12 @@ public class Loader { } <%- - base_params = [*("nodeId" if Prism::Template::INCLUDE_NODE_ID), "startOffset", "length"] + base_params = [*("nodeId" unless Prism::Template::OMIT_NODE_ID), "startOffset", "length"] base_params_sig = base_params.map { "int #{_1}" }.join(", ") -%> private Nodes.Node loadNode() { int type = buffer.get() & 0xFF; - <%- if Prism::Template::INCLUDE_NODE_ID -%> + <%- unless Prism::Template::OMIT_NODE_ID -%> int nodeId = loadVarUInt(); <%- end -%> int startOffset = loadVarUInt(); @@ -341,7 +341,7 @@ public class Loader { case <%= index + 1 %>: <%- params = [] - params << "nodeId" if Prism::Template::INCLUDE_NODE_ID + params << "nodeId" unless Prism::Template::OMIT_NODE_ID params << "startOffset" << "length" params << "buffer.getInt()" << "null" if node.needs_serialized_length? params << "loadFlags()" if node.flags diff --git a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb index bcfe47f7c6..61d5900584 100644 --- a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb +++ b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb @@ -88,14 +88,14 @@ public abstract class Nodes { public static final Node[] EMPTY_ARRAY = {}; - <%- if Prism::Template::INCLUDE_NODE_ID -%> + <%- unless Prism::Template::OMIT_NODE_ID -%> public final int nodeId; <%- end -%> public final int startOffset; public final int length; private boolean newLineFlag = false; - <%- if Prism::Template::INCLUDE_NODE_ID -%> + <%- unless Prism::Template::OMIT_NODE_ID -%> public Node(int nodeId, int startOffset, int length) { this.nodeId = nodeId; <%- else -%> @@ -245,7 +245,7 @@ public abstract class Nodes { <%- params = [] - params << "int nodeId" if Prism::Template::INCLUDE_NODE_ID + params << "int nodeId" unless Prism::Template::OMIT_NODE_ID params << "int startOffset" << "int length" if node.needs_serialized_length? params << "int serializedLength" @@ -255,7 +255,7 @@ public abstract class Nodes { params.concat(node.semantic_fields.map { |field| "#{field.java_type} #{field.name}" }) -%> public <%= node.name -%>(<%= params.join(", ") %>) { - <%- if Prism::Template::INCLUDE_NODE_ID -%> + <%- unless Prism::Template::OMIT_NODE_ID -%> super(nodeId, startOffset, length); <%- else -%> super(startOffset, length); @@ -280,7 +280,7 @@ public abstract class Nodes { public <%= node.name -%> getNonLazy() { if (isLazy()) { - return loader.createDefNodeFromSavedPosition(<%= "nodeId, " if Prism::Template::INCLUDE_NODE_ID %>startOffset, length, -serializedLength); + return loader.createDefNodeFromSavedPosition(<%= "nodeId, " unless Prism::Template::OMIT_NODE_ID %>startOffset, length, -serializedLength); } else { return this; } diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index 3d9811e5db..a98692054f 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -73,7 +73,7 @@ static void pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node)); - <%- if Prism::Template::INCLUDE_NODE_ID -%> + <%- unless Prism::Template::OMIT_NODE_ID -%> pm_buffer_append_varuint(buffer, node->node_id); <%- end -%> pm_serialize_location(&node->location, buffer); @@ -91,8 +91,12 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { size_t length_offset = buffer->length; pm_buffer_append_string(buffer, "\0\0\0\0", 4); /* consume 4 bytes, updated below */ <%- end -%> - <%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS && !node.flags -%> + <%- if node.flags -%> pm_buffer_append_varuint(buffer, (uint32_t) node->flags); + <%- else -%> + if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + pm_buffer_append_varuint(buffer, (uint32_t) node->flags); + } <%- end -%> <%- node.fields.each do |field| -%> <%- case field -%> @@ -121,17 +125,25 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_<%= node.human %>_t *)node)-><%= field.name %>.ids[index])); } <%- when Prism::Template::LocationField -%> - <%- if field.should_be_serialized? -%> + <%- unless field.semantic_field? -%> + if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + <%- end -%> pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); + <%- unless field.semantic_field? -%> + } <%- end -%> <%- when Prism::Template::OptionalLocationField -%> - <%- if field.should_be_serialized? -%> + <%- unless field.semantic_field? -%> + if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + <%- end -%> if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) { pm_buffer_append_byte(buffer, 0); } else { pm_buffer_append_byte(buffer, 1); pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); } + <%- unless field.semantic_field? -%> + } <%- end -%> <%- when Prism::Template::UInt8Field -%> pm_buffer_append_byte(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>); @@ -261,9 +273,9 @@ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { pm_serialize_encoding(parser->encoding, buffer); pm_buffer_append_varsint(buffer, parser->start_line); pm_serialize_line_offset_list(&parser->line_offsets, buffer); -<%- unless Prism::Template::SERIALIZE_ONLY_SEMANTICS_FIELDS -%> - pm_serialize_comment_list(&parser->comment_list, buffer); -<%- end -%> + if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + pm_serialize_comment_list(&parser->comment_list, buffer); + } pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer); pm_serialize_data_loc(parser, buffer); pm_serialize_diagnostic_list(&parser->error_list, buffer); diff --git a/templates/template.rb b/templates/template.rb index 0fdeda561f..3636304a30 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -7,12 +7,11 @@ module Prism module Template # :nodoc: all - SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS", false) CHECK_FIELD_KIND = ENV.fetch("CHECK_FIELD_KIND", false) JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default" JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]" - INCLUDE_NODE_ID = !SERIALIZE_ONLY_SEMANTICS_FIELDS || JAVA_BACKEND == "jruby" + OMIT_NODE_ID = ENV.fetch("PRISM_OMIT_NODE_ID", false) COMMON_FLAGS_COUNT = 2 @@ -95,10 +94,6 @@ def each_comment_java_line(&block) def semantic_field? true end - - def should_be_serialized? - SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true - end end # Some node fields can be specialized if they point to a specific kind of From 1d1604fd5268c575ab392e04c6da91ce547361b9 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 22:34:40 -0500 Subject: [PATCH 04/16] Switch node ID flag to a positive condition --- .github/workflows/java-wasm-bindings.yml | 2 +- .../java-templates/org/ruby_lang/prism/Loader.java.erb | 6 +++--- .../java-templates/org/ruby_lang/prism/Nodes.java.erb | 10 +++++----- templates/src/serialize.c.erb | 2 +- templates/template.rb | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/.github/workflows/java-wasm-bindings.yml b/.github/workflows/java-wasm-bindings.yml index 042784f532..1b7f372c5e 100644 --- a/.github/workflows/java-wasm-bindings.yml +++ b/.github/workflows/java-wasm-bindings.yml @@ -28,7 +28,7 @@ jobs: bundler-cache: true - name: rake templates - run: PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_OMIT_NODE_ID=1 bundle exec rake templates + run: PRISM_EXCLUDE_PRETTYPRINT=1 PRISM_INCLUDE_NODE_ID=false bundle exec rake templates - name: Set up WASI-SDK run: | diff --git a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb index a4eec51a8f..f1785e5d30 100644 --- a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb +++ b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Loader.java.erb @@ -324,12 +324,12 @@ public class Loader { } <%- - base_params = [*("nodeId" unless Prism::Template::OMIT_NODE_ID), "startOffset", "length"] + base_params = [*("nodeId" if Prism::Template::INCLUDE_NODE_ID), "startOffset", "length"] base_params_sig = base_params.map { "int #{_1}" }.join(", ") -%> private Nodes.Node loadNode() { int type = buffer.get() & 0xFF; - <%- unless Prism::Template::OMIT_NODE_ID -%> + <%- if Prism::Template::INCLUDE_NODE_ID -%> int nodeId = loadVarUInt(); <%- end -%> int startOffset = loadVarUInt(); @@ -341,7 +341,7 @@ public class Loader { case <%= index + 1 %>: <%- params = [] - params << "nodeId" unless Prism::Template::OMIT_NODE_ID + params << "nodeId" if Prism::Template::INCLUDE_NODE_ID params << "startOffset" << "length" params << "buffer.getInt()" << "null" if node.needs_serialized_length? params << "loadFlags()" if node.flags diff --git a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb index 61d5900584..bcfe47f7c6 100644 --- a/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb +++ b/templates/java/api/src/main/java-templates/org/ruby_lang/prism/Nodes.java.erb @@ -88,14 +88,14 @@ public abstract class Nodes { public static final Node[] EMPTY_ARRAY = {}; - <%- unless Prism::Template::OMIT_NODE_ID -%> + <%- if Prism::Template::INCLUDE_NODE_ID -%> public final int nodeId; <%- end -%> public final int startOffset; public final int length; private boolean newLineFlag = false; - <%- unless Prism::Template::OMIT_NODE_ID -%> + <%- if Prism::Template::INCLUDE_NODE_ID -%> public Node(int nodeId, int startOffset, int length) { this.nodeId = nodeId; <%- else -%> @@ -245,7 +245,7 @@ public abstract class Nodes { <%- params = [] - params << "int nodeId" unless Prism::Template::OMIT_NODE_ID + params << "int nodeId" if Prism::Template::INCLUDE_NODE_ID params << "int startOffset" << "int length" if node.needs_serialized_length? params << "int serializedLength" @@ -255,7 +255,7 @@ public abstract class Nodes { params.concat(node.semantic_fields.map { |field| "#{field.java_type} #{field.name}" }) -%> public <%= node.name -%>(<%= params.join(", ") %>) { - <%- unless Prism::Template::OMIT_NODE_ID -%> + <%- if Prism::Template::INCLUDE_NODE_ID -%> super(nodeId, startOffset, length); <%- else -%> super(startOffset, length); @@ -280,7 +280,7 @@ public abstract class Nodes { public <%= node.name -%> getNonLazy() { if (isLazy()) { - return loader.createDefNodeFromSavedPosition(<%= "nodeId, " unless Prism::Template::OMIT_NODE_ID %>startOffset, length, -serializedLength); + return loader.createDefNodeFromSavedPosition(<%= "nodeId, " if Prism::Template::INCLUDE_NODE_ID %>startOffset, length, -serializedLength); } else { return this; } diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index a98692054f..a6a7bd91aa 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -73,7 +73,7 @@ static void pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, (uint8_t) PM_NODE_TYPE(node)); - <%- unless Prism::Template::OMIT_NODE_ID -%> + <%- if Prism::Template::INCLUDE_NODE_ID -%> pm_buffer_append_varuint(buffer, node->node_id); <%- end -%> pm_serialize_location(&node->location, buffer); diff --git a/templates/template.rb b/templates/template.rb index 3636304a30..02c98618e8 100755 --- a/templates/template.rb +++ b/templates/template.rb @@ -11,7 +11,7 @@ module Template # :nodoc: all JAVA_BACKEND = ENV["PRISM_JAVA_BACKEND"] || "default" JAVA_IDENTIFIER_TYPE = JAVA_BACKEND == "truffleruby" ? "String" : "byte[]" - OMIT_NODE_ID = ENV.fetch("PRISM_OMIT_NODE_ID", false) + INCLUDE_NODE_ID = ENV.fetch("PRISM_INCLUDE_NODE_ID", "true") != "false" COMMON_FLAGS_COUNT = 2 From 3428a14e66332b98f82a791211f2a4c93d97ef48 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 22:53:01 -0500 Subject: [PATCH 05/16] Use #ifndef for semantic fields only check --- src/prism.c | 6 +++++- templates/include/prism/ast.h.erb | 10 ---------- templates/src/serialize.c.erb | 16 ++++++++-------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/src/prism.c b/src/prism.c index e813fddcd9..f77510d7a4 100644 --- a/src/prism.c +++ b/src/prism.c @@ -22862,7 +22862,11 @@ pm_serialize_header(pm_buffer_t *buffer) { pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR); pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR); pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH); - pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS); + #ifdef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS + pm_buffer_append_byte(buffer, 1); + #else + pm_buffer_append_byte(buffer, 0); + #endif } /** diff --git a/templates/include/prism/ast.h.erb b/templates/include/prism/ast.h.erb index 44e2385d7c..5306c0abe4 100644 --- a/templates/include/prism/ast.h.erb +++ b/templates/include/prism/ast.h.erb @@ -267,14 +267,4 @@ typedef enum pm_<%= flag.human %> { PRISM_EXPORTED_FUNCTION pm_<%= node.human %>_t * pm_<%= node.human %>_new(pm_arena_t *arena, uint32_t node_id, pm_node_flags_t flags, pm_location_t location<%= params.empty? ? "" : ", #{params.join(", ")}" %>); <%- end -%> -/** -* When we're serializing to Java, we want to skip serializing the location -* fields as they won't be used by JRuby or TruffleRuby. This boolean allows us -* to specify that through the environment. It will never be true except for in -* those build systems. -*/ -#ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS -#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS 0 -#endif - #endif diff --git a/templates/src/serialize.c.erb b/templates/src/serialize.c.erb index a6a7bd91aa..2e2fa65251 100644 --- a/templates/src/serialize.c.erb +++ b/templates/src/serialize.c.erb @@ -94,9 +94,9 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { <%- if node.flags -%> pm_buffer_append_varuint(buffer, (uint32_t) node->flags); <%- else -%> - if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS pm_buffer_append_varuint(buffer, (uint32_t) node->flags); - } + #endif <%- end -%> <%- node.fields.each do |field| -%> <%- case field -%> @@ -126,15 +126,15 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { } <%- when Prism::Template::LocationField -%> <%- unless field.semantic_field? -%> - if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%- end -%> pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); <%- unless field.semantic_field? -%> - } + #endif <%- end -%> <%- when Prism::Template::OptionalLocationField -%> <%- unless field.semantic_field? -%> - if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS <%- end -%> if (((pm_<%= node.human %>_t *)node)-><%= field.name %>.length == 0) { pm_buffer_append_byte(buffer, 0); @@ -143,7 +143,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) { pm_serialize_location(&((pm_<%= node.human %>_t *)node)-><%= field.name %>, buffer); } <%- unless field.semantic_field? -%> - } + #endif <%- end -%> <%- when Prism::Template::UInt8Field -%> pm_buffer_append_byte(buffer, ((pm_<%= node.human %>_t *)node)-><%= field.name %>); @@ -273,9 +273,9 @@ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) { pm_serialize_encoding(parser->encoding, buffer); pm_buffer_append_varsint(buffer, parser->start_line); pm_serialize_line_offset_list(&parser->line_offsets, buffer); - if (!PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS) { + #ifndef PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS pm_serialize_comment_list(&parser->comment_list, buffer); - } + #endif pm_serialize_magic_comment_list(&parser->magic_comment_list, buffer); pm_serialize_data_loc(parser, buffer); pm_serialize_diagnostic_list(&parser->error_list, buffer); From d4cb14710ad73351d8e1b79949f451f8ef9fc8f1 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 23:06:43 -0500 Subject: [PATCH 06/16] Freeze NativeCommon for Ractor Only used while testing the FFI backend on CRuby. --- lib/prism/ffi/native_ffi.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb index 600e4236ec..b0ab9cb0e1 100644 --- a/lib/prism/ffi/native_ffi.rb +++ b/lib/prism/ffi/native_ffi.rb @@ -320,6 +320,6 @@ def string_query_local(string) end end - FFICommon = NativeCommon.new + FFICommon = NativeCommon.new.freeze private_constant(:FFICommon) end From ea9c68d0339f09e30d24459799bbbcbe831ec61f Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 23:26:25 -0500 Subject: [PATCH 07/16] Some cleanup of FFI files --- lib/prism/ffi.rb | 7 +++++++ lib/prism/ffi/native_ffi.rb | 4 +--- lib/prism/ffi/wasm_ffi.rb | 4 +--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index fb5ca055b7..93efcd0836 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -1,7 +1,14 @@ # frozen_string_literal: true # :markup: markdown +# -- # typed: ignore +# This file is responsible for mirroring the API provided by the C extension. There +# are two backends: +# +# * Native FFI based on the 'ffi' gem +# * WASM compiled to JVM bytecode (JRuby only) + require_relative "ffi/common" begin diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb index b0ab9cb0e1..3651182797 100644 --- a/lib/prism/ffi/native_ffi.rb +++ b/lib/prism/ffi/native_ffi.rb @@ -1,10 +1,8 @@ # frozen_string_literal: true # :markup: markdown +# -- # typed: ignore -# This file is responsible for mirroring the API provided by the C extension by -# using FFI to call into the shared library. - require "rbconfig" require "ffi" diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index 37ad421474..0b9dedd58c 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -1,10 +1,8 @@ # frozen_string_literal: true # :markup: markdown +# -- # typed: ignore -# This file is responsible for mirroring the API provided by the C extension by -# using FFI to call into the shared library. - require "rbconfig" require "ffi" From 08a1cb46bdc3b7a397cb0a56f90a735b23ba52b6 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 23:31:27 -0500 Subject: [PATCH 08/16] Add new files to gemspec --- prism.gemspec | 3 +++ 1 file changed, 3 insertions(+) diff --git a/prism.gemspec b/prism.gemspec index aac056b3f8..449b9c052c 100644 --- a/prism.gemspec +++ b/prism.gemspec @@ -113,6 +113,9 @@ Gem::Specification.new do |spec| "lib/prism/dot_visitor.rb", "lib/prism/dsl.rb", "lib/prism/ffi.rb", + "lib/prism/ffi/common.rb", + "lib/prism/ffi/native_ffi.rb", + "lib/prism/ffi/wasm_ffi.rb", "lib/prism/inspect_visitor.rb", "lib/prism/lex_compat.rb", "lib/prism/mutation_compiler.rb", From 2ee968f90c2dc7052eb4b3188dad481e1f1e21a8 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 23:46:41 -0500 Subject: [PATCH 09/16] Add nodoc for FFI backend internals --- lib/prism/ffi/common.rb | 20 +++++++++--------- lib/prism/ffi/native_ffi.rb | 20 +++++++++--------- lib/prism/ffi/wasm_ffi.rb | 41 +++++++++---------------------------- 3 files changed, 30 insertions(+), 51 deletions(-) diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb index 2600273318..440b1826da 100644 --- a/lib/prism/ffi/common.rb +++ b/lib/prism/ffi/common.rb @@ -4,7 +4,7 @@ module Prism - class Common + class Common # :nodoc: def dump(string, options) # :nodoc: with_buffer do |buffer| parse_only(buffer, string, options) @@ -178,27 +178,27 @@ def dump_options(options) # Required APIs below - def with_buffer(&b) + def with_buffer(&b) # :nodoc: raise NotImplementedError end - def with_string(string, &b) + def with_string(string, &b) # :nodoc: raise NotImplementedError end - def with_file(string, &b) + def with_file(string, &b) # :nodoc: raise NotImplementedError end - def lex_only(buffer, string, options) + def lex_only(buffer, string, options) # :nodoc: raise NotImplementedError end - def parse_only(buffer, string, options) + def parse_only(buffer, string, options) # :nodoc: raise NotImplementedError end - def parse_stream(buffer, callback, eof_callback, options, source) + def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: raise NotImplementedError end @@ -214,15 +214,15 @@ def parse_file_success(string, options) # :nodoc: raise NotImplementedError end - def string_query_method_name(string) + def string_query_method_name(string) # :nodoc: raise NotImplementedError end - def string_query_constant(string) + def string_query_constant(string) # :nodoc: raise NotImplementedError end - def string_query_local(string) + def string_query_local(string) # :nodoc: raise NotImplementedError end end diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb index 3651182797..c4fbbcb660 100644 --- a/lib/prism/ffi/native_ffi.rb +++ b/lib/prism/ffi/native_ffi.rb @@ -250,34 +250,34 @@ def self.with_file(filepath) # the prism module. private_constant :LibRubyParser - class NativeCommon < Common + class NativeCommon < Common # :nodoc: # The version constant is set by reading the result of calling pm_version. def version LibRubyParser.pm_version.read_string.freeze end - def with_buffer(&b) + def with_buffer(&b) # :nodoc: LibRubyParser::NativeBuffer.with(&b) end - def with_string(string, &b) + def with_string(string, &b) # :nodoc: LibRubyParser::NativeSource.with_string(string, &b) end - def with_file(string, &b) + def with_file(string, &b) # :nodoc: LibRubyParser::NativeSource.with_file(string, &b) end - def lex_only(buffer, string, options) + def lex_only(buffer, string, options) # :nodoc: LibRubyParser.pm_serialize_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) end - def parse_only(buffer, string, options) + def parse_only(buffer, string, options) # :nodoc: LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options)) end - def parse_stream(buffer, callback, eof_callback, options, source) + def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: pm_source = LibRubyParser.pm_source_stream_new(nil, callback, eof_callback) begin LibRubyParser.pm_serialize_parse_stream(buffer.pointer, pm_source, dump_options(options)) @@ -305,15 +305,15 @@ def parse_file_success(string, options) # :nodoc: LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) end - def string_query_method_name(string) + def string_query_method_name(string) # :nodoc: LibRubyParser.pm_string_query_method_name(string, string.bytesize, string.encoding.name) end - def string_query_constant(string) + def string_query_constant(string) # :nodoc: LibRubyParser.pm_string_query_constant(string, string.bytesize, string.encoding.name) end - def string_query_local(string) + def string_query_local(string) # :nodoc: LibRubyParser.pm_string_query_local(string, string.bytesize, string.encoding.name) end end diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index 0b9dedd58c..c6e4e07293 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -19,7 +19,7 @@ require_jar('com.dylibso.chicory', 'log', '1.6.1') module Prism # :nodoc: - class WASMCommon < Common + class WASMCommon < Common # :nodoc: java_import org.ruby_lang.prism.wasm.Prism # TODO: concurrency @@ -30,48 +30,27 @@ def version WASM::PRISM.version end - # Prototype WASM code - # def dump(source, **options) - # parsed = WASM::PRISM.parse(source.to_java_bytes, dump_options(options).to_java_bytes) - # end - # - # # Mirror the Prism.dump_file API by using the serialization API. - # def dump_file(filepath, **options) - # dump_file(File.read(filepath), filepath: filepath, **options) - # end - # - # # Mirror the Prism.lex API by using the serialization API. - # def lex(source, **options) - # lexed = WASM::PRISM.lex(source.to_java_bytes, dump_options(options).to_java_bytes) - # Serialize.load_lex(source, lexed, options.fetch(:freeze, false)) - # end - # - # # Mirror the Prism.lex_file API by using the serialization API. - # def lex_file(filepath, **options) - # lex_file(File.read(filepath), filepath: filepath, **options) - # end - - def with_buffer(&b) + def with_buffer(&b) # :nodoc: raise NotImplementedError end - def with_string(string, &b) + def with_string(string, &b) # :nodoc: raise NotImplementedError end - def with_file(string, &b) + def with_file(string, &b) # :nodoc: raise NotImplementedError end - def lex_only(buffer, string, options) + def lex_only(buffer, string, options) # :nodoc: raise NotImplementedError end - def parse_only(buffer, string, options) + def parse_only(buffer, string, options) # :nodoc: raise NotImplementedError end - def parse_stream(buffer, callback, eof_callback, options, source) + def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: raise NotImplementedError end @@ -87,15 +66,15 @@ def parse_file_success(string, options) # :nodoc: raise NotImplementedError end - def string_query_method_name(string) + def string_query_method_name(string) # :nodoc: raise NotImplementedError end - def string_query_constant(string) + def string_query_constant(string) # :nodoc: raise NotImplementedError end - def string_query_local(string) + def string_query_local(string) # :nodoc: raise NotImplementedError end end From 7ee41bf792d09e3a2232fe5c2bc2d22a329efcc6 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Mon, 6 Apr 2026 23:54:15 -0500 Subject: [PATCH 10/16] Ignore restructured FFI sources --- Steepfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Steepfile b/Steepfile index e6e1a8efb5..9db20dfa1e 100644 --- a/Steepfile +++ b/Steepfile @@ -15,4 +15,7 @@ target :lib do # Ignored because we do not want to overlap with the C extension. ignore "lib/prism/ffi.rb" + ignore "lib/prism/ffi/common.rb" + ignore "lib/prism/ffi/native_ffi.rb" + ignore "lib/prism/ffi/wasm_ffi.rb" end From e55bc13e9bc1ed9fc18ec27d41f7db8e4d8732c3 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 7 Apr 2026 00:36:20 -0500 Subject: [PATCH 11/16] Duplicate WASM artifact for full and start hooking up --- Makefile | 13 +- Rakefile | 1 + java/api/pom.xml | 2 +- java/pom.xml | 3 +- java/wasm-full/pom.xml | 80 +++++++++ .../org/ruby_lang/prism/wasm/full/Prism.java | 153 ++++++++++++++++++ java/wasm/pom.xml | 2 +- lib/prism/ffi/common.rb | 4 + lib/prism/ffi/wasm_ffi.rb | 29 +++- 9 files changed, 275 insertions(+), 12 deletions(-) create mode 100644 java/wasm-full/pom.xml create mode 100644 java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java diff --git a/Makefile b/Makefile index 2aaec05478..5e83dfa146 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ all: shared static shared: build/libprism.$(SOEXT) static: build/libprism.a wasm: javascript/src/prism.wasm -java-wasm: java/wasm/src/main/wasm/prism.wasm +java-wasm: java/wasm/src/main/wasm/prism.wasm java/wasm-full/src/main/wasm/prism-full.wasm build/libprism.$(SOEXT): $(SHARED_OBJECTS) $(ECHO) "linking $@ with $(CC)" @@ -57,6 +57,17 @@ java/wasm/src/main/wasm/prism.wasm: Makefile $(SOURCES) $(HEADERS) $(Q) $(WASI_SDK_PATH)/bin/clang \ $(DEBUG_FLAGS) \ -DPRISM_EXCLUDE_PRETTYPRINT -DPRISM_EXPORT_SYMBOLS -D_WASI_EMULATED_MMAN -DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 \ + -DPRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS \ + -lwasi-emulated-mman $(CPPFLAGS) $(JAVA_WASM_CFLAGS) \ + -Wl,--export-all -Wl,--no-entry -mexec-model=reactor -lc++ -lc++abi \ + -o $@ $(SOURCES) + +java/wasm-full/src/main/wasm/prism-full.wasm: Makefile $(SOURCES) $(HEADERS) + $(ECHO) "building $@" + $(Q) $(MAKEDIRS) $(@D) + $(Q) $(WASI_SDK_PATH)/bin/clang \ + $(DEBUG_FLAGS) \ + -DPRISM_EXCLUDE_PRETTYPRINT -DPRISM_EXPORT_SYMBOLS -D_WASI_EMULATED_MMAN \ -lwasi-emulated-mman $(CPPFLAGS) $(JAVA_WASM_CFLAGS) \ -Wl,--export-all -Wl,--no-entry -mexec-model=reactor -lc++ -lc++abi \ -o $@ $(SOURCES) diff --git a/Rakefile b/Rakefile index 421a4df697..cc33b4bb1c 100644 --- a/Rakefile +++ b/Rakefile @@ -55,6 +55,7 @@ CLOBBER.concat(Prism::Template::TEMPLATES) CLOBBER.concat(["build"]) CLOBBER << "lib/prism/prism.#{RbConfig::CONFIG["DLEXT"]}" CLOBBER << "java/wasm/src/main/resources/prism.wasm" +CLOBBER << "java/wasm-full/src/main/resources/prism-full.wasm" Prism::Template::TEMPLATES.each do |filepath| desc "Generate #{filepath}" diff --git a/java/api/pom.xml b/java/api/pom.xml index 831112aad8..5dd99dfc96 100644 --- a/java/api/pom.xml +++ b/java/api/pom.xml @@ -9,7 +9,7 @@ prism-parser-api - Java Prism + Java Prism API Java API for the Prism Ruby language parser https://github.com/ruby/prism diff --git a/java/pom.xml b/java/pom.xml index 8e006ed250..b849acafd4 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -6,7 +6,7 @@ prism-parser 0.0.4-SNAPSHOT pom - Java Prism + Java Prism parent Java API for the Prism Ruby language parser https://github.com/ruby/prism @@ -51,6 +51,7 @@ api wasm + wasm-full diff --git a/java/wasm-full/pom.xml b/java/wasm-full/pom.xml new file mode 100644 index 0000000000..e70172d838 --- /dev/null +++ b/java/wasm-full/pom.xml @@ -0,0 +1,80 @@ + + + 4.0.0 + + + org.ruby-lang + prism-parser + 0.0.2-SNAPSHOT + + + prism-parser-wasm-full + Java Prism WASM with full parsed content + Java WASM bindings for the Prism parser shared library + https://github.com/ruby/prism + + + 1.7.5 + + + + + + com.dylibso.chicory + bom + ${chicory.version} + pom + import + + + + + + + com.dylibso.chicory + runtime + + + com.dylibso.chicory + log + + + com.dylibso.chicory + wasi + + + com.dylibso.chicory + wasm + + + org.junit.jupiter + junit-jupiter-engine + ${junit.version} + test + + + + + + + com.dylibso.chicory + chicory-compiler-maven-plugin + ${chicory.version} + + + prism + + compile + + + org.ruby_lang.prism.wasm.full.PrismParser + org.ruby_lang.prism.wasm.full.Prism + src/main/wasm/prism-full.wasm + + + + + + + + diff --git a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java new file mode 100644 index 0000000000..68c9707f7a --- /dev/null +++ b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java @@ -0,0 +1,153 @@ +package org.ruby_lang.prism.wasm.full; + +import com.dylibso.chicory.runtime.ByteArrayMemory; +import com.dylibso.chicory.runtime.ImportValues; +import com.dylibso.chicory.runtime.Instance; +import com.dylibso.chicory.wasi.WasiOptions; +import com.dylibso.chicory.wasi.WasiPreview1; + +import java.nio.charset.StandardCharsets; + +public class Prism implements AutoCloseable { + private final WasiPreview1 wasi; + protected final Prism_ModuleExports exports; + private final Instance instance; + + public Prism() { + this(WasiOptions.builder().build()); + } + + public Prism(WasiOptions wasiOpts) { + wasi = WasiPreview1.builder().withOptions(wasiOpts).build(); + instance = Instance.builder(PrismParser.load()) + .withMemoryFactory(ByteArrayMemory::new) + .withMachineFactory(PrismParser::create) + .withImportValues(ImportValues.builder().addFunction(wasi.toHostFunctions()).build()) + .build(); + exports = new Prism_ModuleExports(instance); + } + + public String version() { + int versionPointer = exports.pmVersion(); + int length = exports.strchr(versionPointer, 0); + + return new String(instance.memory().readBytes(versionPointer, length - versionPointer)); + } + + public byte[] parse(byte[] sourceBytes, byte[] packedOptions) { + try ( + Buffer buffer = new Buffer(); + Source source = new Source(sourceBytes, 0, sourceBytes.length); + Options options = new Options(packedOptions)) { + + return parse(buffer, source, options); + } + } + + public byte[] lex(byte[] sourceBytes, byte[] packedOptions) { + try ( + Buffer buffer = new Buffer(); + Source source = new Source(sourceBytes, 0, sourceBytes.length); + Options options = new Options(packedOptions)) { + + return lex(buffer, source, options); + } + } + + public byte[] parse(byte[] sourceBytes, int sourceOffset, int sourceLength, byte[] packedOptions) { + try ( + Buffer buffer = new Buffer(); + Source source = new Source(sourceBytes, sourceOffset, sourceLength); + Options options = new Options(packedOptions)) { + + return parse(buffer, source, options); + } + } + + public byte[] parse(Buffer buffer, Source source, Options options) { + exports.pmSerializeParse( + buffer.pointer, source.pointer, source.length, options.pointer); + + return buffer.read(); + } + + public byte[] lex(Buffer buffer, Source source, Options options) { + exports.pmSerializeLex( + buffer.pointer, source.pointer, source.length, options.pointer); + + return buffer.read(); + } + + public class Buffer implements AutoCloseable { + final int pointer; + + Buffer() { + pointer = exports.pmBufferNew(); + clear(); + } + + public void clear() { + exports.pmBufferClear(pointer); + } + + public void close() { + exports.pmBufferFree(pointer); + } + + public byte[] read() { + return instance.memory().readBytes( + exports.pmBufferValue(pointer), + exports.pmBufferLength(pointer)); + } + } + + public class Source implements AutoCloseable{ + final int pointer; + final int length; + + public Source(int length) { + pointer = exports.calloc(1, length); + this.length = length; + } + + public Source(byte[] bytes, int offset, int length) { + this(length + 1); + write(bytes, offset, length); + } + + public Source(byte[] bytes) { + this(bytes, 0, bytes.length); + } + + public void write(byte[] bytes, int offset, int length) { + assert length + 1 <= this.length; + instance.memory().write(pointer, bytes, offset, length); + instance.memory().writeByte(pointer + length, (byte) 0); + } + + public void close() { + exports.free(pointer); + } + } + + class Options implements AutoCloseable { + final int pointer; + + Options(byte[] packedOptions) { + int pointer = exports.calloc(1, packedOptions.length); + instance.memory().write(pointer, packedOptions); + this.pointer = pointer; + } + + public void close() { + exports.free(pointer); + } + } + + @Override + public void close() { + if (wasi != null) { + wasi.close(); + } + } +} diff --git a/java/wasm/pom.xml b/java/wasm/pom.xml index 09e1fd439b..833d5b7788 100644 --- a/java/wasm/pom.xml +++ b/java/wasm/pom.xml @@ -9,7 +9,7 @@ prism-parser-wasm - Java Prism WASM + Java Prism WASM with semantic-only content Java WASM bindings for the Prism parser shared library https://github.com/ruby/prism diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb index 440b1826da..552810c059 100644 --- a/lib/prism/ffi/common.rb +++ b/lib/prism/ffi/common.rb @@ -178,6 +178,10 @@ def dump_options(options) # Required APIs below + def version # :nodoc: + raise NotImplementedError + end + def with_buffer(&b) # :nodoc: raise NotImplementedError end diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index c6e4e07293..560001fcb0 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -12,7 +12,7 @@ # Load the prism-parser-wasm jar require 'jar-dependencies' -require_jar('org.ruby-lang', 'prism-parser-wasm', '0.0.1-SNAPSHOT') +require_jar('org.ruby-lang', 'prism-parser-wasm-full', '0.0.2-SNAPSHOT') require_jar('com.dylibso.chicory', 'runtime', '1.6.1') require_jar('com.dylibso.chicory', 'wasi', '1.6.1') require_jar('com.dylibso.chicory', 'wasm', '1.6.1') @@ -20,22 +20,32 @@ module Prism # :nodoc: class WASMCommon < Common # :nodoc: - java_import org.ruby_lang.prism.wasm.Prism + java_import org.ruby_lang.prism.wasm.full.Prism # TODO: concurrency - PRISM = org.ruby_lang.prism.wasm.Prism.new + PRISM = org.ruby_lang.prism.wasm.full.Prism.new def version # The version constant is set by reading the result of calling pm_version. - WASM::PRISM.version + PRISM.version end def with_buffer(&b) # :nodoc: - raise NotImplementedError + buffer = Prism::Buffer.new + begin + b.call(buffer) + ensure + buffer.close + end end def with_string(string, &b) # :nodoc: - raise NotImplementedError + source = Prism::Source.new(string.to_java_bytes) + begin + b.call(source) + ensure + source.close + end end def with_file(string, &b) # :nodoc: @@ -43,11 +53,11 @@ def with_file(string, &b) # :nodoc: end def lex_only(buffer, string, options) # :nodoc: - raise NotImplementedError + String.from_java_bytes(Prism.lex(buffer, string, dump_options(options))) end def parse_only(buffer, string, options) # :nodoc: - raise NotImplementedError + String.from_java_bytes(Prism.lex(buffer, string, dump_options(options))) end def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: @@ -78,4 +88,7 @@ def string_query_local(string) # :nodoc: raise NotImplementedError end end + + FFICommon = WASMCommon.new.freeze + private_constant(:FFICommon) end From 60590f24da5c5500853250687120d28b2e6ae218 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 5 May 2026 13:40:24 -0500 Subject: [PATCH 12/16] Clean up Maven for recent updates * Clean up version properties * Update version in wasm-full --- java/api/pom.xml | 1 + java/pom.xml | 2 ++ java/wasm-full/pom.xml | 12 +++++++----- java/wasm/pom.xml | 6 +----- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/java/api/pom.xml b/java/api/pom.xml index 5dd99dfc96..e95b6ae585 100644 --- a/java/api/pom.xml +++ b/java/api/pom.xml @@ -6,6 +6,7 @@ org.ruby-lang prism-parser 0.0.4-SNAPSHOT + ../pom.xml prism-parser-api diff --git a/java/pom.xml b/java/pom.xml index b849acafd4..427c55a601 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -46,6 +46,8 @@ 21 21 6.0.3 + 1.7.5 + 0.0.4 diff --git a/java/wasm-full/pom.xml b/java/wasm-full/pom.xml index e70172d838..4ec7174497 100644 --- a/java/wasm-full/pom.xml +++ b/java/wasm-full/pom.xml @@ -5,7 +5,8 @@ org.ruby-lang prism-parser - 0.0.2-SNAPSHOT + 0.0.4-SNAPSHOT + ../pom.xml prism-parser-wasm-full @@ -13,10 +14,6 @@ Java WASM bindings for the Prism parser shared library https://github.com/ruby/prism - - 1.7.5 - - @@ -52,6 +49,11 @@ ${junit.version} test + + io.roastedroot + redline + ${redline.version} + diff --git a/java/wasm/pom.xml b/java/wasm/pom.xml index 833d5b7788..e043a38753 100644 --- a/java/wasm/pom.xml +++ b/java/wasm/pom.xml @@ -6,6 +6,7 @@ org.ruby-lang prism-parser 0.0.4-SNAPSHOT + ../pom.xml prism-parser-wasm @@ -13,11 +14,6 @@ Java WASM bindings for the Prism parser shared library https://github.com/ruby/prism - - 1.7.5 - 0.0.3 - - From 1351b2690c0ac5aeb8dfe123b2b1eacf74997973 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 5 May 2026 14:35:11 -0500 Subject: [PATCH 13/16] Wire up enough wasm gem to pass dump_test This passes dump_test 100% with the basic Chicory-wasm backend. Parsing directly from file is stubbed out to simply read the file, which is degraded functionality compared to the FFI version. --- java/.gitignore | 2 ++ .../org/ruby_lang/prism/wasm/full/Prism.java | 18 +++++++++-- lib/prism/ffi.rb | 20 ++++++++++-- lib/prism/ffi/wasm_ffi.rb | 31 +++++++++++++------ 4 files changed, 55 insertions(+), 16 deletions(-) diff --git a/java/.gitignore b/java/.gitignore index e61251f358..abef8b8898 100644 --- a/java/.gitignore +++ b/java/.gitignore @@ -3,5 +3,7 @@ api/target native/target wasm/src/main/wasm wasm/target +wasm-full/src/main/wasm +wasm-full/target target .idea diff --git a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java index 68c9707f7a..3d117a00b0 100644 --- a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java +++ b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java @@ -68,14 +68,14 @@ public byte[] parse(Buffer buffer, Source source, Options options) { exports.pmSerializeParse( buffer.pointer, source.pointer, source.length, options.pointer); - return buffer.read(); + return buffer.readBytes(); } public byte[] lex(Buffer buffer, Source source, Options options) { exports.pmSerializeLex( buffer.pointer, source.pointer, source.length, options.pointer); - return buffer.read(); + return buffer.readBytes(); } public class Buffer implements AutoCloseable { @@ -94,13 +94,17 @@ public void close() { exports.pmBufferFree(pointer); } - public byte[] read() { + public byte[] readBytes() { return instance.memory().readBytes( exports.pmBufferValue(pointer), exports.pmBufferLength(pointer)); } } + public Buffer newBuffer() { + return new Buffer(); + } + public class Source implements AutoCloseable{ final int pointer; final int length; @@ -130,6 +134,10 @@ public void close() { } } + public Source newSource(byte[] bytes) { + return new Source(bytes); + } + class Options implements AutoCloseable { final int pointer; @@ -144,6 +152,10 @@ public void close() { } } + public Options newOptions(byte[] packedOptions) { + return new Options(packedOptions); + } + @Override public void close() { if (wasi != null) { diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index 93efcd0836..a3382ef37f 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -35,7 +35,11 @@ def dump(source, **options) # Mirror the Prism.dump_file API by using the serialization API. def dump_file(filepath, **options) options[:filepath] = filepath - FFICommon.with_file(filepath) { |string| FFICommon.dump(string, options) } + FFICommon.with_file(filepath) do |file| + FFICommon.with_string(file.read) do |string| + FFICommon.dump(string, options) + end + end end # Mirror the Prism.lex API by using the serialization API. @@ -46,7 +50,12 @@ def lex(code, **options) # Mirror the Prism.lex_file API by using the serialization API. def lex_file(filepath, **options) options[:filepath] = filepath - FFICommon.with_file(filepath) { |string| FFICommon.lex(string, string.read, options) } + FFICommon.with_file(filepath) do |file| + code = file.read + FFICommon.with_string(code) do |string| + FFICommon.lex(string, code, options) + end + end end # Mirror the Prism.parse API by using the serialization API. @@ -59,7 +68,12 @@ def parse(code, **options) # when it is available. def parse_file(filepath, **options) options[:filepath] = filepath - FFICommon.with_file(filepath) { |string| FFICommon.parse(string, string.read, options) } + FFICommon.with_file(filepath) do |file| + code = file.read + FFICommon.with_string(code) do |string| + FFICommon.parse(string, code, options) + end + end end # Mirror the Prism.parse_stream API by using the serialization API. diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index 560001fcb0..d519646137 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -12,11 +12,14 @@ # Load the prism-parser-wasm jar require 'jar-dependencies' -require_jar('org.ruby-lang', 'prism-parser-wasm-full', '0.0.2-SNAPSHOT') -require_jar('com.dylibso.chicory', 'runtime', '1.6.1') -require_jar('com.dylibso.chicory', 'wasi', '1.6.1') -require_jar('com.dylibso.chicory', 'wasm', '1.6.1') -require_jar('com.dylibso.chicory', 'log', '1.6.1') +require_jar('org.ruby-lang', 'prism-parser-wasm-full', '0.0.4-SNAPSHOT') +chicory_version = '1.7.5' +redline_version = '0.0.4' +require_jar('com.dylibso.chicory', 'runtime', chicory_version) +require_jar('com.dylibso.chicory', 'wasi', chicory_version) +require_jar('com.dylibso.chicory', 'wasm', chicory_version) +require_jar('com.dylibso.chicory', 'log', chicory_version) +require_jar('io.roastedroot', 'redline', redline_version) module Prism # :nodoc: class WASMCommon < Common # :nodoc: @@ -31,7 +34,7 @@ def version end def with_buffer(&b) # :nodoc: - buffer = Prism::Buffer.new + buffer = PRISM.new_buffer begin b.call(buffer) ensure @@ -39,8 +42,14 @@ def with_buffer(&b) # :nodoc: end end + class Java::org.ruby_lang.prism.wasm.full.Prism::Buffer + def read + String.from_java_bytes(read_bytes) + end + end + def with_string(string, &b) # :nodoc: - source = Prism::Source.new(string.to_java_bytes) + source = PRISM.new_source(string.to_java_bytes) begin b.call(source) ensure @@ -49,15 +58,17 @@ def with_string(string, &b) # :nodoc: end def with_file(string, &b) # :nodoc: - raise NotImplementedError + File.open(string, "rb") do |file| + b.call(file) + end end def lex_only(buffer, string, options) # :nodoc: - String.from_java_bytes(Prism.lex(buffer, string, dump_options(options))) + String.from_java_bytes(PRISM.lex(buffer, string, PRISM.new_options(dump_options(options).to_java_bytes))) end def parse_only(buffer, string, options) # :nodoc: - String.from_java_bytes(Prism.lex(buffer, string, dump_options(options))) + String.from_java_bytes(PRISM.parse(buffer, string, PRISM.new_options(dump_options(options).to_java_bytes))) end def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: From bc857178be95944952d8166b4be9dbc94baf5db2 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 5 May 2026 14:55:16 -0500 Subject: [PATCH 14/16] Wire up parse_lex and parse_lex_file for wasm This also degrades mmap'ed file support but I will figure out a good abstraction to restore that later. --- .../java/org/ruby_lang/prism/wasm/full/Prism.java | 6 ++++++ lib/prism/ffi.rb | 7 ++++++- lib/prism/ffi/common.rb | 13 ++++++++++--- lib/prism/ffi/native_ffi.rb | 7 ++----- lib/prism/ffi/wasm_ffi.rb | 4 ++-- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java index 3d117a00b0..e4911de0e7 100644 --- a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java +++ b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java @@ -78,6 +78,12 @@ public byte[] lex(Buffer buffer, Source source, Options options) { return buffer.readBytes(); } + public byte[] parseLex(Buffer buffer, Source source, Options options) { + exports.pmSerializeParseLex(buffer.pointer, source.pointer, source.length, options.pointer); + + return buffer.readBytes(); + } + public class Buffer implements AutoCloseable { final int pointer; diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index a3382ef37f..c8145ff4bc 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -116,7 +116,12 @@ def parse_lex(code, **options) # Mirror the Prism.parse_lex_file API by using the serialization API. def parse_lex_file(filepath, **options) options[:filepath] = filepath - FFICommon.with_file(filepath) { |string| FFICommon.parse_lex(string, string.read, options) } + FFICommon.with_file(filepath) do |file| + code = file.read + FFICommon.with_string(code) do |string| + FFICommon.parse_lex(string, code, options) + end + end end # Mirror the Prism.parse_success? API by using the serialization API. diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb index 552810c059..92df7df5da 100644 --- a/lib/prism/ffi/common.rb +++ b/lib/prism/ffi/common.rb @@ -28,6 +28,13 @@ def lex(string, code, options) # :nodoc: end end + def parse_lex(string, code, options) # :nodoc: + with_buffer do |buffer| + parse_lex_only(buffer, string, options) + Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) + end + end + # Return the value that should be dumped for the command_line option. def dump_options_command_line(options) command_line = options.fetch(:command_line, "") @@ -202,15 +209,15 @@ def parse_only(buffer, string, options) # :nodoc: raise NotImplementedError end - def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: + def parse_lex_only(buffer, string, options) # :nodoc: raise NotImplementedError end - def parse_comments(string, code, options) # :nodoc: + def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: raise NotImplementedError end - def parse_lex(string, code, options) # :nodoc: + def parse_comments(string, code, options) # :nodoc: raise NotImplementedError end diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb index c4fbbcb660..10f6ac13a1 100644 --- a/lib/prism/ffi/native_ffi.rb +++ b/lib/prism/ffi/native_ffi.rb @@ -294,11 +294,8 @@ def parse_comments(string, code, options) # :nodoc: end end - def parse_lex(string, code, options) # :nodoc: - with_buffer do |buffer| - LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_lex(code, buffer.read, options.fetch(:freeze, false)) - end + def parse_lex_only(buffer, string, options) # :nodoc: + LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) end def parse_file_success(string, options) # :nodoc: diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index d519646137..efe94c3d22 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -79,8 +79,8 @@ def parse_comments(string, code, options) # :nodoc: raise NotImplementedError end - def parse_lex(string, code, options) # :nodoc: - raise NotImplementedError + def parse_lex_only(buffer, string, options) # :nodoc: + String.from_java_bytes(PRISM.parse_lex(buffer, string, PRISM.new_options(dump_options(options).to_java_bytes))) end def parse_file_success(string, options) # :nodoc: From 8653221a4ecb05323bac36012ce8b5d407919ac1 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 5 May 2026 15:03:59 -0500 Subject: [PATCH 15/16] Wire up parse_success for WASM --- .../main/java/org/ruby_lang/prism/wasm/full/Prism.java | 4 ++++ lib/prism/ffi.rb | 9 +++++++-- lib/prism/ffi/common.rb | 2 +- lib/prism/ffi/native_ffi.rb | 2 +- lib/prism/ffi/wasm_ffi.rb | 4 ++-- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java index e4911de0e7..fcf4da1743 100644 --- a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java +++ b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java @@ -84,6 +84,10 @@ public byte[] parseLex(Buffer buffer, Source source, Options options) { return buffer.readBytes(); } + public boolean parseSuccess(Source source, Options options) { + return exports.pmSerializeParseSuccessP(source.pointer, source.length, options.pointer) != 0; + } + public class Buffer implements AutoCloseable { final int pointer; diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index c8145ff4bc..d102610974 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -126,7 +126,7 @@ def parse_lex_file(filepath, **options) # Mirror the Prism.parse_success? API by using the serialization API. def parse_success?(code, **options) - FFICommon.with_string(code) { |string| FFICommon.parse_file_success(string, options) } + FFICommon.with_string(code) { |string| FFICommon.parse_success(string, options) } end # Mirror the Prism.parse_failure? API by using the serialization API. @@ -137,7 +137,12 @@ def parse_failure?(code, **options) # Mirror the Prism.parse_file_success? API by using the serialization API. def parse_file_success?(filepath, **options) options[:filepath] = filepath - FFICommon.with_file(filepath) { |string| FFICommon.parse_file_success(string, options) } + FFICommon.with_file(filepath) do |file| + code = file.read + FFICommon.with_string(code) do |string| + FFICommon.parse_success(string, options) + end + end end # Mirror the Prism.parse_file_failure? API by using the serialization API. diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb index 92df7df5da..650bdec289 100644 --- a/lib/prism/ffi/common.rb +++ b/lib/prism/ffi/common.rb @@ -221,7 +221,7 @@ def parse_comments(string, code, options) # :nodoc: raise NotImplementedError end - def parse_file_success(string, options) # :nodoc: + def parse_success(string, options) # :nodoc: raise NotImplementedError end diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb index 10f6ac13a1..a529fe66de 100644 --- a/lib/prism/ffi/native_ffi.rb +++ b/lib/prism/ffi/native_ffi.rb @@ -298,7 +298,7 @@ def parse_lex_only(buffer, string, options) # :nodoc: LibRubyParser.pm_serialize_parse_lex(buffer.pointer, string.pointer, string.length, dump_options(options)) end - def parse_file_success(string, options) # :nodoc: + def parse_success(string, options) # :nodoc: LibRubyParser.pm_serialize_parse_success_p(string.pointer, string.length, dump_options(options)) end diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index efe94c3d22..e7e75abb2c 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -83,8 +83,8 @@ def parse_lex_only(buffer, string, options) # :nodoc: String.from_java_bytes(PRISM.parse_lex(buffer, string, PRISM.new_options(dump_options(options).to_java_bytes))) end - def parse_file_success(string, options) # :nodoc: - raise NotImplementedError + def parse_success(string, options) # :nodoc: + PRISM.parse_success(string, PRISM.new_options(dump_options(options).to_java_bytes)) end def string_query_method_name(string) # :nodoc: From 2232d5cbd77f4cc6898846405d7993d793ba1289 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 5 May 2026 15:15:25 -0500 Subject: [PATCH 16/16] Wire up parse_comments --- .../main/java/org/ruby_lang/prism/wasm/full/Prism.java | 4 ++++ lib/prism/ffi.rb | 7 ++++++- lib/prism/ffi/common.rb | 9 ++++++++- lib/prism/ffi/native_ffi.rb | 7 ++----- lib/prism/ffi/wasm_ffi.rb | 4 ++-- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java index fcf4da1743..6999428a35 100644 --- a/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java +++ b/java/wasm-full/src/main/java/org/ruby_lang/prism/wasm/full/Prism.java @@ -88,6 +88,10 @@ public boolean parseSuccess(Source source, Options options) { return exports.pmSerializeParseSuccessP(source.pointer, source.length, options.pointer) != 0; } + public void parseComments(Buffer buffer, Source source, Options options) { + exports.pmSerializeParseComments(buffer.pointer, source.pointer, source.length, options.pointer); + } + public class Buffer implements AutoCloseable { final int pointer; diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb index d102610974..cda79da9fd 100644 --- a/lib/prism/ffi.rb +++ b/lib/prism/ffi.rb @@ -105,7 +105,12 @@ def parse_comments(code, **options) # to use mmap when it is available. def parse_file_comments(filepath, **options) options[:filepath] = filepath - FFICommon.with_file(filepath) { |string| FFICommon.parse_comments(string, string.read, options) } + FFICommon.with_file(filepath) do |file| + code = file.read + FFICommon.with_string(code) do |string| + FFICommon.parse_comments(string, code, options) + end + end end # Mirror the Prism.parse_lex API by using the serialization API. diff --git a/lib/prism/ffi/common.rb b/lib/prism/ffi/common.rb index 650bdec289..a927cac829 100644 --- a/lib/prism/ffi/common.rb +++ b/lib/prism/ffi/common.rb @@ -35,6 +35,13 @@ def parse_lex(string, code, options) # :nodoc: end end + def parse_comments(string, code, options) # :nodoc: + with_buffer do |buffer| + parse_comments_only(buffer, string, options) + Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) + end + end + # Return the value that should be dumped for the command_line option. def dump_options_command_line(options) command_line = options.fetch(:command_line, "") @@ -217,7 +224,7 @@ def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: raise NotImplementedError end - def parse_comments(string, code, options) # :nodoc: + def parse_comments_only(string, code, options) # :nodoc: raise NotImplementedError end diff --git a/lib/prism/ffi/native_ffi.rb b/lib/prism/ffi/native_ffi.rb index a529fe66de..345075f21e 100644 --- a/lib/prism/ffi/native_ffi.rb +++ b/lib/prism/ffi/native_ffi.rb @@ -287,11 +287,8 @@ def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: end end - def parse_comments(string, code, options) # :nodoc: - with_buffer do |buffer| - LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) - Serialize.load_parse_comments(code, buffer.read, options.fetch(:freeze, false)) - end + def parse_comments_only(buffer, string, options) # :nodoc: + LibRubyParser.pm_serialize_parse_comments(buffer.pointer, string.pointer, string.length, dump_options(options)) end def parse_lex_only(buffer, string, options) # :nodoc: diff --git a/lib/prism/ffi/wasm_ffi.rb b/lib/prism/ffi/wasm_ffi.rb index e7e75abb2c..df9b36cc81 100644 --- a/lib/prism/ffi/wasm_ffi.rb +++ b/lib/prism/ffi/wasm_ffi.rb @@ -75,8 +75,8 @@ def parse_stream(buffer, callback, eof_callback, options, source) # :nodoc: raise NotImplementedError end - def parse_comments(string, code, options) # :nodoc: - raise NotImplementedError + def parse_comments_only(buffer, string, options) # :nodoc: + PRISM.parse_comments(buffer, string, PRISM.new_options(dump_options(options).to_java_bytes)) end def parse_lex_only(buffer, string, options) # :nodoc: