Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--format documentation
--color
--require spec_helper
12 changes: 12 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

# Bundler manifest for the code-challenge gem. Gems are fetched from
# rubygems.org; the gem's own dependencies come from the gemspec, and the
# gems listed below are added on top of those.
source "https://rubygems.org"

# Specify your gem's dependencies in code-challenge.gemspec
gemspec

# Interactive console, started by bin/console.
gem "irb"
# Task runner used by the Rakefile.
gem "rake", "~> 13.0"
# Same constraint as the dependency declared in code-challenge.gemspec.
gem "nokogiri", "~> 1.19"

# Test framework (see .rspec and the Rakefile's spec task).
gem "rspec", "~> 3.0"
84 changes: 84 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
PATH
remote: .
specs:
code-challenge (0.1.0)
nokogiri (~> 1.19)

GEM
remote: https://rubygems.org/
specs:
date (3.5.1)
diff-lcs (1.6.2)
erb (6.0.4)
io-console (0.8.2)
irb (1.18.0)
pp (>= 0.6.0)
prism (>= 1.3.0)
rdoc (>= 4.0.0)
reline (>= 0.4.2)
nokogiri (1.19.3-aarch64-linux-gnu)
racc (~> 1.4)
nokogiri (1.19.3-aarch64-linux-musl)
racc (~> 1.4)
nokogiri (1.19.3-arm-linux-gnu)
racc (~> 1.4)
nokogiri (1.19.3-arm-linux-musl)
racc (~> 1.4)
nokogiri (1.19.3-arm64-darwin)
racc (~> 1.4)
nokogiri (1.19.3-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.19.3-x86_64-linux-gnu)
racc (~> 1.4)
nokogiri (1.19.3-x86_64-linux-musl)
racc (~> 1.4)
pp (0.6.3)
prettyprint
prettyprint (0.2.0)
prism (1.9.0)
psych (5.4.0)
date
stringio
racc (1.8.1)
rake (13.4.2)
rdoc (7.2.0)
erb
psych (>= 4.0.0)
tsort
reline (0.6.3)
io-console (~> 0.5)
rspec (3.13.2)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.6)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.5)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-mocks (3.13.8)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.7)
stringio (3.2.0)
tsort (0.2.0)

PLATFORMS
aarch64-linux-gnu
aarch64-linux-musl
arm-linux-gnu
arm-linux-musl
arm64-darwin
x86_64-darwin
x86_64-linux-gnu
x86_64-linux-musl

DEPENDENCIES
code-challenge!
irb
nokogiri (~> 1.19)
rake (~> 13.0)
rspec (~> 3.0)

BUNDLED WITH
2.6.9
21 changes: 21 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2026 Keith Yeung

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
8 changes: 8 additions & 0 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# frozen_string_literal: true

# Rake entry point: loads Bundler's gem packaging tasks and defines a `spec`
# task that runs the RSpec suite.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Defines the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Running `rake` with no arguments runs the specs.
task default: :spec
11 changes: 11 additions & 0 deletions bin/console
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

# Opens an IRB session with the gem already loaded.

# bundler/setup puts the gems from the Gemfile on the load path, so that
# "code/challenge" (lib/code/challenge.rb) can be required below.
require "bundler/setup"
require "code/challenge"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

require "irb"
IRB.start(__FILE__)
8 changes: 8 additions & 0 deletions bin/setup
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Development bootstrap script: installs the gems listed in the Gemfile.

# Abort on any failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail
# Split words on newlines and tabs only, not on spaces.
IFS=$'\n\t'
# Echo each line as it is read (-v) and each command as it is executed (-x).
set -vx

bundle install

# Do any other automated setup that you need to do here
40 changes: 40 additions & 0 deletions code-challenge.gemspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# frozen_string_literal: true

require_relative "lib/code/challenge/version"

# Gem specification for code-challenge.
#
# The summary, description and changelog_uri used to hold the "TODO: ..."
# placeholders from the gem template. RubyGems refuses to build a gem whose
# summary or description starts with "TODO"/"FIXME", and it validates
# changelog_uri as an http(s) link, so those three fields now carry real values.
Gem::Specification.new do |spec|
  spec.name = "code-challenge"
  spec.version = Code::Challenge::VERSION
  spec.authors = ["Keith Yeung"]
  spec.email = ["kungfukeith11@gmail.com"]

  spec.summary = "Parses web page HTML with Nokogiri, including deferred inline images."
  spec.description = "Code challenge gem that parses web page HTML with Nokogiri and resolves " \
                     "images whose data URIs are injected by inline _setImagesSrc scripts."
  spec.homepage = "https://github.com/KiChjang/code-challenge"
  spec.license = "MIT"
  spec.required_ruby_version = ">= 3.1.0"

  # NOTE(review): still the template placeholder. The intended gem server is
  # not known from this repository; presumably this placeholder keeps
  # `gem push` from publishing to any real host until it is set. Confirm, then
  # replace it with the real host (or remove the line to allow rubygems.org).
  spec.metadata["allowed_push_host"] = "TODO: Set to your gem server 'https://example.com'"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage
  # NOTE(review): points at the GitHub releases page because no CHANGELOG.md
  # is visible in this change; switch to the changelog file once one exists.
  spec.metadata["changelog_uri"] = "#{spec.homepage}/releases"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  gemspec = File.basename(__FILE__)
  spec.files = IO.popen(%w[git ls-files -z], chdir: __dir__, err: IO::NULL) do |ls|
    ls.readlines("\x0", chomp: true).reject do |f|
      (f == gemspec) ||
        f.start_with?(*%w[bin/ test/ spec/ features/ .git appveyor Gemfile])
    end
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "nokogiri", "~> 1.19"

  # For more information and examples about making a new gem, check out our
  # guide at: https://bundler.io/guides/creating_gem.html
end
12 changes: 12 additions & 0 deletions lib/code/challenge.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
require_relative "challenge/version"
require_relative "challenge/webpage_parser"

module Code
  # Top-level namespace of the gem and its public entry point.
  module Challenge
    # Error class declared by the gem; subclasses StandardError.
    class Error < StandardError
    end

    class << self
      # Parses a web page by delegating to WebpageParser.parse.
      #
      # @param html the page markup, passed through to the parser unchanged
      # @return whatever WebpageParser.parse returns for +html+
      def parse_webpage(html)
        WebpageParser.parse(html)
      end
    end
  end
end
158 changes: 158 additions & 0 deletions lib/code/challenge/deferred_image_extractor.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
require "set"

module Code
  module Challenge
    # Extracts deferred image mappings from inline scripts that invoke
    # _setImagesSrc(...). Returns a hash of img element id => data URI.
    #
    # The document only needs to respond to +css(selector)+ and return
    # enumerable nodes that respond to +[]+ (attribute lookup) and +text+.
    #
    # Call arguments are split on top-level commas only. Commas inside quoted
    # strings (every base64 data URI contains one) and inside array literals
    # do not split an argument, so literal arguments such as
    #   _setImagesSrc(['a','b'],'data:image/png;base64,AAAA')
    # are resolved as well as the variable form
    #   var s='data:image/...';var ii=['a'];_setImagesSrc(ii,s);
    class DeferredImageExtractor
      DATA_URI_PATTERN = /['"](data:image[^'"]+)['"]/.freeze
      IDENTIFIER_PATTERN = /[A-Za-z_$][A-Za-z0-9_$]*/.freeze

      # Start of a _setImagesSrc call, up to and including the opening paren.
      CALL_PATTERN = /_setImagesSrc\s*\(/.freeze
      # Any single- or double-quoted string; capture 1 is its content.
      QUOTED_STRING_PATTERN = /['"]([^'"]+)['"]/.freeze
      # `name = 'data:image...'`, optionally preceded by var/let/const.
      DATA_URI_ASSIGNMENT_PATTERN =
        /(?:(?:var|let|const)\s+)?(#{IDENTIFIER_PATTERN})\s*=\s*['"](data:image[^'"]+)['"]/.freeze
      # `name = [ ... ]`, optionally preceded by var/let/const.
      ID_ARRAY_ASSIGNMENT_PATTERN =
        /(?:(?:var|let|const)\s+)?(#{IDENTIFIER_PATTERN})\s*=\s*(\[[^\]]*\])/.freeze

      QUOTE_CHARS = ["'", "\"", "`"].freeze
      OPENING_BRACKETS = ["(", "[", "{"].freeze
      CLOSING_BRACKETS = [")", "]", "}"].freeze

      # Convenience wrapper around +new(document).extract+.
      def self.extract(document)
        new(document).extract
      end

      # @param document parsed HTML document; the ids of all +img[id]+
      #   elements are collected up front and used to recognise image ids
      #   inside scripts.
      def initialize(document)
        @document = document
        @known_image_ids = @document.css("img[id]").map { |img| img["id"] }.to_set
      end

      # Scans every script element that mentions both "data:image" and
      # "_setImagesSrc" and collects the id => data URI pairs it sets.
      # When the same id is set more than once, the last one wins.
      #
      # @return [Hash{String => String}] img element id => unescaped data URI
      # @raise [StructuralMismatchException] when a call names known image ids
      #   but no data URI can be resolved for it. NOTE(review): that constant
      #   is not defined in this file; presumably it is defined elsewhere in
      #   the gem -- verify.
      def extract
        @document.css("script").each_with_object({}) do |script_node, images|
          script = script_node.text
          next unless script.include?("data:image") && script.include?("_setImagesSrc")

          deferred_pairs(script).each do |element_id, data_uri|
            images[element_id] = unescape_js_string(data_uri)
          end
        end
      end

      private

      # Parse each _setImagesSrc(...) call independently by behavior rather than
      # fixed variable names or argument order. For each call, resolve IDs and
      # data URI from direct literals first, then from variable assignments
      # found in the enclosing scope before the call site.
      #
      # Returns an array of [id, data_uri] pairs. Calls that reference no known
      # image id (including the function's own definition) are skipped.
      def deferred_pairs(script)
        pairs = []
        cursor = 0

        while (call = CALL_PATTERN.match(script, cursor))
          args, call_end = call_arguments(script, call.end(0))
          if args.nil?
            # Unbalanced call: step past the opening paren and keep looking.
            cursor = call.end(0)
            next
          end
          cursor = call_end

          # The text between the enclosing "{" and the call is shared by both
          # lookups, so it is computed once per call.
          scope_start = find_enclosing_scope_start(script, call.begin(0))
          window = script[scope_start...call.begin(0)]

          ids = ids_from_args(args, window)
          next if ids.empty?

          data_uri = data_uri_from_args(args, window)
          if data_uri.nil? || data_uri.empty?
            raise StructuralMismatchException, "Unable to resolve deferred data URI for ids: #{ids.join(', ')}"
          end

          pairs.concat(ids.map { |id| [id, data_uri] })
        end

        pairs
      end

      # Reads the argument list that starts right after an opening paren at
      # +start+. Arguments are separated by commas that are outside any quoted
      # string and outside any nested (), [] or {}.
      #
      # Returns [args, index_after_closing_paren], or nil when the list is
      # never closed or is closed by the wrong bracket.
      def call_arguments(script, start)
        args = []
        arg_start = start
        depth = 0
        quote = nil
        pos = start

        while pos < script.length
          ch = script[pos]

          if quote
            if ch == "\\"
              pos += 1 # skip the escaped character
            elsif ch == quote
              quote = nil
            end
          elsif QUOTE_CHARS.include?(ch)
            quote = ch
          elsif OPENING_BRACKETS.include?(ch)
            depth += 1
          elsif CLOSING_BRACKETS.include?(ch)
            if depth.zero?
              return nil unless ch == ")"

              args << script[arg_start...pos].strip
              return [args, pos + 1]
            end
            depth -= 1
          elsif ch == "," && depth.zero?
            args << script[arg_start...pos].strip
            arg_start = pos + 1
          end

          pos += 1
        end

        nil
      end

      # Returns the data URI for the first argument that is either a quoted
      # data URI literal or the name of a variable assigned one in +window+;
      # nil when no argument resolves.
      def data_uri_from_args(args, window)
        uri_by_var = nil

        args.each do |arg|
          # Simple case when the argument is a data URI.
          literal = arg[DATA_URI_PATTERN, 1]
          return literal if literal

          # Otherwise, the argument may be a variable name assigned earlier
          # in the enclosing scope.
          uri_by_var ||= data_uri_assignments(window)
          return uri_by_var[arg] if uri_by_var.key?(arg)
        end

        nil
      end

      # Maps variable name => data URI for every data URI string assignment in
      # +window+; a later assignment to the same name replaces an earlier one.
      def data_uri_assignments(window)
        window.scan(DATA_URI_ASSIGNMENT_PATTERN).to_h
      end

      # Returns the known image ids for the first argument that either
      # contains them as quoted strings or names a variable assigned an array
      # of them in +window+; an empty array when no argument resolves.
      def ids_from_args(args, window)
        ids_by_var = nil

        args.each do |arg|
          # The argument may be an array literal; scan it for known image IDs
          ids = known_ids_in(arg)
          return ids unless ids.empty?

          # Otherwise, the argument may be a variable name assigned earlier
          # in the enclosing scope.
          ids_by_var ||= id_array_assignments(window)
          return ids_by_var[arg] if ids_by_var.key?(arg)
        end

        []
      end

      # Maps variable name => known image ids for every array literal
      # assignment in +window+ that contains at least one known id.
      def id_array_assignments(window)
        window.scan(ID_ARRAY_ASSIGNMENT_PATTERN).each_with_object({}) do |(var_name, array_literal), mappings|
          ids = known_ids_in(array_literal)
          mappings[var_name] = ids unless ids.empty?
        end
      end

      # Walks backwards from just before +call_pos+ and returns the index of
      # the nearest "{" that is not closed again before the call, ignoring
      # braces inside quoted strings. Returns 0 when there is none.
      def find_enclosing_scope_start(script, call_pos)
        pos = [[call_pos.to_i - 1, 0].max, script.length - 1].min
        depth = 0
        quote = nil

        while pos >= 0
          ch = script[pos]

          if quote
            quote = nil if ch == quote && !escaped_at?(script, pos)
          elsif QUOTE_CHARS.include?(ch) && !escaped_at?(script, pos)
            quote = ch
          elsif ch == "}"
            depth += 1
          elsif ch == "{"
            return pos if depth.zero?

            depth -= 1
          end

          pos -= 1
        end

        0
      end

      # True when the character at +index+ is preceded by an odd number of
      # backslashes, i.e. it is escaped.
      def escaped_at?(script, index)
        backslashes = 0
        cursor = index - 1
        while cursor >= 0 && script[cursor] == "\\"
          backslashes += 1
          cursor -= 1
        end
        backslashes.odd?
      end

      # Quoted strings in +text+ that are ids of img elements in the document.
      def known_ids_in(text)
        text.scan(QUOTED_STRING_PATTERN).flatten.select { |value| @known_image_ids.include?(value) }
      end

      # Replaces JavaScript \xNN escapes (for example \x3d for "=") with the
      # byte they encode. Other escape forms are left untouched.
      def unescape_js_string(value)
        value.gsub(/\\x([0-9a-fA-F]{2})/) { [Regexp.last_match(1).hex].pack("C") }
      end
    end
  end
end
7 changes: 7 additions & 0 deletions lib/code/challenge/version.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# frozen_string_literal: true

module Code
module Challenge
# Gem version string; read by code-challenge.gemspec as spec.version.
VERSION = "0.1.0"
end
end
Loading