Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .rspec
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--require spec_helper
--format documentation
--color
72 changes: 72 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# human should be able to read 200 chars per line
Layout/LineLength:
Max: 200

# relax branch condition size, code can be too verbose
Metrics/AbcSize:
Max: 40

# short clear method name
Metrics/MethodLength:
Max: 25

# no performance implication
Style/OptionalBooleanParameter:
Enabled: false

# %i(array) is not common in Ruby
Style/SymbolArray:
Enabled: false

# too restrictive
Style/FrozenStringLiteralComment:
Enabled: false

# default complexity is low at 8
Metrics/PerceivedComplexity:
Max: 12

# if works as well as safe navigation (&.)
Style/SafeNavigation:
Enabled: false

# disable this cop, don't agree with it
Style/FetchEnvVar:
Enabled: false

# Buggy check in RuboCop.
# SerpApiClient constructor is rated to 9
# def initialize(params = {})
Metrics/CyclomaticComplexity:
Max: 12

# There is a tradeoff between line length and line count.
Metrics/ClassLength:
Max: 140

# Keyword args are readable.
Metrics/ParameterLists:
CountKeywordArgs: false

# this rule doesn't always work well with Ruby
Layout/FirstHashElementIndentation:
Enabled: false

# Dir glob is already sorted, but the explicit sort documents intent.
Lint/RedundantDirGlobSort:
Enabled: false

# RSpec describe/context blocks legitimately span many lines
Metrics/BlockLength:
AllowedMethods:
- describe
- context

AllCops:
# hide message
SuggestExtensions: false
# show new cops
NewCops: enable
Exclude:
- 'Gemfile'
- 'benchmark/**/*'
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4.0.5
10 changes: 10 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# frozen_string_literal: true

source "https://rubygems.org"

# `require: false` keeps a gem installed but out of Bundler's automatic require;
# code that needs it must `require` it itself.
# benchmark-ips, memory_profiler and nokogiri are required explicitly by the
# scripts under benchmark/.
gem "benchmark-ips", require: false
gem "memory_profiler", require: false
gem "nokogiri", require: false
gem "nokolexbor"
gem "rspec"
gem "rubocop", require: false
93 changes: 93 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
GEM
remote: https://rubygems.org/
specs:
ast (2.4.3)
benchmark-ips (2.15.1)
diff-lcs (1.6.2)
json (2.19.8)
language_server-protocol (3.17.0.5)
lint_roller (1.1.0)
memory_profiler (1.1.0)
nokogiri (1.19.3-x86_64-linux-gnu)
racc (~> 1.4)
nokolexbor (0.7.0-x86_64-linux)
parallel (2.1.0)
parser (3.3.11.1)
ast (~> 2.4.1)
racc
prism (1.9.0)
racc (1.8.1)
rainbow (3.1.1)
regexp_parser (2.12.0)
rspec (3.13.2)
rspec-core (~> 3.13.0)
rspec-expectations (~> 3.13.0)
rspec-mocks (~> 3.13.0)
rspec-core (3.13.6)
rspec-support (~> 3.13.0)
rspec-expectations (3.13.5)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-mocks (3.13.8)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.13.0)
rspec-support (3.13.7)
rubocop (1.87.0)
json (~> 2.3)
language_server-protocol (~> 3.17.0.2)
lint_roller (~> 1.1.0)
parallel (>= 1.10)
parser (>= 3.3.0.2)
rainbow (>= 2.2.2, < 4.0)
regexp_parser (>= 2.9.3, < 3.0)
rubocop-ast (>= 1.49.0, < 2.0)
ruby-progressbar (~> 1.7)
unicode-display_width (>= 2.4.0, < 4.0)
rubocop-ast (1.49.1)
parser (>= 3.3.7.2)
prism (~> 1.7)
ruby-progressbar (1.13.0)
unicode-display_width (3.2.0)
unicode-emoji (~> 4.1)
unicode-emoji (4.2.0)

PLATFORMS
x86_64-linux

DEPENDENCIES
benchmark-ips
memory_profiler
nokogiri
nokolexbor
rspec
rubocop

CHECKSUMS
ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
benchmark-ips (2.15.1) sha256=07a1a9f3c6105ecaf68c174fc3fbcddd71a0e9ada6236ae03093a0dcfd812d59
diff-lcs (1.6.2) sha256=9ae0d2cba7d4df3075fe8cd8602a8604993efc0dfa934cff568969efb1909962
json (2.19.8) sha256=6354310fd76ef69b87d5bd1f38b40d730613baf90b6803d2d0a48f618d32dfaa
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
memory_profiler (1.1.0) sha256=79a17df7980a140c83c469785905409d3027ca614c42c086089d128b805aa8f8
nokogiri (1.19.3-x86_64-linux-gnu) sha256=2f5078620fe12e83669b5b17311b32532a8153d02eee7ad06948b926d6080976
nokolexbor (0.7.0-x86_64-linux) sha256=6348178e41233e67e0f533f84b0b1974b187fe137616541f1453bb7c0c16baf6
parallel (2.1.0) sha256=b35258865c2e31134c5ecb708beaaf6772adf9d5efae28e93e99260877b09356
parser (3.3.11.1) sha256=d17ace7aabe3e72c3cc94043714be27cc6f852f104d81aa284c2281aecc65d54
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
rubocop (1.87.0) sha256=b9d9ddf55116a513f8ef2c7ae660662d8b49301f118d3f0df61865b33a5c188d
rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
unicode-display_width (3.2.0) sha256=0cdd96b5681a5949cdbc2c55e7b420facae74c4aaf9a9815eee1087cb1853c42
unicode-emoji (4.2.0) sha256=519e69150f75652e40bf736106cfbc8f0f73aa3fb6a65afe62fefa7f80b0f80f

BUNDLED WITH
4.0.10
25 changes: 25 additions & 0 deletions benchmark/memory_profile.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# frozen_string_literal: true

require 'memory_profiler'
require 'carousel_parser'
require_relative 'parsers'
require_relative 'tee'

# Profile the memory allocated by a single CarouselParser#parse call on one fixture.
#
# Positional arguments (both optional):
#   ARGV[0] - fixture file name, resolved under ../spec/fixtures (default: van-gogh-paintings.html)
#   ARGV[1] - key into Parsers::ALL selecting the html backend (default: nokolexbor)
fixture = ARGV[0] || 'van-gogh-paintings.html'
backend = (ARGV[1] || 'nokolexbor').to_sym
# fetch (rather than []) so an unknown backend name fails right away with a KeyError.
parser = Parsers::ALL.fetch(backend)

html = File.read(File.expand_path("../spec/fixtures/#{fixture}", __dir__))

report_name = "memory_profile-#{File.basename(fixture, '.html')}-#{backend}.txt"
# Tee is defined in tee.rb (not visible here); presumably it mirrors everything
# written to $stdout into the report file - confirm there.
$stdout = Tee.new(File.expand_path("reports/#{report_name}", __dir__))

begin
  carousel = CarouselParser.new(parser: parser)
  # Parse once outside the profiled block, so the report only covers a repeat call.
  carousel.parse(html)

  report = MemoryProfiler.report { carousel.parse(html) }

  puts "Memory profile: #{fixture} via #{backend} (#{html.bytesize / 1024} KB input)\n\n"
  report.pretty_print(scale_bytes: true)
ensure
  # Close the sink even when parsing or profiling raises, not only on success.
  $stdout.close
end
28 changes: 28 additions & 0 deletions benchmark/parse_benchmark.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

require 'benchmark/ips'
require 'carousel_parser'
require_relative 'parsers'
require_relative 'tee'

# Compare end-to-end CarouselParser.parse throughput across every backend in
# Parsers::ALL, using all html fixtures under ../spec/fixtures as input.
FIXTURES_DIR = File.expand_path('../spec/fixtures', __dir__)

# Tee is defined in tee.rb (not visible here); presumably it mirrors everything
# written to $stdout into the report file - confirm there.
$stdout = Tee.new(File.expand_path('reports/parse_benchmark.txt', __dir__))

begin
  # Fixture file name => html content, read once up front so the timed blocks do no file I/O.
  fixtures = Dir[File.join(FIXTURES_DIR, '*.html')].sort.to_h do |path|
    [File.basename(path), File.read(path)]
  end

  puts "End-to-end parse across all fixtures, by html parser backend\n\n"

  Benchmark.ips do |x|
    x.config(time: 5, warmup: 2)

    # One report per backend; each measured iteration parses every fixture once.
    Parsers::ALL.each do |name, parser|
      x.report(name) { fixtures.each_value { |html| CarouselParser.parse(html, parser: parser) } }
    end

    x.compare!
  end
ensure
  # Close the sink even when fixture loading or a benchmark run raises.
  $stdout.close
end
13 changes: 13 additions & 0 deletions benchmark/parsers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

require 'nokolexbor'
require 'nokogiri'

# Registry of the html parser backends that the benchmark scripts compare.
#
# ALL maps a backend name (Symbol) to a one-argument lambda; the lambda hands
# its argument to that backend's parse entry point and returns the result.
module Parsers
  ALL = {
    nokolexbor: lambda { |markup| Nokolexbor::HTML(markup) },
    nokogiri: lambda { |markup| Nokogiri::HTML(markup) },
    nokogiri5: lambda { |markup| Nokogiri::HTML5(markup) }
  }.freeze
end
Loading