class PDF::Reader::TextRun

A value object that represents one or more consecutive characters on a page.

Attributes

font_size[R]
origin[R]
text[R]
to_s[R]
width[R]

Public Class Methods

new(x, y, width, font_size, text) click to toggle source
# File lib/pdf/reader/text_run.rb, line 17
# Build a run from its origin coordinates, width, font size and text.
#
# x, y      - coordinates passed to PDF::Reader::Point to form the origin
# width     - stored as the run's width
# font_size - stored as the run's font size
# text      - stored as the run's text
def initialize(x, y, width, font_size, text)
  @origin = PDF::Reader::Point.new(x, y)
  @width, @font_size, @text = width, font_size, text
end

Public Instance Methods

+(other) click to toggle source
# File lib/pdf/reader/text_run.rb, line 64
# Merge this run with +other+ into a new TextRun.
#
# The new run keeps this run's x, y and font size and extends to
# other.endx. When the horizontal gap between the end of this run and the
# start of +other+ is below 20% of the font size the two texts are joined
# directly; otherwise a single space is placed between them.
#
# Raises ArgumentError when mergable?(other) is false.
def +(other)
  raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)

  gap = other.x - endx
  merged_text =
    if gap < (font_size * 0.2)
      text + other.text
    else
      "#{text} #{other.text}"
    end

  TextRun.new(x, y, other.endx - x, font_size, merged_text)
end
<=>(other) click to toggle source

Allows collections of TextRun objects to be sorted. They will be sorted in order of their position on a Cartesian plane - Top Left to Bottom Right

# File lib/pdf/reader/text_run.rb, line 26
# Compare two runs by position so a collection sorts top-left to
# bottom-right: a larger y sorts first, and for equal y a smaller x sorts
# first.
#
# Returns 0 when both coordinates are equal, -1 when this run sorts before
# +other+, 1 when it sorts after, and nil when none of the comparisons hold.
def <=>(other)
  return 0 if x == other.x && y == other.y
  return 1 if y < other.y
  return -1 if y > other.y
  return -1 if x < other.x

  1 if x > other.x
end
endx() click to toggle source
# File lib/pdf/reader/text_run.rb, line 48
# X coordinate where the run ends: the origin's x plus the width.
# The value is computed once and cached in @endx.
def endx
  return @endx if @endx

  @endx = @origin.x + width
end
endy() click to toggle source
# File lib/pdf/reader/text_run.rb, line 52
# Y coordinate of the run's far edge: the origin's y plus the font size.
# The value is computed once and cached in @endy.
def endy
  return @endy if @endy

  @endy = @origin.y + font_size
end
inspect() click to toggle source
# File lib/pdf/reader/text_run.rb, line 74
# Compact debugging representation: the text followed by the width,
# font size and origin coordinates.
def inspect
  metrics = "w:#{width} f:#{font_size}"
  position = "@#{x},#{y}"

  "#{text} #{metrics} #{position}"
end
intersect?(other_run) click to toggle source
# File lib/pdf/reader/text_run.rb, line 78
# True when the bounding box of this run (x..endx by y..endy) overlaps or
# touches the bounding box of +other_run+.
def intersect?(other_run)
  # Horizontal extents must overlap (shared edges count).
  return false unless x <= other_run.endx
  return false unless endx >= other_run.x

  # Vertical extents must overlap as well.
  return false unless endy >= other_run.y

  y <= other_run.endy
end
intersection_area_percent(other_run) click to toggle source

Returns the fraction of this text run's area that is overlapped by another run.

# File lib/pdf/reader/text_run.rb, line 84
# Fraction of this run's area that is covered by +other_run+.
#
# Returns 0 when the runs do not intersect, or when this run has no area
# (zero width or zero font size). Otherwise returns the intersection area
# divided by this run's area, as a Float.
def intersection_area_percent(other_run)
  return 0 unless intersect?(other_run)

  # A zero-area run would otherwise divide 0.0 by 0 and yield NaN.
  own_area = area
  return 0 if own_area.zero?

  overlap_width = [endx, other_run.endx].min - [x, other_run.x].max
  overlap_height = [endy, other_run.endy].min - [y, other_run.y].max

  (overlap_width * overlap_height).to_f / own_area
end
mean_character_width() click to toggle source
# File lib/pdf/reader/text_run.rb, line 56
# Average width per character: the run's width divided by the character
# count.
def mean_character_width
  glyphs = character_count
  @width / glyphs
end
mergable?(other) click to toggle source
# File lib/pdf/reader/text_run.rb, line 60
# True when +other+ can be merged onto the end of this run: both runs share
# the same integer-truncated y, have the same font size, and other's x lies
# inside this run's mergable range.
def mergable?(other)
  return false unless y.to_i == other.y.to_i
  return false unless font_size == other.font_size

  mergable_range.include?(other.x)
end
x() click to toggle source
# File lib/pdf/reader/text_run.rb, line 40
# X coordinate of the run's origin point.
def x
  point = @origin
  point.x
end
y() click to toggle source
# File lib/pdf/reader/text_run.rb, line 44
# Y coordinate of the run's origin point.
def y
  point = @origin
  point.y
end

Private Instance Methods

area() click to toggle source
# File lib/pdf/reader/text_run.rb, line 96
# Area of the run's bounding box: horizontal extent times vertical extent.
def area
  horizontal_extent = endx - x
  vertical_extent = endy - y

  horizontal_extent * vertical_extent
end
character_count() click to toggle source

Assume string encoding is marked correctly and we can trust String#size to return a character count

# File lib/pdf/reader/text_run.rb, line 106
# Number of characters in the run's text, as a Float.
# Relies on the text's size reflecting its character count.
def character_count
  glyphs = @text.size
  glyphs.to_f
end
mergable_range() click to toggle source
# File lib/pdf/reader/text_run.rb, line 100
# Range of x values at which a following run may start and still be merged
# with this one: from 3 units before endx up to endx plus the font size.
# The range is built once and cached in @mergable_range.
def mergable_range
  return @mergable_range if @mergable_range

  @mergable_range = ((endx - 3)..(endx + font_size))
end