我最终使用 JSON::Stream 解决了这个问题，它提供了 start_document、start_object 等回调。
我给我的“解析器”添加了一个 to_enum 方法，它会在每个“资源”对象解析完成时将其逐个产出。请注意，除非您完整解析整个 JSON 流，否则 ResourceCollectionNode 永远不会真正被用到；而 ResourceNode 只是出于命名目的而存在的 ObjectNode 子类，我之后可能会直接去掉它：
# Incremental JSON reader built on JSON::Stream::Parser (json-stream gem, which
# the caller must have loaded). It reads the IO in chunks and hands out every
# object that sits directly below the document root (a "resource") as soon as
# that object has been closed, so a consumer can stop early without reading
# the rest of the stream.
#
# The document root may be an object (resources are its values) or an array
# (resources are its elements). Once the whole document has been parsed the
# complete root value is available through #result.
class Parser
  # Callback names registered on the JSON::Stream parser; each one is bound to
  # the instance method of the same name.
  METHODS = %w[start_document end_document start_object end_object start_array end_array key value].freeze

  # @return [Hash, Array, Object, nil] the fully parsed root value, or nil
  #   while the document is still incomplete.
  attr_reader :result

  # @param io [#read, #eof?] source of JSON text
  # @param chunk_size [Integer] number of bytes requested per read
  def initialize(io, chunk_size = 1024)
    @io = io
    @chunk_size = chunk_size
    @parser = JSON::Stream::Parser.new
    # register callback methods
    METHODS.each { |name| @parser.send(name, &method(name)) }
  end

  # Builds an Enumerator that pulls chunks from the IO and yields each
  # resource as it completes.
  #
  # @return [Enumerator] yields one Hash per completed resource
  def to_enum
    Enumerator.new do |yielder|
      @yielder = yielder
      begin
        @parser << @io.read(@chunk_size) until @io.eof?
      ensure
        # Never keep a reference to a yielder whose enumeration has ended.
        @yielder = nil
      end
    end
  end

  # Resets parsing state at the beginning of a document.
  def start_document
    @stack = []
    @result = nil
  end

  # Nothing to do: #result is assigned when the root value is closed.
  def end_document; end

  # Opens a new object. The node class only reflects the nesting depth:
  # root -> ResourceCollectionNode, depth 1 -> ResourceNode, deeper -> ObjectNode.
  def start_object
    node_class =
      case @stack.size
      when 0 then ResourceCollectionNode
      when 1 then ResourceNode
      else ObjectNode
      end
    @stack.push(node_class.new)
  end

  # Closes the innermost object, stores it in its parent (or as the result if
  # it was the root) and emits it when it is a resource.
  def end_object
    finished = @stack.pop.obj
    attach(finished)
    # Exactly one node left on the stack means the object just closed sat
    # directly below the root. The yielder is only set while #to_enum runs.
    @yielder << finished if @stack.size == 1 && @yielder
  end

  # Closes the innermost array and stores it in its parent, or as the result
  # when the array is the document root.
  def end_array
    attach(@stack.pop.obj)
  end

  # Opens a new array.
  def start_array
    @stack.push(ArrayNode.new)
  end

  # Receives an object key; the enclosing object node pairs it with the next
  # value it is given.
  def key(key)
    @stack[-1] << key
  end

  # Receives a scalar value (string, number, true, false or nil).
  def value(value)
    attach(value)
  end

  # Collects alternating key / value pushes into a Hash.
  class ObjectNode
    attr_reader :obj

    def initialize
      @obj = {}
      @key = nil
    end

    # The first push after a completed pair is taken as the key, the next one
    # as its value.
    #
    # @return [self] so pushes can be chained
    def <<(node)
      if @key
        @obj[@key] = node
        @key = nil
      else
        @key = node
      end
      self
    end
  end

  # Object located directly below the root; exists for naming purposes only.
  class ResourceNode < ObjectNode
  end

  # Node that contains all the resources - a Hash keyed by url
  class ResourceCollectionNode < ObjectNode
  end

  # Collects pushed values into an Array.
  class ArrayNode
    attr_reader :obj

    def initialize
      @obj = []
    end

    # @return [self] so pushes can be chained
    def <<(node)
      @obj << node
      self
    end
  end

  private

  # Hands a finished value to the innermost open container. With no container
  # left the value is the document root and becomes the result.
  def attach(finished)
    if @stack.empty?
      @result = finished
    else
      @stack.last << finished
    end
  end
end
下面是一个使用示例：
# Sample payload for the demo: one JSON object with six entries keyed "1"
# through "6". Each entry carries a "url", a "title" and an "http_req" object
# with "status" and "time".
#
# @return [String] the JSON document text
def json
  <<~JSON_DOC
    {
    "1": {
    "url": "url_1",
    "title": "title_1",
    "http_req": {
    "status": 200,
    "time": 10
    }
    },
    "2": {
    "url": "url_2",
    "title": "title_2",
    "http_req": {
    "status": 404,
    "time": -1
    }
    },
    "3": {
    "url": "url_1",
    "title": "title_1",
    "http_req": {
    "status": 200,
    "time": 10
    }
    },
    "4": {
    "url": "url_2",
    "title": "title_2",
    "http_req": {
    "status": 404,
    "time": -1
    }
    },
    "5": {
    "url": "url_1",
    "title": "title_1",
    "http_req": {
    "status": 200,
    "time": 10
    }
    },
    "6": {
    "url": "url_2",
    "title": "title_2",
    "http_req": {
    "status": 404,
    "time": -1
    }
    }
    }
  JSON_DOC
end
require 'stringio'

# Demo: stream the sample document in 100-byte chunks and stop after the first
# resource, showing that the remainder of the input never has to be parsed.
# The json-stream gem must already be loaded, since Parser instantiates
# JSON::Stream::Parser.
io = StringIO.new(json)
begin
  # The class defined in this file is Parser; no ResourceParser constant exists.
  resource_parser = Parser.new(io, 100)
  count = 0
  resource_parser.to_enum.each do |resource|
    count += 1
    puts "READ: #{count}"
    pp resource
    break
  end
ensure
  # Close the IO even if parsing raises.
  io.close
end
输出:
READ: 1
{"url"=>"url_1", "title"=>"title_1", "http_req"=>{"status"=>200, "time"=>10}}