using MAT, MAT_v5 # with some special sauce from PR MAT.jl#23 (https://github.com/simonster/MAT.jl/pull/23)

# NOTE: `summarize` and `xxd` are used throughout this script but are defined in
# the appendix at the bottom of the file; when running top-to-bottom, evaluate
# the appendix first.

f = matopen("simple.mat")
summarize(f.subsystem) # summarize and xxd are defined in the appendix at the bottom

summarize(read(f))

mcos = f.subsystem["_i1"]["MCOS"][2]
data = vec(mcos[1])
fdata = IOBuffer(data)
xxd(data,1,0x80)

# Parse the fixed-size header and the string table at the start of the stream.
#
# Arguments:
#   f - a readable, seekable stream positioned at the start of the data
# Returns:
#   (offsets, strs): the six Uint32 section offsets and the vector of
#   null-delimited strings that follows the header.
# Errors if the first field is not 2 or if the two reserved fields are nonzero.
function parse_header(f)
    id = read(f,Uint32) # First element is a version number? Always 2?
    id == 2 || error("unknown first field (version/id?): ", id)

    # Second element is the number of strings
    n_strs = read(f,Uint32)

    # Followed by up to 6 section offsets (the last two sections seem to be unused)
    offsets = read(f,Uint32,6)

    # And two reserved fields
    all(read(f,Int32,2) .== 0) || error("reserved header fields nonzero")

    # And now we're at the string data section
    @assert position(f) == 0x28
    strs = Array(ASCIIString,n_strs)
    for i = 1:n_strs
        # simply delimited by nulls
        strs[i] = readuntil(f, '\0')[1:end-1] # drop the trailing null byte
    end

    (offsets,strs)
end
seek(fdata,0)
segments, strs = parse_header(fdata)
summarize(strs)

# Parse the class-information section.
#
# Arguments:
#   f           - stream positioned at the start of the section
#   strs        - string table returned by `parse_header`
#   section_end - stream position at which the section ends
# Returns:
#   a vector of (package, name) string tuples, one per class record. An index
#   of 0 is mapped to the empty string.
function parse_class_info(f,strs,section_end)
    # The first four int32s unknown. Always 0? Or is this simply an empty slot for another class?
    all(read(f,Int32,4) .== 0) || error("unknown header to class information")

    classes = Array((ASCIIString,ASCIIString),0)
    while position(f) < section_end
        package_idx = read(f,Uint32)
        package = package_idx > 0 ? strs[package_idx] : ""
        name_idx = read(f,Uint32)
        name = name_idx > 0 ? strs[name_idx] : ""
        unknowns = read(f,Uint32,2)
        all(unknowns .== 0) || error("discovered a nonzero class property for ",name)
        push!(classes,(package, name))
    end
    classes
end
seek(fdata,segments[1])
classes = parse_class_info(fdata,strs, segments[2])

# Parse a properties section (used for both segment 2 and segment 4).
#
# Arguments:
#   f           - stream positioned at the start of the section
#   names       - string table; property names (and flag-0 values) index into it
#   heap        - array that flag-1 properties index into (offset by 3)
#   section_end - stream position at which the section ends
# Returns:
#   a vector with one Dict{ASCIIString,Any} per property record; empty if the
#   section has zero length.
# Errors on a nonzero section header or an unknown property flag.
function parse_properties(f::IO,names,heap,section_end)
    props = Array(Dict{ASCIIString,Any},0)
    position(f) >= section_end && return props
    # The section header must be read from the stream we were handed
    all(read(f,Int32,2) .== 0) || error("unknown header to properties segment")

    # sizehint: 8 int32s would be 2 props per object; this is overly generous
    sizehint(props,iceil((section_end-position(f))/(8*4)))

    while position(f) < section_end
        # For each class, there is first a Int32 describing the number of properties
        nprops = read(f,Int32)
        d = Dict{ASCIIString,Any}()
        sizehint(d,nprops)
        for i=1:nprops
            # For each property, there is an index into our strings
            name_idx = read(f,Int32)
            # A flag describing how the heap_idx is to be interpreted
            flag = read(f,Int32)
            # And a value; often an index into some data structure
            heap_idx = read(f,Int32)

            key = names[name_idx]
            if flag == 0
                # This means that the property is stored in the names array
                d[key] = names[heap_idx]
            elseif flag == 1
                # The property is stored in the MCOS FileWrapper__ heap
                d[key] = heap[heap_idx+3] # But... the index is off by 3!? Crazy.
            elseif flag == 2
                # The property is a boolean, and the heap_idx itself is the value
                @assert 0 <= heap_idx <= 1 "boolean flag has a value other than 0 or 1"
                d[key] = bool(heap_idx)
            else
                error("unknown flag ",flag, " for property ",key, " with heap index ",heap_idx)
            end
        end
        push!(props,d)

        # Jump to the next 8-byte aligned offset
        if position(f) % 8 != 0
            seek(f,iceil(position(f)/8)*8)
        end
    end
    props
end
seek(fdata,segments[2])
seg2_props = parse_properties(fdata,strs,mcos,segments[3])
summarize(seg2_props)

# Parse the object-information section.
#
# Arguments:
#   f           - stream positioned at the start of the section
#   section_end - stream position at which the section ends
# Returns:
#   a vector of (class_idx, segment1_idx, segment2_idx, obj_id) tuples, one per
#   six-Int32 record.
function parse_object_info(f, section_end)
    # The first six int32s unknown. Always 0? Or perhaps reserved space for an extra elt?
    all(read(f,Int32,6) .== 0) || error("unknown header to object information")

    object_info = Array((Int,Int,Int,Int),0)
    while position(f) < section_end
        class_idx = read(f,Int32)
        unknown1 = read(f,Int32)
        unknown2 = read(f,Int32)
        segment1_idx = read(f,Int32) # The index into segment 2
        segment2_idx = read(f,Int32) # The index into segment 4
        obj_id = read(f,Int32)

        @assert unknown1 == unknown2 == 0 "discovered a nonzero object property"
        push!(object_info,(class_idx,segment1_idx,segment2_idx,obj_id))
    end
    object_info
end
seek(fdata,segments[3])
obj_info = parse_object_info(fdata,segments[4])

# Let's map the class_idx to the classname so it's a bit more readable
summarize(map(x -> (classes[x[1]][2],x[2],x[3],x[4]), obj_info))

seek(fdata,segments[4])
seg4_props = parse_properties(fdata,strs,mcos,segments[5])
summarize(seg4_props)

# Consume the final segment: read every byte up to `segment_end`, hex-dump the
# bytes if any of them is nonzero, and assert that the stream is then exhausted.
function parse_segment5(f, segment_end)
    seg5 = read(f,Uint8,segment_end-position(f))
    if any(seg5 .!= 0)
        xxd(seg5)
    end

    @assert segment_end == position(f) && eof(f) "there's more data to be had!"
end
seek(fdata,segments[5])
parse_segment5(fdata, segments[6])

println("The last element of FileWrapper__'s array:")
print(" ")
summarize(mcos[end]," ")

objs = Array(Dict{ASCIIString,Any},length(obj_info))
for (i,info) in enumerate(obj_info)
    # Get the property from either segment 2 or segment 4
    props = info[2] > 0 ? seg2_props[info[2]] : seg4_props[info[3]]
    # And merge it with the matfile defaults for this class
    objs[i] = merge(mcos[end][info[1]+1],props)
end
summarize(objs)

# More complicated files
f = matopen("fiobj.mat")
mcos = f.subsystem["_i1"]["MCOS"][2]
data = vec(mcos[1])
fdata = IOBuffer(data)
seek(fdata,0)
segments, strs = parse_header(fdata)
seek(fdata,segments[1])
classes = parse_class_info(fdata,strs, segments[2])
seek(fdata,segments[2])
seg2_props = parse_properties(fdata,strs,mcos,segments[3])
seek(fdata,segments[3])
obj_info = parse_object_info(fdata,segments[4])
seek(fdata,segments[4])
seg4_props = parse_properties(fdata,strs,mcos,segments[5])
seek(fdata,segments[5])
parse_segment5(fdata, segments[6])

objs = Array(Dict{ASCIIString,Any},length(obj_info))
for (i,info) in enumerate(obj_info)
    # Get the property from either segment 2 or segment 4
    props = info[2] > 0 ? seg2_props[info[2]] : seg4_props[info[3]]
    # And merge it with the matfile defaults for this class
    objs[i] = merge(mcos[end][info[1]+1],props)
end
summarize(objs)
println()
summarize(mcos)

# Simple utilities for viewing of hex and big nested data structures

# Replace every byte outside 0x20-0x7e with '.' in place and return the array.
cleanascii!{N}(A::Array{Uint8,N}) = (A[(A .< 0x20) | (A .> 0x7e)] = uint8('.'); A)

# Print a hex dump of x[start:stop], eight bytes per row. Each row shows the
# zero-based offset, the bytes in hex (grouped by four), the row as ASCII, and
# the row reinterpreted as Int32 values.
function xxd(x, start=1, stop=length(x))
    for i=div(start-1,8)*8+1:8:stop
        row = i:i+7
        # hexadecimal
        @printf("%04x: ",i-1)
        for r=row
            # A skipped byte is padded with two characters, the width of "%02x",
            # so the columns to the right stay aligned on partial rows
            start <= r <= stop ? @printf("%02x",x[r]) : print("  ")
            r % 4 == 0 && print(" ")
        end
        # ASCII
        print(" ",ascii(cleanascii!(x[i:min(i+7,end)]))," ")
        # Int32
        for j=i:4:i+7
            start <= j && j+3 <= stop ? @printf("% 12d ",reinterpret(Int32,x[j:j+3])[1]) : print(" "^12)
        end
        # Float64:
        # start <= i && i+7 <= stop ? @printf("%.3e",reinterpret(Float64,x[row])[1]) : nothing
        println()
    end
end

# Summarize - smartly display large nested data structures for some datatypes
#
# Every method prints to standard output and takes an optional `prefix` string
# that is used to indent nested output.
summarize(x::Any,prefix="") = print(string(summary(x)))
summarize(x::String,prefix="") = print(string(summary(x),": \"", x, "\""))
summarize(x::Real,prefix="") = print(string(summary(x),": ", x))

# Tuples print inline as (a,b,...); strings are quoted, reals are printed
# directly and anything else is summarized recursively.
function summarize(x::Tuple,prefix="")
    print("(")
    n = length(x)
    for (j,t) in enumerate(x)
        if isa(t,String)
            print("\"",t,"\"")
        elseif isa(t,Real)
            print(t)
        else
            summarize(t,string(prefix," "))
        end
        j < n && print(",")
    end
    print(")")
end

# Dicts print their summary followed by one `key=>value` entry per line.
function summarize(x::Dict,prefix="")
    print(string(summary(x),": ",(isempty(x) ? "{}" : "")))
    for (k,v) in x
        println()
        if isa(k,String)
            print(prefix," \"",k,"\"=>")
        else
            # `summarize` prints as a side effect and returns nothing, so it
            # has to be called between the prints, not passed as an argument
            print(prefix," ")
            summarize(k,string(prefix," "))
            print("=>")
        end
        summarize(v,string(prefix," "))
    end
end

# Arrays of reals print at most their first 10 elements on one line; other
# arrays print one indexed element per line, summarized recursively.
function summarize{T,N}(x::AbstractArray{T,N},prefix="")
    print(string(summary(x),": "))
    if T<:Real
        truncated = length(x) > 10
        maxelt = truncated ? 10 : length(x)
        # This is very wrong, but it works for the purposes above...
        Base.show_comma_array(STDOUT,x[1:maxelt],"[",(truncated ? ",…]" : "]"))
    else
        for (j,v) in enumerate(x)
            println()
            print(prefix," [$j] ")
            summarize(v,string(prefix," "))
        end
    end
end;