# Location of the TF feature file that every reader variant below is run against.
base = "$(homedir())/text-fabric-data/etcbc/bhsa/tf/c"
feature = "g_word_utf8"
featurePath = "$base/$feature.tf"

"""
    error(msg)

Write `msg` followed by a newline to standard error and return normally.

This definition shadows `Base.error` inside this file and, unlike it, does not throw:
the readers report failure through their return value instead.
"""
function error(msg)
    return write(stderr, "$msg\n")
end

"""
    showResults(errors, data)

Print a short summary of a read.

When `errors` is zero, print the number of results and the highest key of `data`,
followed by the values stored under keys 1, 2 and the highest key (keys that are absent
are skipped). Otherwise print the error count. `data` may be a dictionary or a vector.
"""
function showResults(errors, data)
    if errors != 0
        println("$errors errors")
        return nothing
    end
    if isempty(data)
        # `maximum` has no answer for an empty collection; report the count only.
        println("0 results")
        return nothing
    end
    maxNode = maximum(keys(data))
    println("$(length(data)) results, last node $maxNode")
    for n in (1, 2, maxNode)
        # `in keys(...)` works for both dictionaries and vectors.
        n in keys(data) && println(data[n])
    end
    return nothing
end

"""
    valueFromTf(tf)

Decode the escape sequences of a TF value and return the result as a `String`.

A doubled backslash stands for one backslash, backslash-t for a tab and backslash-n
for a newline.
"""
function valueFromTf(tf)
    # Splitting on the doubled backslash first keeps an escaped backslash that happens
    # to be followed by `t` or `n` from being mistaken for a tab or newline escape.
    pieces = (
        replace(replace(x, "\\t" => "\t"), "\\n" => "\n") for x in split(tf, "\\\\")
    )
    return join(pieces, "\\")
end

"""
    setFromSpec(spec)

Expand a node specification into the set of node numbers it denotes.

`spec` is a comma-separated list whose items are either a single number or a range
written as two numbers joined by a hyphen; a range given in descending order is
accepted. Items that are not valid unsigned integers make `parse` throw.
"""
function setFromSpec(spec)::Set{UInt32}
    covered = Set{UInt32}()
    for r_str in split(spec, ",")
        bounds = split(r_str, "-")
        if length(bounds) == 1
            push!(covered, parse(UInt32, r_str))
        else
            (b, e) = minmax(parse(UInt32, bounds[1]), parse(UInt32, bounds[2]))
            union!(covered, b:e)
        end
    end
    return covered
end

# Consume the metadata header from `lines`: every leading line that starts with "@",
# plus the blank line that must follow them. Returns the number of lines consumed, or
# `nothing` (after reporting the problem) when a non-blank line follows the metadata.
function _skipMetadata(lines)
    consumed = 0
    for line in lines
        consumed += 1
        text = rstrip(line)
        startswith(text, "@") && continue
        if text != ""
            error("Line $consumed: missing blank line after metadata")
            return nothing
        end
        break
    end
    return consumed
end

# Read the whole file and split it into lines, dropping the empty string that a
# trailing newline leaves at the end.
function _slurpLines(path)
    lines = split(read(path, String), "\n")
    if lines[end] == ""
        pop!(lines)
    end
    return lines
end

# Split one data line into `(nodes, valTf)`. A line with a single field carries only a
# value and applies to `implicit_node`; a line with two fields starts with a node
# specification. Returns `nothing` when the line has more than two fields.
function _splitDataLine(line, implicit_node)
    fields = split(line, "\t")
    nfields = length(fields)
    nfields > 2 && return nothing
    if nfields == 2
        return (setFromSpec(fields[1]), fields[2])
    end
    return (Set{UInt32}((implicit_node,)), fields[1])
end

# Decode a raw value field; the empty field needs no decoding.
_stringValue(valTf) = valTf == "" ? "" : valueFromTf(valTf)

"""
    readFile(path)

Read the feature file at `path` into memory and locate its data section.

Return `(lines, firstI)`, where `lines` holds the lines of the file and `firstI` is the
1-based index, as a `UInt32`, of the first line after the metadata header. Return
`false`, after reporting the reason on standard error, when the file does not exist or
the header is not followed by a blank line.
"""
function readFile(path)
    if !isfile(path)
        error("TF reading: feature file '$path' does not exist")
        return false
    end
    lines = _slurpLines(path)
    consumed = _skipMetadata(lines)
    consumed === nothing && return false
    return (lines, UInt32(consumed + 1))
end

"""
    readTf(path)

Stream the feature file at `path` line by line into a dictionary keyed by node number.

Return `(errors, data)` as produced by `readDataTf`, or `false` when the file does not
exist or its metadata header is malformed.
"""
function readTf(path)
    if !isfile(path)
        error("TF reading: feature file '$path' does not exist")
        return false
    end
    # The do-block closes the file even when parsing throws.
    return open(path) do fh
        consumed = _skipMetadata(eachline(fh))
        consumed === nothing ? false : readDataTf(fh, consumed)
    end
end

"""
    readDataTf(fh, firstI)

Read the remaining lines of the open stream `fh` as data lines.

`firstI` is the number of lines already consumed from the stream; it is used only to
number the lines in error messages. Lines with more than two tab-separated fields are
reported and counted. Values are decoded as strings. Return `(errors, data)`.
"""
function readDataTf(fh, firstI)
    data = Dict{Integer, String}()
    errors = 0
    implicit_node = 1
    i = firstI
    for line in eachline(fh)
        i += 1
        parsed = _splitDataLine(line, implicit_node)
        if parsed === nothing
            error("$(i) : wrongFields")
            errors += 1
            continue
        end
        (nodes, valTf) = parsed
        # A line without a node specification applies to the node after the highest
        # node of the previous line.
        implicit_node = maximum(nodes) + 1
        value = _stringValue(valTf)
        for n in nodes
            data[n] = value
        end
    end
    return (errors, data)
end

"""
    readTfList(path)

Stream the feature file at `path` line by line into a vector of values.

Return `(errors, data)` as produced by `readDataTfList`, or `false` when the file does
not exist or its metadata header is malformed.
"""
function readTfList(path)
    if !isfile(path)
        error("TF reading: feature file '$path' does not exist")
        return false
    end
    return open(path) do fh
        consumed = _skipMetadata(eachline(fh))
        consumed === nothing ? false : readDataTfList(fh, consumed)
    end
end

"""
    readDataTfList(fh, firstI)

Like `readDataTf`, but append the values to a vector instead of storing them by node.

One entry is appended per node a line covers, in file order; the node numbers
themselves are not recorded, so a position in the vector equals the node number only
when the file covers the nodes consecutively starting at 1. Return `(errors, data)`.
"""
function readDataTfList(fh, firstI)
    data = String[]
    errors = 0
    implicit_node = 1
    i = firstI
    for line in eachline(fh)
        i += 1
        parsed = _splitDataLine(line, implicit_node)
        if parsed === nothing
            error("$(i) : wrongFields")
            errors += 1
            continue
        end
        (nodes, valTf) = parsed
        implicit_node = maximum(nodes) + 1
        value = _stringValue(valTf)
        for _ in nodes
            push!(data, value)
        end
    end
    return (errors, data)
end

"""
    readTfSlurp(path)

Read the feature file at `path` in one go and parse it into a dictionary keyed by node.

Return `(errors, data)` as produced by `readDataTfSlurp`, or `false` when the file does
not exist or its metadata header is malformed.
"""
function readTfSlurp(path)
    if !isfile(path)
        error("TF reading: feature file '$path' does not exist")
        return false
    end
    lines = _slurpLines(path)
    consumed = _skipMetadata(lines)
    consumed === nothing && return false
    return readDataTfSlurp(lines, consumed + 1)
end

"""
    readDataTfSlurp(lines, firstI)

Parse the data lines `lines[firstI:end]` into a dictionary keyed by node number.

`firstI` is the 1-based index of the first data line. Lines with more than two
tab-separated fields are reported with their index in `lines` and counted. Return
`(errors, data)`.
"""
function readDataTfSlurp(lines, firstI)
    data = Dict{Integer, String}()
    errors = 0
    implicit_node = 1
    # Start one below the first data line so that the increment at the top of the loop
    # yields the index of the line being processed.
    i = firstI - 1
    for line in @view lines[firstI:end]
        i += 1
        parsed = _splitDataLine(line, implicit_node)
        if parsed === nothing
            error("$(i) : wrongFields")
            errors += 1
            continue
        end
        (nodes, valTf) = parsed
        implicit_node = maximum(nodes) + 1
        value = _stringValue(valTf)
        for n in nodes
            data[n] = value
        end
    end
    return (errors, data)
end

"""
    readDataTfSlurpOpt(lines, firstI::UInt32)

Variant of `readDataTfSlurp` with concretely typed state and substring values.

Return `(errors, data)`, where `errors` is a `UInt32` and `data` maps `UInt32` node
numbers to `SubString{String}` values.
"""
function readDataTfSlurpOpt(lines, firstI::UInt32)
    data = Dict{UInt32, SubString{String}}()
    errors::UInt32 = 0
    implicit_node::UInt32 = 1
    i::UInt32 = firstI - 1
    for line in @view lines[firstI:end]
        i += 1
        parsed = _splitDataLine(line, implicit_node)
        if parsed === nothing
            error("$(i) : wrongFields")
            errors += 1
            continue
        end
        (nodes, valTf) = parsed
        implicit_node = maximum(nodes) + 1
        # Every escape sequence contains a backslash, so a field without one is
        # already its own decoded value and can be stored without copying.
        value = occursin('\\', valTf) ? SubString(valueFromTf(valTf)) : valTf
        for n in nodes
            data[n] = value
        end
    end
    return (errors, data)
end

# Show the summary of a reader result; a failed read (`false`) has already been
# reported on standard error by the reader.
function _report(result)
    result === false && return nothing
    return showResults(result...)
end

# Run every reader variant against the same feature file and print a summary of each.
_report(readTf(featurePath))
_report(readTfList(featurePath))
_report(readTfSlurp(featurePath))

fileResult = readFile(featurePath)
if fileResult !== false
    (lines, firstIndex) = fileResult
    _report(readDataTfSlurpOpt(lines, firstIndex))
end