-- Copyright 2012-2017 Patrick Gundlach (patrick@gundla.ch)
-- Public repository: https://github.com/pgundlach/lua-check-hyphen
-- Licensed under the MIT license. See the file 'mit-license.txt' for exact terms.
-- Version: 0.7a

-- for debugging purpose:
-- function w( ... )
--   texio.write_nl("--->" .. string.format(...))
-- end

-- UTF-8 aware string functions (slnunicode, shipped with LuaTeX).
local sln = unicode.utf8

-- Return a list with all non-empty matches of the pattern p in the string s.
local explode = function(s,p)
    local t = { }
    for match in sln.gmatch(s,p) do
        if match ~= "" then
            t[#t+1] = match
        end
    end
    return t
end

-- The module table. It is a global on purpose: options such as `final`,
-- `compact`, `whitelist`, `mark` and `nofile` are read from it below and are
-- expected to be set from outside this file.
luacheckhyphen = {}

local hyphenattr = luatexbase.new_attribute("hyphenattr")

-- hyphenwords[n]: the word (with one "-") that belongs to attribute value n
luacheckhyphen.hyphenwords = {}
-- all words that were actually broken at a line end and are not whitelisted
luacheckhyphen.all_hyphenatedwords = {}
-- set of "word-with-hyphen" strings that are known to be fine
luacheckhyphen.word_whitelist = {}

-- Template node for the mark that is placed behind a suspicious hyphenation.
luacheckhyphen.rectangle = node.new("whatsit","pdf_literal")
luacheckhyphen.rectangle.data = "q 0 0 10 10 re f S Q"

local a_glyph_node = node.id("glyph")
local a_disc_node = node.id("disc")
local a_glue_node = node.id("glue")
local a_whatsit_node = node.id("whatsit")
local subtype_rightskip = 9

-- Return the characters a ligature glyph node is made of as a UTF-8 string.
-- Nested ligatures (components that have components themselves) are resolved
-- recursively.
luacheckhyphen.deligature = function ( glyph_node )
    local head = glyph_node.components
    local chars = {}
    while head do
        if head.id == a_glyph_node then
            if head.components then
                chars[#chars + 1] = luacheckhyphen.deligature(head)
            else
                chars[#chars + 1] = sln.char(head.char)
            end
        end
        head = head.next
    end
    return table.concat(chars)
end

-- This function analyzes the list beginning at head. If it finds a disc node,
-- it goes back until it finds the word start (the node after the previous
-- glue). Then it walks forward through the word and builds the word with a
-- hyphen at the position of this disc node. For example the German word
-- "Salpetersäure" has these disc nodes: "Sal-petersäure", "Salpe-tersäure",
-- "Salpeter-säure", and "Salpetersäu-re". Each of these "word with hyphen"
-- gets stored in the array hyphenwords; the index is attached to the disc
-- node as the attribute hyphenattr so that check_discs can find the word
-- again after line breaking.
--
-- Boxes in the list are not descended into here.
--
-- Returns true (the list itself is left unchanged apart from the attribute).
luacheckhyphen.collect_discs = function(head)
    local hyphencounter = #luacheckhyphen.hyphenwords + 1
    while head do
        if head.id == a_disc_node then
            -- go back to the first node of the word
            local word_start = head
            while word_start.prev and word_start.prev.id ~= a_glue_node do
                word_start = word_start.prev
            end

            local word = ""
            local c = 0 -- number of characters collected so far
            local thisbreakpoint
            while word_start and word_start.id ~= a_glue_node do
                if word_start == head then
                    -- this is our disc: there is a breakpoint after letter c
                    node.set_attribute(head,hyphenattr,hyphencounter)
                    thisbreakpoint = c
                elseif word_start.id == a_glyph_node then
                    if word_start.components then
                        local ligature_chars = luacheckhyphen.deligature(word_start)
                        word = word .. ligature_chars
                        -- count characters, not bytes: the breakpoint is
                        -- applied with the UTF-8 aware sln.sub below
                        c = c + sln.len(ligature_chars)
                    elseif sln.match(sln.char(word_start.char),"%a") then
                        c = c + 1
                        word = word .. sln.char(word_start.char)
                    end
                end
                word_start = word_start.next
            end

            if thisbreakpoint then
                local word_with_hyphen = sln.sub(word,1,thisbreakpoint) .. "-" .. sln.sub(word,thisbreakpoint+1,-1)
                luacheckhyphen.hyphenwords[hyphencounter] = word_with_hyphen
                hyphencounter = #luacheckhyphen.hyphenwords + 1
            end
        end
        head = head.next
    end
    return true
end

-- Remove '-' from word
local function removedash( word )
    local ret = sln.gsub(word,"-","")
    return ret
end

-- True if n is a glue node with the rightskip subtype.
local function is_rightskip( n )
    return n ~= nil and n.id == a_glue_node and n.subtype == subtype_rightskip
end

-- True if n is a disc node at the end of a line. The package luashowhyphens
-- yields disc-whatsit-rightskip, without luashowhyphens it is disc-rightskip.
local function disc_ends_line( n )
    if n.id ~= a_disc_node then return false end
    local nxt = n.next
    if not nxt then return false end
    if is_rightskip(nxt) then return true end
    return nxt.id == a_whatsit_node and is_rightskip(nxt.next)
end

-- Walk the node list starting at head (recursing into boxes) and look for
-- disc nodes that ended up at the end of a line. The word recorded for such
-- a disc node by collect_discs is lowercased and, unless it is whitelisted,
-- stored in all_hyphenatedwords. If drawmarks is set, a copy of the
-- rectangle node is inserted after the disc node.
--
-- head:   first node of the list to check
-- parent: the box that contains the list (passed on to node.insert_after)
--
-- Returns true.
luacheckhyphen.check_discs = function (head,parent)
    while head do
        if head.id < 2 then
            -- a box, recurse
            luacheckhyphen.check_discs(head.list,head)
        elseif disc_ends_line(head) then
            local c = node.has_attribute(head,hyphenattr)
            -- a disc node without a recorded word cannot be reported
            local recorded = c and luacheckhyphen.hyphenwords[c]
            if recorded then
                local word = sln.lower(recorded)
                if luacheckhyphen.word_whitelist[word] then
                    -- word found, but OK (whitelisted)
                else
                    -- The option lives in the module table; the former
                    -- spelling "luachekchyphen" is not defined in this file.
                    if luacheckhyphen.compact == nil or luacheckhyphen.compact == "true" then
                        -- compact: group all hyphenations of the same word
                        local word_without_hyphen = removedash(word)
                        local variants = luacheckhyphen.all_hyphenatedwords[word_without_hyphen] or {}
                        variants[word] = true
                        luacheckhyphen.all_hyphenatedwords[word_without_hyphen] = variants
                    else
                        luacheckhyphen.all_hyphenatedwords[word] = true
                    end
                    if luacheckhyphen.drawmarks then
                        local mark = node.copy(luacheckhyphen.rectangle)
                        node.insert_after(parent,head,mark)
                    end
                end
            end
        end
        head = head.next
    end
    return true
end

-- Iterate over the table t sorted by its keys.
-- http://www.lua.org/pil/19.3.html
local function pairsByKeys (t)
    local a = {}
    for n in pairs(t) do table.insert(a, n) end
    table.sort(a)
    local i = 0                 -- iterator variable
    local iter = function ()    -- iterator function
        i = i + 1
        if a[i] == nil then
            return nil
        else
            return a[i], t[a[i]]
        end
    end
    return iter
end

-- Return the name of the .uhy file for the given jobname. If the command
-- line (global table arg) has -output-directory / --output-directory, either
-- as "flag=dir" or as "flag dir", the directory is prepended.
local function getUhyFilename (tex_jobname)
    local unknown_hyphenation_filename = tex_jobname .. ".uhy"
    local FLAG = '-output-directory'
    local i = 1
    while arg and arg[i] ~= nil do
        local argument = arg[i]
        -- treat "--flag" like "-flag"
        if string.sub(argument, 1, 2) == '--' then
            argument = string.sub(argument, 2)
        end
        if string.sub(argument, 1, #FLAG) == FLAG then
            local prefix
            if string.sub(argument, #FLAG + 1, #FLAG + 1) == '=' then
                prefix = string.sub(argument, #FLAG + 2)
            else
                prefix = arg[i+1]
            end
            -- the directory can be missing if the flag is the last argument
            if prefix and prefix ~= "" then
                unknown_hyphenation_filename = prefix .. "/" .. unknown_hyphenation_filename
            end
            break
        end
        i = i + 1
    end
    return unknown_hyphenation_filename
end

-- Write all collected words to the .uhy file (unless nofile is set) and to
-- the log file. Does nothing if the option final is "true".
--
-- In compact mode every word is written once with all hyphenation points
-- that were seen, otherwise every "word-with-hyphen" gets its own line.
luacheckhyphen.listhyphenatedwords = function()
    if luacheckhyphen.final == "true" then
        return
    end
    -- don't write if the user has turned that off!
    if not luacheckhyphen.nofile then
        local unknown_hyphenation_filename = getUhyFilename(tex.jobname)
        local unknown_hyphenation_file, err = io.open(unknown_hyphenation_filename,"w")
        if not unknown_hyphenation_file then
            -- e.g. the output directory is not writable; keep the TeX run alive
            texio.write_nl(string.format("lua-check-hyphen: cannot write %q (%s)",unknown_hyphenation_filename,tostring(err)))
        else
            local compact = luacheckhyphen.compact == "true" or luacheckhyphen.compact == nil
            for k,v in pairsByKeys(luacheckhyphen.all_hyphenatedwords) do
                if compact then
                    -- k is the word without hyphens, the keys of v are the
                    -- variants with one hyphen each. The (byte) position of
                    -- the hyphen in a variant is the position of the letter
                    -- in k that follows the hyphen.
                    local hyphenpos = {}
                    for l,_ in pairs(v) do
                        local pos = string.find(l,"-",1,true)
                        if pos then
                            hyphenpos[#hyphenpos + 1] = pos
                        end
                    end
                    table.sort(hyphenpos)
                    local word_with_all_hyphenationpoints = {}
                    local cur = 1
                    for i=1,string.len(k) do
                        if hyphenpos[cur] == i then
                            word_with_all_hyphenationpoints[#word_with_all_hyphenationpoints + 1] = "-"
                            cur = cur + 1
                        end
                        word_with_all_hyphenationpoints[#word_with_all_hyphenationpoints + 1] = string.sub(k,i,i)
                    end
                    unknown_hyphenation_file:write(table.concat(word_with_all_hyphenationpoints,"") .. "\n")
                else
                    unknown_hyphenation_file:write(k .. "\n")
                end
            end
            unknown_hyphenation_file:close()
        end
    end
    texio.write_nl("log","All words with unknown hyphenation below")
    for k,_ in pairs(luacheckhyphen.all_hyphenatedwords) do
        texio.write_nl("log",k)
    end
end

-- Read the whitelist files (comma separated list in the option whitelist)
-- and register the two callbacks. Does nothing if the option final is "true".
--
-- A whitelist entry with more than one hyphen ("Sal-pe-ter") is stored once
-- for each hyphen ("Sal-peter", "Salpe-ter"), because collect_discs records
-- words with exactly one hyphen.
-- NOTE(review): entries are stored as written, while check_discs looks up the
-- lowercased word - entries with capital letters will presumably never match.
luacheckhyphen.enable = function()
    if luacheckhyphen.final == "true" then
        return
    end
    if luacheckhyphen.whitelist then
        for _,v in ipairs(string.explode(luacheckhyphen.whitelist,",")) do
            local whitelistfile,err = io.open(v)
            if not whitelistfile then
                if err then
                    texio.write_nl(err)
                else
                    texio.write_nl(string.format("White list %q not found, ignored.",tostring(v)))
                end
            else
                local filecontents = whitelistfile:read("*a") or ""
                whitelistfile:close()
                for _,entry in ipairs(explode(filecontents,"[^%s]+")) do
                    local parts = string.explode(entry,"-")
                    if #parts > 2 then
                        -- one variant for each hyphen of the entry
                        for c=1,#parts - 1 do
                            local word = {}
                            for j=1,#parts do
                                word[#word + 1] = parts[j]
                                if j == c then
                                    word[#word + 1] = "-"
                                end
                            end
                            luacheckhyphen.word_whitelist[table.concat(word,"")] = true
                        end
                    else
                        luacheckhyphen.word_whitelist[entry] = true
                    end
                end
            end
        end
    end

    if luacheckhyphen.mark == "true" then
        luacheckhyphen.drawmarks = true
    end

    luatexbase.add_to_callback("pre_linebreak_filter", luacheckhyphen.collect_discs,"collect_discs")
    luatexbase.add_to_callback("post_linebreak_filter",luacheckhyphen.check_discs,"check_discs")
end

return luacheckhyphen
-- end of file