File: //usr/share/texlive/texmf-dist/scripts/texdoc/texdoclib-search.tlu
-- texdoclib-search.tlu: file searching functions for texdoc
--
-- The TeX Live Team, GPLv3, see texdoclib.tlu for details
-- Warning: Some functions here assume that M.init_databases() has been called.
-- dependencies
local kpse = require 'kpse'
local lfs = require 'lfs'
local texdoc = {
const = require 'texdoclib-const',
util = require 'texdoclib-util',
alias = require 'texdoclib-alias',
score = require 'texdoclib-score',
config = require 'texdoclib-config',
}
-- shortcuts
local M = {}
local C = texdoc.const
local err_print = texdoc.util.err_print
local dbg_print = texdoc.util.dbg_print
-- shared by all functions in this file
local s_doclist -- the Doclist object to be populated by various functions
local s_meta -- {[normname] = meta, ...} (populated by init_tlp_database)
local vanilla -- is this a vanilla TL or a re-package one without tlpdb?
--------------------------- utility functions ----------------------------
-- find the TeX Live root
local get_tlroot
do
    -- memoised value of $TEXMFROOT and a flag telling whether it was resolved
    local cached_root, resolved = nil, false
    -- Return the TeX Live root directory.
    -- kpse is queried on the first call only (the path is expected to be an
    -- existing one); later calls return the remembered value.
    get_tlroot = function()
        if not resolved then
            cached_root, resolved = kpse.expand_path('$TEXMFROOT'), true
        end
        return cached_root
    end
end
-- says if file has a known extension according to ext_list
-- (or known basename according to basename_list)
-- Tell whether a file name is acceptable as a documentation file.
--
-- The name is lowercased and passed through texdoc.util.parse_zip() first.
-- It is accepted when either
--   * it matches an entry of the 'ext_list' config value ('*' accepts
--     anything, '' accepts names without any dot, any other entry e accepts
--     names ending in '.' .. e), or
--   * it is equal to an entry b of 'basename_list', or starts with b .. '.'.
--
-- Basenames are compared as plain strings, not as Lua patterns, so entries
-- containing pattern-magic characters (e.g. 'read-me') match literally.
-- NOTE(review): the file name is lowercased but basename_list entries are
-- not, so an entry with capital letters can never match -- confirm intended.
--
-- @param file  file name (string)
-- @return true if the name is accepted, false otherwise
local function check_ext(file)
    file = file:lower()
    -- remove zipext if applicable
    file = texdoc.util.parse_zip(file)
    -- is the extension good?
    for _, e in ipairs(texdoc.config.get_value('ext_list')) do
        if e == '*' then
            return true
        elseif e == '' then
            -- empty extension: accept names that contain no dot at all
            if not file:find('.', 1, true) then
                return true
            end
        else
            local dot_e = '.' .. e
            if file:sub(-#dot_e) == dot_e then
                return true
            end
        end
    end
    -- is the basename good?
    for _, b in ipairs(texdoc.config.get_value('basename_list')) do
        -- plain comparisons: either the whole name, or the name up to and
        -- including the first character after b, which must be a dot
        if file == b or file:sub(1, #b + 1) == b .. '.' then
            return true
        end
    end
    return false
end
---------------------- docfile and doclist objects -----------------------
--[[
doclist = {
[1] = docfile1, [2] = docfile2, ...,
inv = {realpath1 = index1, ...}
}
The inv subtable is such that for all i
doclist.inv[doclist[i].realpath:lower()] == i
Paths are lowercased in order to avoid duplicates on windows.
--]]
local Doclist = {}
Doclist.__index = Doclist

-- Build an empty list of docfiles.
-- The returned table carries an empty `inv` reverse index and uses the
-- receiver as its metatable.
function Doclist:new()
    return setmetatable({inv = {}}, self)
end
-- add a docfile to a list
-- Add a docfile to the list, merging it into an existing entry when a
-- docfile with the same (lowercased) real path is already present.
function Doclist:add(df)
    local path = df.realpath
    -- without realpath information there is nothing to add
    -- (useful if vanilla == false)
    if not path then return end
    -- the file must actually exist
    if not lfs.isfile(path) then
        dbg_print('search', 'File %s not found. Skipping.', path)
        return
    end
    -- the reverse index is keyed by the lowercased path
    local key = path:lower()
    local known = self.inv[key]
    if known then
        self[known]:mergein(df)
        return
    end
    dbg_print('search', 'File %s found.', path)
    local slot = #self + 1
    self[slot] = df
    self.inv[key] = slot
end
-- stops a doclist
-- Stop a doclist: discard its reverse index (the `inv` field).
function Doclist:stop()
    rawset(self, 'inv', nil)
end
--[[
docfile = {
-- name and tree are mandatory
name = filename (used for scoring only)
tree = code of the tree, see below
-- at least one of the following fields should exist
matches = {pattern1, pattern2, ...} or {}
runtodoc = true if there is a runfile -> docfile association
tlptodoc = true if there is a tlp name -> docfile association
-- those are virtual members, see below
realpath = full path
normname = normalised name (path removed up to the 'doc' component)
basename = basename
lang = language tag from the catalogue metadata
details = details tag from the catalogue metadata
quality = 'good', 'bad', or 'killed' depending on score
ext_pos = position of the extension in ext_list
-- set for elements of a list as a side effect of sort_doclist()
score = score
}
if tree > 0, this is the index of the tree in TEXDOCS
if tree = 0, then name is relative to TLROOT
tree = - 1 if and only if file is a sty file. Here name is absolute.
--]]
-- Docfile objects inherit members from Docfile
-- and have virtual members implemented by get_<member>() methods
-- for best cache performance, getters should not return nil
local Docfile = {}

-- Lookup handler for docfile objects.
-- Members defined on Docfile itself are inherited as-is; otherwise, if a
-- get_<key>() method exists, its result is computed once and stored on the
-- object so that later accesses bypass this handler.
function Docfile:__index(key)
    local inherited = Docfile[key]
    if inherited then return inherited end
    local getter = Docfile['get_' .. key]
    if not getter then return end
    local value = getter(self)
    rawset(self, key, value)
    return value
end
-- create a new docfile object using initialisation info
-- required fields: name, tree
-- Create a docfile object from an initialisation table.
-- Every field of info is copied, except `pattern`, which is stored as the
-- single element of the `matches` list. Required fields: name, tree.
function Docfile:new(info)
    local obj = setmetatable({}, self)
    for field, value in pairs(info) do
        if field ~= 'pattern' then
            obj[field] = value
        else
            obj.matches = {info.pattern}
        end
    end
    return obj
end
-- merge a second docfile object in, assuming it represents the same file
-- Merge another docfile object into this one, assuming both describe the
-- same file: `matches` entries are appended, every other field of df
-- overwrites the field of the same name here.
function Docfile:mergein(df)
    for field, value in pairs(df) do
        if field ~= 'matches' then
            self[field] = value
        else
            for _, m in ipairs(value or {}) do
                -- self.matches always yields a table (see get_matches)
                local own = self.matches
                own[#own + 1] = m
            end
        end
    end
end
-- return the full path to the file
local texdocs_tree_to_path -- definition later
-- Getter for the virtual member `realpath` (full path to the file).
function Docfile:get_realpath()
    local tree = self.tree
    -- positive code: name is relative to a TEXDOCS tree
    if tree > 0 then
        return texdocs_tree_to_path(tree, self.name)
    end
    -- any other non-zero code: name is used unchanged
    if tree ~= 0 then
        return self.name
    end
    -- code 0: name comes from the tlpdb
    if not vanilla then
        return kpse.find_file(self.normname, 'TeX system documentation')
    end
    return get_tlroot() .. '/' .. self.name
end
-- normalise a name from the tlpdb (use for s_meta indexes)
-- Normalise a name from the tlpdb (used for s_meta indexes): a leading
-- 'texmf<anything but slash>/doc/' prefix is removed.
-- @param name  path as found in the tlpdb (string)
-- @return the normalised path, and nothing else
local function reloc_tlpdb_path(name)
    -- string.gsub returns (result, count); the parentheses keep only the
    -- result so the count cannot leak into argument lists or constructors
    return (name:gsub('^texmf[^/]*/doc/', ''))
end
-- return normalised name
-- Getter for the virtual member `normname`: tlpdb names (tree == 0) are
-- passed through reloc_tlpdb_path(), other names are returned unchanged.
function Docfile:get_normname()
    if self.tree == 0 then
        -- parentheses: exactly one value is returned
        return (reloc_tlpdb_path(self.name))
    end
    return self.name
end
-- retrieve the lang from meta
-- Getter for the virtual member `lang`, looked up in s_meta by normname.
-- Returns false (never nil) when no language is recorded.
function Docfile:get_lang()
    local meta = s_meta[self.normname]
    if meta and meta.lang then
        return meta.lang
    end
    return false
end
-- retrieve the details from meta
-- Getter for the virtual member `details`, looked up in s_meta by normname.
-- Returns false (never nil) when no details are recorded.
function Docfile:get_details()
    local meta = s_meta[self.normname]
    if meta and meta.details then
        return meta.details
    end
    return false
end
-- return the base name
-- Getter for the virtual member `basename`: the name with everything up to
-- and including the last '/' removed.
-- @return the base name only (a single value)
function Docfile:get_basename()
    -- string.gsub also returns the substitution count; the parentheses drop
    -- it so that it is not forwarded as an extra argument by the caller
    return (self.name:gsub('.*/', ''))
end
-- for interface consistency, matches should always be a table, never nil
-- Getter for the virtual member `matches`: a fresh empty list, so that
-- `matches` is a table even for docfiles created without a pattern.
function Docfile:get_matches()
    local no_matches = {}
    return no_matches
end
-- the `quality` getter is the function provided by texdoclib-score.tlu
Docfile.get_quality = texdoc.score.docfile_quality

-- Getter for the virtual member `ext_pos`; the computation is delegated to
-- texdoclib-score.tlu and is based on the base name.
function Docfile:get_ext_pos()
    local base = self.basename
    return texdoc.score.ext_pos(base)
end
------------------- select results from TEXDOCS trees --------------------
-- says if a file (with its path) matches a pattern
-- Tell whether a file (given with its path) matches a pattern.
-- Patterns without the `original` flag are delegated to
-- texdoc.score.is_exact(); patterns with it are searched as a plain,
-- case-insensitive substring of the file name.
local function matches(pattern, file)
    if not pattern.original then
        return texdoc.score.is_exact(file, pattern.name)
    end
    local haystack, needle = file:lower(), pattern.name:lower()
    return haystack:find(needle, 1, true)
end
-- return a docfile object if file "matches", nil otherwise
-- Build a docfile object for pathfile if it matches at least one pattern of
-- patlist; return nil otherwise. Every matching pattern ends up in the
-- `matches` list of the returned object. The `file` argument is not used.
local function process_file(patlist, file, pathfile, code)
    local found
    for _, pat in ipairs(patlist) do
        if matches(pat, pathfile) then
            local candidate = Docfile:new({
                name = pathfile,
                tree = code,
                pattern = pat,
            })
            if found then
                found:mergein(candidate)
            else
                found = candidate
            end
        end
    end
    return found
end
-- scan a database
-- Scan a {[relative path] = basename} database and add every entry that
-- matches one of the patterns to the shared result list.
local function scan_db(patlist, code, lsr_db)
    for relpath, base in pairs(lsr_db) do
        local hit = process_file(patlist, base, relpath, code)
        if hit then
            s_doclist:add(hit)
        end
    end
end
--------------------- manage TEXDOCS trees of the kpse ----------------------
-- build a db from a ls-R file
-- Build a {[path relative to shift] = basename} database from root/ls-R.
-- root:  directory containing the ls-R file
-- shift: directory prefix (ending with '/', or empty) selecting the part of
--        the index to keep; it is stripped from the recorded paths
-- Raises an error (via assert) if the ls-R file cannot be opened.
local function init_lsr_db(root, shift)
-- open the file
local lsr = assert(io.open(root .. '/ls-R', 'r'))
local _ = lsr:read('*line') -- throw away first line (comment)
-- scan it
local db = {}
-- maybe_dir: the next line may be a directory header
-- is_doc: we are currently inside a directory selected by shift
local maybe_dir, is_doc = true, false
local current_dir
local l = #shift
while true do
local line = lsr:read('*line')
-- skip blank lines; after one, a directory header may follow
while line == '' do line, maybe_dir = lsr:read('*line'), true end
if line == nil then break end -- EOF
-- directory headers look like './some/dir:'
local dir_line = maybe_dir and line:match('^%./(.*):$')
if dir_line then
maybe_dir = false -- next line may not be a dir
-- the '/' is appended so that the directory named by shift itself matches
if string.sub(dir_line .. '/', 1, l) == shift then
is_doc = true
current_dir = dir_line:sub(l + 1)
-- drop any entry recorded earlier under this key
-- (presumably the directory's own line in its parent's listing)
db[current_dir] = nil
elseif is_doc then
break -- we're exiting the ./doc (or shift) dir, so it's over
end
elseif is_doc then
-- plain entry inside a selected directory
local file = line
if current_dir ~= '' then
file = current_dir .. '/' .. line
end
if check_ext(line) then db[file] = line end
end
end
lsr:close()
return db
end
-- build a db for a tree without ls-R index
-- Build a {[relative path] = basename} database by walking the file system
-- below base. Sub-directories are entered only when recurse is true; only
-- files accepted by check_ext() are recorded.
local function init_tree_db(base, recurse)
    local found = {}
    local function walk(rel)
        for entry in lfs.dir(base .. '/' .. rel) do
            if entry ~= '.' and entry ~= '..' then
                -- path of the entry relative to base
                local child = entry
                if rel ~= '' then
                    child = rel .. '/' .. entry
                end
                if not lfs.isdir(base .. '/' .. child) then
                    if check_ext(entry) then found[child] = entry end
                elseif recurse then
                    walk(child)
                end
            end
        end
    end
    walk('')
    return found
end
local init_texdocs_database, get_doclist_texdocs
do -- begin scope of doc_roots
local doc_roots
--[[
doc_roots[i] = {
path = initial path,
db = {[file1] = basename1, [file2] = basename2, ...},
}
--]]
-- populate the doc_roots filename databases
init_texdocs_database = function()
    -- Look for a ls-R file in path or in one of its parent directories.
    -- Returns the directory holding it plus the part of path below that
    -- directory (with a trailing '/', or empty); returns nothing if path is
    -- not a directory or no ls-R file is found.
    local function lsr_root(path)
        if not lfs.isdir (path) then return end
        local candidate, below = path, ''
        if candidate:sub(-1) == '/' then
            candidate = candidate:sub(1, -2)
        end
        while candidate:find('/', 1, true) do
            if lfs.isfile(candidate .. '/ls-R') then
                return candidate, below
            end
            local tail = candidate:match('^.*/(.*)$')
            -- /!\ tail may contain pattern-special characters, so it is
            -- stripped by length rather than through a pattern
            candidate = candidate:sub(1, - (#tail + 2))
            below = tail .. '/' .. below
        end
    end

    doc_roots = {}
    -- expand $TEXDOCS and turn it into a lua list
    local texdocs_value = kpse.expand_var('$TEXDOCS')
    local entries = kpse.expand_braces(texdocs_value):explode(C.kpse_sep)
    local total = #entries
    for j, entry in ipairs(entries) do
        -- entries are registered in reverse order
        local i = total + 1 - j
        -- a trailing // allows recursion, a leading !! makes the index mandatory
        local stripped, n_slashes = entry:gsub('//$', '')
        local recursion_allowed = (n_slashes == 1)
        local path, n_bangs = stripped:gsub('^!!', '')
        local index_mandatory = (n_bangs == 1)
        dbg_print('texdocs',
            'texdocs[%d] = %s (index_mandatory=%s, recursion_allowed=%s)',
            i, path, tostring(index_mandatory), tostring(recursion_allowed))
        -- decide if we should use a ls-R index, the filesystem, or do nothing
        local db
        local root, shift = lsr_root(path)
        if root and shift and recursion_allowed then
            dbg_print('texdocs',
                'texdocs[%d] using index: %s (shift=%s)', i, root, shift)
            db = init_lsr_db(root, shift)
        elseif not index_mandatory and lfs.isdir(path) then
            dbg_print('texdocs',
                'texdocs[%d] using filesystem search', i)
            db = init_tree_db(path, recursion_allowed)
        end
        -- register this root (db stays nil when nothing could be used)
        doc_roots[i] = {path = path, db = db}
    end
end
-- find docfiles in texdocs directories
get_doclist_texdocs = function(patlist)
    -- scan every registered root that has a database; the index of the
    -- root is used as the tree code of the docfiles found
    for code, root in ipairs(doc_roots) do
        local db = root.db
        if db then
            scan_db(patlist, code, db)
        end
    end
end
-- return the real path from a texdocs tree number + relative path
texdocs_tree_to_path = function (tree, rel)
    -- prefix the relative path with the path of the registered root
    local root = doc_roots[tree]
    return root.path .. '/' .. rel
end
end -- end scope of doc_roots
--------------------------- look for sty files ---------------------------
-- add doclist entries for sty files in patlist
-- For every pattern of patlist that kpse can locate as a file, add a
-- docfile with tree code -1 (name is the path returned by kpse) to the
-- shared result list.
local function get_doclist_sty(patlist)
    for _, pat in ipairs(patlist) do
        local found = kpse.find_file(pat.name)
        if found then
            s_doclist:add(Docfile:new{name = found, tree = -1, pattern = pat})
        end
    end
end
------------------------------- use tlpdb --------------------------------
-- tlpdb means TeX Live Package DataBase and tlp means TeX Live Package
-- return true if cache exists and is newer than original, false otherwise
-- Tell whether the cache file exists and was modified strictly later than
-- the original file. Raises an error if the original's attributes cannot
-- be read.
local function good_cache(cache, ori)
    local cache_mtime = lfs.attributes(cache, 'modification')
    if cache_mtime then
        local ori_mtime = assert(lfs.attributes(ori, 'modification'))
        return ori_mtime < cache_mtime
    end
    return false
end
-- make sure a given directory exists, or return nil plus an error string
-- Make sure a directory exists, creating missing parents first.
-- Returns true if the directory already exists, the result of lfs.mkdir()
-- when it has to be created, or nil plus a message when a parent could not
-- be created.
local function mkdir_p(dir)
    -- on windows, when chgstrcp is available, lfs is given the converted path
    local lfs_dir = dir
    if os.type == 'windows' and chgstrcp then
        lfs_dir = chgstrcp.syscptoutf8(dir)
    end
    if lfs.isdir(lfs_dir) then return true end
    local parent = texdoc.util.path_parent(dir)
    if parent then
        local ok, msg = mkdir_p(parent)
        if not ok then return nil, msg end
    end
    return lfs.mkdir(lfs_dir)
end
local print_out_tlpinfo, get_doclist_tlpdb
local get_tlpinfo_from_tlpdb, get_tlpinfo_from_cache, get_tlpinfo_from_dist
do -- begin scope of tlpinfo tables
local tlp_from_runfile -- {[runfile_basename] = {tlp1 = true, ...}, ...}
local tlp_doclist -- {[tlp_name] = {relname1, relname2, ...}, ...}
-- remove entries for tlp without any docfile
-- Drop packages that have no docfile, then drop the references to such
-- packages from the runfile -> package sets.
local function remove_useless_tlp()
    for tlp, files in pairs(tlp_doclist) do
        if #files == 0 then
            tlp_doclist[tlp] = nil
        end
    end
    for _, tlp_set in pairs(tlp_from_runfile) do
        for tlp in pairs(tlp_set) do
            if not tlp_doclist[tlp] then
                -- clearing an existing key during traversal is allowed
                tlp_set[tlp] = nil
            end
        end
    end
end
-- populate tlpinfo tables using the given texlive.tlpdb
-- Parse the given texlive.tlpdb line by line and rebuild the three tables
-- s_meta, tlp_from_runfile and tlp_doclist from scratch.
-- A small state machine tracks whether the current line belongs to a
-- 'docfiles' section, a 'runfiles' section, or neither ('none').
get_tlpinfo_from_tlpdb = function(filename)
s_meta, tlp_from_runfile, tlp_doclist = {}, {}, {}
local curr_tlp
local state = 'none'
for line in io.lines(filename) do
if state == 'none' and line:find('^name ') then
-- begin a new package
-- (the package name is what follows 'name ', lowercased)
curr_tlp = line:sub(6, -1):lower()
tlp_doclist[curr_tlp] = {}
elseif state == 'docfiles' then
-- section entries start with a space; anything else ends the section
if not line:find('^ ') then
state = 'none'
else
-- first word: file path; remainder of the line (if any): metadata
local file = line:match('^ ([^ ]*)')
local meta = line:match('^ [^ ]* (.+)')
local basename = file:match('([^/]*)$')
if check_ext(basename) then
-- we've got a docfile here, add it
table.insert(tlp_doclist[curr_tlp], file)
if meta then
local details = meta:match('details="([^"]+)"')
local lang = meta:match('language="([^"]+)"')
-- metadata is indexed by the normalised path
s_meta[reloc_tlpdb_path(file)] = {
details = details,
lang = lang,
}
end
end
end
elseif state == 'runfiles' then
if not line:find('^ ') then
state = 'none'
else
-- check for interesting runfiles
local e = line:sub(-4, -1)
if e == '.tex' or e == '.sty' or e == '.cls' then
-- f: what lies between the last '/' and the last '.'
-- NOTE(review): f is nil for an entry without any '/', which would
-- make the indexing below fail -- confirm such entries cannot occur
local f = line:match('.*/(.*)%.')
tlp_from_runfile[f] = tlp_from_runfile[f] or {}
tlp_from_runfile[f][curr_tlp] = true
end
end
end
-- update state
-- (done after the processing above, so a section header line is never
-- handled as an entry of the section it opens)
if line:find('^docfiles ') then
state = 'docfiles'
elseif line:find('^runfiles ') then
state = 'runfiles'
end
end
remove_useless_tlp()
end
-- print out data from tlpdb in dofile()-able form
-- Write s_meta, tlp_from_runfile and tlp_doclist to filename as a Lua chunk
-- that returns the three tables, i.e. in dofile()-able form.
-- Raises an error if the file cannot be opened for writing.
print_out_tlpinfo = function(filename)
    -- on windows, when chgstrcp is available, open the converted path
    local target = filename
    if os.type == 'windows' and chgstrcp then
        target = chgstrcp.syscptoutf8(filename)
    end
    local out = assert(io.open(target, 'w'))
    local function emit(fmt, ...)
        out:write(string.format(fmt, ...))
    end

    -- s_meta
    emit('local s_meta = {\n')
    for path, meta in pairs(s_meta) do
        emit(' [%q] = {', path)
        for field, value in pairs(meta) do
            emit('[%q] = %q, ', field, value)
        end
        emit('},\n')
    end
    emit('}\n')

    -- tlp_from_runfile
    emit('local tlp_from_runfile = {\n')
    for runfile, tlp_set in pairs(tlp_from_runfile) do
        emit(' [%q] = {', runfile)
        for tlp in pairs(tlp_set) do
            emit('[%q]=true,', tlp)
        end
        emit('},\n')
    end
    emit('}\n')

    -- tlp_doclist
    emit('local tlp_doclist = {\n')
    for tlp, files in pairs(tlp_doclist) do
        emit(' [%q] = {\n', tlp)
        for _, file in ipairs(files) do
            emit(' %q,\n', file)
        end
        emit(' },\n')
    end
    emit('}\n')

    emit('return s_meta, tlp_from_runfile, tlp_doclist\n')
    out:close()
end
-- get pre-hashed tlpdb info from a cache file
get_tlpinfo_from_cache = function(filename)
    -- the cache file is a Lua chunk returning the three tables in this order
    local meta, from_runfile, doclist = dofile(filename)
    s_meta, tlp_from_runfile, tlp_doclist = meta, from_runfile, doclist
end
-- get pre-hashed tlpdb info from a pseudo-cache file
-- Load the three tlpinfo tables from the data file shipped with texdoc
-- (located through kpse); print an error and exit if it cannot be found.
get_tlpinfo_from_dist = function()
    local data_file = kpse.find_file(C.data_tlpdb_name, 'texmfscripts')
    if data_file then
        dbg_print('tlpdb', 'Getting data from shipped tlpdb data file ' .. data_file)
        s_meta, tlp_from_runfile, tlp_doclist = dofile(data_file)
        return
    end
    err_print('error', 'No texlive.tlpdb nor shipped tlpdb data found.')
    os.exit(C.exit_usage)
end
-- get docfiles for pattern using specific tlpdb information
-- Add to the shared result list the docfiles that the tlpdb information
-- associates with pattern, either through a runfile of that name or through
-- a package of that name.
get_doclist_tlpdb = function(pattern)
    -- add every file of the list as a tlpdb docfile (tree code 0) carrying
    -- the given boolean flag
    local function add_files(files, flag)
        for _, file in ipairs(files) do
            local info = {name = file, tree = 0}
            info[flag] = true
            s_doclist:add(Docfile:new(info))
        end
    end

    -- runfile to tlp to docfile
    local tlp_set = tlp_from_runfile[pattern]
    if tlp_set then
        for tlp in pairs(tlp_set) do
            add_files(tlp_doclist[tlp], 'runtodoc')
        end
    end

    -- tlp name to docfile
    local own_files = tlp_doclist[pattern]
    if own_files then
        add_files(own_files, 'tlptodoc')
    end
end
-- calculating Levenshtein distance by dynamic programming
-- cf. http://lua-users.org/lists/lua-l/2008-01/msg00095.html
-- Levenshtein distance between the string forms of s and t, computed by
-- dynamic programming over bytes, keeping only the previous and the
-- current row of the distance table.
function M.levenshtein(s, t)
    local a, b = tostring(s), tostring(t)
    local len_a, len_b = #a, #b
    -- prev[j]: distance between the first i-1 bytes of a and the first j of b
    local prev = {}
    for j = 0, len_b do
        prev[j] = j
    end
    for i = 1, len_a do
        local curr = {[0] = i}
        local ch = a:sub(i, i)
        for j = 1, len_b do
            local cost = 1
            if ch == b:sub(j, j) then cost = 0 end
            curr[j] = math.min(
                prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + cost
            )
        end
        prev = curr
    end
    return prev[len_b]
end
-- fuzzy search by using Levenshtein distance
-- Find the package name closest to pattern in terms of Levenshtein
-- distance. Returns the name and its distance, or '' and math.huge when no
-- package is known.
function M.fuzzy_search(pattern)
    local best_name, best_dist = '', math.huge
    for name in pairs(tlp_doclist) do
        local dist = M.levenshtein(pattern, name)
        -- strict comparison: on a tie the first name visited is kept
        if dist < best_dist then
            best_name, best_dist = name, dist
        end
    end
    return best_name, best_dist
end
end -- end scope of tlpinfo table
-- get tlpinfo tables initialised by whatever mean
-- Initialise the tlpinfo tables (s_meta, tlp_from_runfile, tlp_doclist) and
-- the `vanilla` flag.
--
-- Source of the data, in order of preference:
--   1. the cache file, when it is newer than the texlive.tlpdb in use;
--   2. the texlive.tlpdb itself (the one named by the 'texlive_tlpdb'
--      config value if it exists, else the one under the TeX Live root),
--      after which the cache file is (re)written;
--   3. the tlpdb data shipped with texdoc, when no texlive.tlpdb is found.
--
-- Writing the cache is best effort: a failure to create the directory or to
-- write the file only produces warnings, since the data is already in memory.
local function init_tlp_database()
    -- we assume TEXMFVAR always consists of an element
    local cache_file = kpse.expand_var('$TEXMFVAR/' .. C.cache_name)

    -- set vanilla and detect texlive.tlpdb to use
    local texlive_tlpdb = get_tlroot() .. '/tlpkg/texlive.tlpdb'
    vanilla = lfs.isfile(texlive_tlpdb)
    local tlpdb_found = vanilla

    local custom_texlive_tlpdb = texdoc.config.get_value('texlive_tlpdb')
    if custom_texlive_tlpdb then
        if lfs.isfile(custom_texlive_tlpdb) then
            texlive_tlpdb = custom_texlive_tlpdb
            tlpdb_found = true
        else
            err_print('warning',
                'Specified texlive.tlpdb does not exist: ' ..
                texdoc.util.w32_path(custom_texlive_tlpdb))
            err_print('warning',
                'Fallback to use the texlive.tlpdb in the distribution.')
        end
    end

    -- get tlpinfo
    if tlpdb_found then
        if good_cache(cache_file, texlive_tlpdb) then
            dbg_print('tlpdb', 'Using cached data from ' .. cache_file)
            get_tlpinfo_from_cache(cache_file)
        else
            dbg_print('tlpdb',
                'Getting data from tlpdb file ' .. texlive_tlpdb)
            get_tlpinfo_from_tlpdb(texlive_tlpdb)
            dbg_print('tlpdb', 'Writing data in cache file ' .. cache_file)
            local ok, msg = mkdir_p(texdoc.util.path_parent(cache_file))
            if not ok then
                err_print('warning',
                    'Failed to create cache file in %s:', cache_file)
                err_print('warning', msg)
            else
                -- print_out_tlpinfo() raises an error when the file cannot
                -- be opened; the tables are already loaded, so report the
                -- problem instead of aborting
                local written, err = pcall(print_out_tlpinfo, cache_file)
                if not written then
                    err_print('warning',
                        'Failed to write cache file %s:', cache_file)
                    err_print('warning', '%s', tostring(err))
                    -- do not leave a partially written cache behind
                    os.remove(cache_file)
                end
            end
        end
    else
        dbg_print('tlpdb', 'Using shipped tlpdb data.')
        get_tlpinfo_from_dist()
    end
end
----------------------------- main function ------------------------------
-- initialise the various databases (must be called first)
-- Initialise every database used by the search functions of this module:
-- first the TEXDOCS file name databases, then the tlpdb-derived tables.
function M.init_databases()
-- populate the doc_roots databases from $TEXDOCS
init_texdocs_database()
-- load s_meta and the tlp tables, and set the `vanilla` flag
init_tlp_database()
end
-- find docfiles according to pattern
-- Find docfiles according to pattern and return them as a sorted Doclist.
-- When the normal search finds nothing and the 'fuzzy_level' config value is
-- positive, the closest package name is tried instead, provided its
-- Levenshtein distance does not exceed that level.
function M.get_doclist(pattern, no_alias)
    dbg_print('search', 'Searching documents for pattern "%s"', pattern)

    -- split a pattern list into (other patterns, sty patterns)
    local function split_sty(list)
        local others, sty = {}, {}
        for _, p in ipairs(list) do
            local ext = p.name:lower():match('%.([^/.]*)$')
            local bucket = (ext == 'sty') and sty or others
            bucket[#bucket + 1] = p
        end
        return others, sty
    end

    -- run the three searches for one pattern
    local function run_search(pat)
        -- get patterns (inc. aliases)
        local normal, sty = split_sty(texdoc.alias.get_patterns(pat, no_alias))
        -- the _texdocs search comes after the _tlpdb search so that files
        -- found by both will have the priority of the _texdocs tree.
        -- (https://puszcza.gnu.org.ua/bugs/?369)
        get_doclist_sty(sty)
        get_doclist_tlpdb(pat)
        get_doclist_texdocs(normal)
    end

    -- initialise result list
    s_doclist = Doclist:new()

    -- 1. normal search with the input pattern
    run_search(pattern)

    -- 2. if no result, execute fuzzy search
    local fuzzy_level = texdoc.config.get_value('fuzzy_level')
    if not s_doclist[1] and fuzzy_level > 0 then
        local guess, distance = M.fuzzy_search(pattern)
        local accepted = distance <= fuzzy_level
        if accepted then
            err_print('info', 'Fuzzy search result: ' .. guess)
        else
            dbg_print('search', 'Fuzzy search result: ' .. guess)
        end
        dbg_print('search', 'Levenshtein distance: ' .. distance)
        if accepted then
            -- the accepted guess replaces the pattern, for sorting too
            pattern = guess
            run_search(pattern)
        end
    end

    -- finally, sort results
    texdoc.score.sort_doclist(s_doclist, pattern)
    return s_doclist
end
return M
-- vim: ft=lua: