-- HEX
-- Server: Apache
-- System: Linux pdx1-shared-a1-38 6.6.104-grsec-jammy+ #3 SMP Tue Sep 16 00:28:11 UTC 2025 x86_64
-- User: mmickelson (3396398)
-- PHP: 8.1.31
-- Disabled: NONE
-- Upload Files
-- File: //usr/share/texlive/texmf-dist/scripts/texdoc/texdoclib-score.tlu
-- texdoclib-score.tlu: scoring functions for texdoc
--
-- The TeX Live Team, GPLv3, see texdoclib.tlu for details

-- dependencies
local md5 = require 'md5'
local texdoc = {
    util = require 'texdoclib-util',
    config = require 'texdoclib-config',
}

-- module table: public functions are attached to it as M.<name>
local M = {}

-- shared variables
-- score adjustments filled in by M.confline_to_score:
--   global_adjscore[pattern] = value
--   spec_adjscore[keyword][pattern] = value
local global_adjscore, spec_adjscore = {}, {}

-------------------------   configuration directives   -------------------------

-- Register a score for key in the score table tab, keeping any score that is
-- already stored there (the first definition wins).
--   tab: table indexed by lowercased keys
--   key: name to register; it is lowercased before being used as index
--   val: the score, in any form accepted by tonumber()
-- Returns true when val is numeric (whether or not the entry was written)
-- and false otherwise.
local function set_score_table(tab, key, val)
    local name = string.lower(key)
    local number = tonumber(val)
    if not number then
        return false
    end
    if tab[name] == nil then
        tab[name] = number
    end
    return true
end

-- Try to read a configuration line as a score directive.
-- Two forms are recognised:
--   adjscore <pattern> = <number>              stored in global_adjscore
--   adjscore(<keyword>) <pattern> = <number>   stored in spec_adjscore,
--                                              under the lowercased keyword
-- Returns the result of set_score_table for a line of one of these forms
-- (true when the value is numeric), and false for any other line.
-- The file and pos arguments are accepted but not used here.
function M.confline_to_score(line, file, pos)
    -- global form
    local gpat, gval = string.match(line,
        '^adjscore%s+([%w%p]+)%s*=%s*([%d+-.]+)')
    if gpat and gval then
        return set_score_table(global_adjscore, gpat, gval)
    end

    -- keyword-specific form
    local keyw, spat, sval = string.match(line,
        '^adjscore%(([%w%p]+)%)%s+([%w%p]+)%s*=%s*([%d+-.]+)')
    if not (keyw and spat and sval) then
        return false
    end

    keyw = string.lower(keyw)
    local tab = spec_adjscore[keyw]
    if not tab then
        -- first directive for this keyword: create its table
        tab = {}
        spec_adjscore[keyw] = tab
    end
    return set_score_table(tab, spat, sval)
end

----------------------------   score computation   -----------------------------

-- Says if pat is a "subword" of str, i.e. if pat occurs in str (as a plain
-- string, not as a Lua pattern) with a delimiter on both sides.
-- A side counts as delimited when it lies at the very start/end of str, or
-- when the first/last character of the occurrence, or the character just
-- outside it, is a punctuation character (%p).
-- Every occurrence of pat is examined, so an early occurrence buried inside
-- a longer word (the "note" of "footnote/note.pdf") does not hide a later,
-- properly delimited one.
-- Returns a boolean.
local function is_subword(str, pat)
    local len = #str

    -- true if the character at index i exists and is punctuation
    local function is_delim(i)
        return string.find(string.sub(str, i, i), '%p') ~= nil
    end

    -- true if the occurrence str[i..j] is delimited on both sides
    local function is_bounded(i, j)
        return (i == 1 or is_delim(i) or is_delim(i - 1))
            and (j == len or is_delim(j) or is_delim(j + 1))
    end

    -- the empty pattern matches everywhere; only judge the match at the
    -- start of str for it (historical behaviour, and avoids a pointless scan)
    if pat == '' then
        return is_bounded(1, 0)
    end

    local init = 1
    while init <= len do
        local i, j = string.find(str, pat, init, true)
        if not i then
            return false
        end
        if is_bounded(i, j) then
            return true
        end
        -- resume right after the start of this occurrence, so that
        -- overlapping occurrences are examined too
        init = i + 1
    end
    return false
end

-- Says if the basename of file is listed as "bad" in the configuration.
-- Everything up to the last slash is removed from file first.  Each entry of
-- the badbasename_list config value is then used as a Lua pattern and must
-- match either the whole remaining name or the part of it before a dot.
-- Returns a boolean.
local function has_bad_basename(file)
    local base = file:gsub('.*/', '')
    for _, bad in ipairs(texdoc.config.get_value('badbasename_list')) do
        local anchored = '^' .. bad
        -- the complete name is a bad one
        if base:find(anchored .. '$') then
            return true
        end
        -- a bad name followed by a dot
        if base:find(anchored .. '%.') then
            return true
        end
    end
    return false
end

-- Compute a heuristic score -10 <= s < 10 for file against pat.
--   file: name of the candidate file (set_score passes it lowercased)
--   pat: the pattern searched for (set_score passes it lowercased)
--   dbg_score: debug printer, called with a format string followed by the
--       values to be formatted
-- The score starts at -10 (no match at all) and is raised by the checks
-- below.  Returns the final score.
local function heuristic_score(file, pat, dbg_score)
    -- pat is passed through '%s' instead of being concatenated into the
    -- format string, so that a '%' in it is not read as a conversion
    dbg_score('Start heuristic scoring with pattern: %s', pat)

    -- score management
    local score = -10
    -- raise the score to s; a lower s only replaces it when force is set
    local function upscore(s, reason, force)
        if s > score or force then
            score = s
            dbg_score('New heuristic score: %.1f. Reason: %s', s, reason)
        end
    end
    -- does the pattern contain a directory separator?
    local slash = not not string.find(pat, '/', 1, true)

    -- look for exact or subword match
    if M.is_exact_locale(file, pat) then
        upscore(5, 'exact match with correct locale')
    elseif M.is_exact(file, pat) then
        upscore(4, 'exact match')
    elseif is_subword(file, pat) then
        upscore(1, 'subword match')
    end

    -- try derivatives unless pat contains a slash
    -- (a derivative is pat followed by one of the configured suffixes)
    if not slash then
        for _, suffix in ipairs(texdoc.config.get_value('suffix_list')) do
            local deriv = pat .. suffix
            if M.is_exact(file, deriv) then
                upscore(4.5, 'exact match for derived pattern: ' .. deriv)
            elseif is_subword(file, deriv) then
                upscore(3.5, 'subword match for derived pattern: ' .. deriv)
            end
        end
    end

    -- if extension is bad, score becomes an epsilon
    -- (only positive scores are lowered, hence the forced upscore)
    local ext = texdoc.config.get_value('ext_list')[M.ext_pos(file)]
    if ext and texdoc.config.get_value('badext_list_inv')[ext] and score > 0 then
        upscore(0.1, 'bad extension', true)
    end

    -- if basename is bad, score becomes an epsilon
    if has_bad_basename(file) and score > 0 then
        upscore(0.1, 'bad basename', true)
    end

    -- bonus for being in the right directory, i.e. when pat is one of the
    -- directory components of file (not tried for patterns with a slash)
    if string.find('/' .. file, '/' .. pat .. '/', 1, true) and not slash then
        upscore(score + 1.5, 'directory bonus')
    end

    -- done
    dbg_score('Final heuristic score: %.1f', score)
    return score
end

-- Set the score of a docfile.
--   df: the docfile; the fields normname, realpath, matches, tree, tlptodoc,
--       runtodoc and details are read, and the result is stored in df.score
--   original_kw: keyword used to select the keyword-specific adjustments in
--       spec_adjscore
-- Nothing is returned.
local function set_score(df, original_kw)
    -- scoring is case-insensitive (patterns are already lowercased)
    local name = string.lower(df.normname)
    -- short identifier for the file: first 7 hex digits of the md5 of name
    local df_id = string.sub(md5.sumhexa(name), 1, 7)

    -- special debugging function
    local function dbg_score(msg, ...)
        -- add the hash id prefix to make the outputs grep-friendly
        local msg = string.format('(%s) ', df_id) .. msg
        texdoc.util.dbg_print('score', msg, ...)
    end

    -- file names go through '%s' instead of being concatenated into the
    -- format string, so that a '%' in a path is not read as a conversion
    dbg_score('Start scoring %s', df.realpath)
    dbg_score('Name used: %s', name)

    -- get score from patterns: the best score over all matches is kept
    local score = -10
    for _, pat in ipairs(df.matches) do
        local s = -10
        local p = string.lower(pat.name)
        if pat.original then  -- non-alias
            -- NOTE(review): tree <= -1 presumably marks a file found outside
            -- the searched trees; such files get a fixed score of 1 -- confirm
            s = df.tree > -1 and heuristic_score(name, p, dbg_score) or 1
        elseif M.is_exact(name, p) then  -- alias
            local bonus, note = 0, ''
            if pat.locale then
                bonus, note = 5, ', (language-based)'
            end
            s = (pat.score or 10) + bonus  -- default alias score is 10
            dbg_score('Matching alias "%s", score: %.1f%s', pat.name, s, note)
        end
        if s > score then score = s end
    end
    dbg_score('Max pattern score: %.1f', score)

    -- get score from tlp associations
    -- (both fallbacks only apply when no pattern raised the score)
    if score == -10 and df.tlptodoc then
        score = -1
        dbg_score('New score: %.1f from package name association', score)
    end

    if score == -10 and df.runtodoc then
        score = -5
        dbg_score('New score: %.1f from sty/cls association', score)
    end

    -- bonus for metadata: small when the details mention "readme",
    -- larger for any other details
    if df.details then
        if string.find(string.lower(df.details), 'readme') then
            score = score + 0.1
            dbg_score('Catalogue "readme" bonus: +0.1')
        else
            score = score + 1.5
            dbg_score('Catalogue details bonus: +1.5')
        end
    end

    -- adjust from keyword-specific tables
    if df.tree > -1 and spec_adjscore[original_kw] then
        for pat, val in pairs(spec_adjscore[original_kw]) do
            if val and is_subword('/' .. name, pat) then
                score = score + val
                dbg_score('Adjust by %.1f from specific pattern "%s"', val, pat)
            end
        end
    end

    -- adjust from global tables
    if df.tree > -1 then
        for pat, val in pairs(global_adjscore) do
            if val and is_subword('/' .. name, pat) then
                -- a score still at -10 is only ever lowered, never raised
                -- (the debug line below is printed in both cases)
                if score > -10 or val < 0 then score = score + val end
                dbg_score('Adjust by %.1f from global pattern "%s"', val, pat)
            end
        end
    end

    dbg_score('Final score: %.1f', score)

    -- the final score should be a float value
    df.score = score + 0.0
end

-- Score every docfile of a doclist.
--   list: the doclist, walked with ipairs
--   original_kw: keyword handed over unchanged to set_score for each entry
local function set_list_scores(list, original_kw)
    for _, docfile in ipairs(list) do
        set_score(docfile, original_kw)
    end
end

-- Says if file is an exact match for pat.
-- file is first passed through texdoc.util.parse_zip.  Its last path
-- components (as many as pat has) must then be equal to pat, either as they
-- are or once one of the extensions of the ext_list config value ('' and '*'
-- excepted) has been appended to pat.
-- Returns true for a match, false for a mismatch, and nil when file has no
-- tail of the same shape as pat.
function M.is_exact(file, pat)
    file = texdoc.util.parse_zip(file)
    -- turn pat into a Lua pattern matching any path with the same number of
    -- components (gsub's second result, the count, is dropped)
    local slashes = string.gsub(pat, '[^/]+', '[^/]+')
    -- declared local on purpose: a bare assignment would create a global
    local basename = string.match(file, slashes .. '$')
    if not basename then return nil end
    if basename == pat then return true end
    for _, ext in ipairs(texdoc.config.get_value('ext_list')) do
        if ext ~= '' and ext ~= '*' and basename == pat .. '.' .. ext then
            return true
        end
    end
    return false
end

-- Says if file is an exact match for pat combined with the current locale.
-- With <lang> being the lang config value, file is tested with M.is_exact
-- against "<pat>-<lang>" and then against "<lang>-<pat>".
-- Returns false when pat already ends with something looking like a language
-- code (a dash and two or three lowercase letters) or when no lang is set;
-- otherwise returns what the M.is_exact tests give.
function M.is_exact_locale(file, pat)
    -- don't match if the pattern appears to include a language code
    local has_lang_code = string.match(pat, '%-%l%l%l?$')
    if has_lang_code then
        return false
    end

    local lang = texdoc.config.get_value('lang')
    if not lang then
        return false
    end

    local as_suffix = pat .. '-' .. lang
    local as_prefix = lang .. '-' .. pat
    return M.is_exact(file, as_suffix) or M.is_exact(file, as_prefix)
end

-- Compare two docfiles (see texdoclib-search.tlu for their structure):
-- 1. by score (higher is better),
-- 2. then by extension (lower ext_pos, i.e. earlier in ext_list, is better),
-- 3. then lexicographically by normname,
-- 4. then by tree (higher is better).
-- Returns true if a is better than b, and false otherwise (including when
-- they are equal on all four criteria).
local function docfile_order(a, b)
    if a.score > b.score then return true end
    if a.score < b.score then return false end

    if a.ext_pos < b.ext_pos then return true end
    if a.ext_pos > b.ext_pos then return false end

    if a.normname < b.normname then return true end
    if a.normname > b.normname then return false end

    return a.tree > b.tree
end

-----------------------------   public functions   -----------------------------

-- Returns the index of the most specific extension of file in ext_list,
-- or config.ext_list_max + 1 when no entry applies.
-- The entries of the ext_list config value are tried in order:
--   '*'  is only taken as long as nothing else has been selected,
--   ''   is taken when file contains no dot at all,
--   any other entry e is taken when file ends with '.' .. e and e is longer
--        than the extension selected so far (or replaces nothing or '*').
function M.ext_pos(file)
    -- remove zipext if applicable
    file = texdoc.util.parse_zip(file)

    -- position and value of the best entry found so far
    local best_pos, best_ext
    for idx, cand in ipairs(texdoc.config.get_value('ext_list')) do
        if cand == '*' and best_ext == nil then
            best_pos, best_ext = idx, cand
        elseif cand == '' and not string.find(file, '.', 1, true) then
            best_pos, best_ext = idx, cand
        elseif string.sub(file, -string.len(cand) - 1) == '.' .. cand
            and (best_ext == nil or best_ext == '*'
                or string.len(cand) > string.len(best_ext)) then
            best_pos, best_ext = idx, cand
        end
    end

    if best_pos then
        return best_pos
    end
    return texdoc.config.get_value('ext_list_max') + 1
end

-- Return the "quality" of a docfile as a string, judged from df.score:
--   'good'    for a score above 0,
--   'bad'     for a score above -100 (and not above 0),
--   'killed'  for anything else.
function M.docfile_quality(df)
    local score = df.score
    if score > 0 then
        return 'good'
    end
    if score > -100 then
        return 'bad'
    end
    return 'killed'
end

-- sort a doclist in place, best docfile first
--   dl: the doclist; every entry gets its score field set on the way
--   original_kw: keyword used for the keyword-specific score adjustments
function M.sort_doclist(dl, original_kw)
    -- NOTE(review): stop() is defined elsewhere; presumably it freezes the
    -- list before it is scored and sorted -- confirm
    dl:stop()
    -- the scores must be set first: docfile_order compares them
    set_list_scores(dl, original_kw)
    table.sort(dl, docfile_order)
end

-- export the module table
return M

-- vim: ft=lua: