Module:PageListTools

From Roat Pkz
Jump to navigation Jump to search

Documentation for this module may be created at Module:PageListTools/doc

-- Export table: the public functions of this module are attached to p,
-- and p is returned at the end of the file.
local p = {}

-- libraryUtil supplies checkType, which the functions below use to validate
-- their arguments (function name, argument index, value, expected type,
-- and an optional flag that allows nil).
local libraryUtil = require('libraryUtil')
local checkType = libraryUtil.checkType
-- Array helper module used by the list-processing functions below.
local arr = require('Module:Array')
-- DPL wrapper module; the DPL-based lookups below call dpl.ask.
local dpl = require('Module:DPLlua')

-- Given a list of pages, this function returns lists of pages that are either
-- non-existing (invalid), redirects or duplicates.
-- The maximum list length is constrained only by MediaWiki's maximum
-- 'Expensive parser function count', which at the time of writing is set to 100.
--
-- Parameters:
--   pages   (table)   sequence of page names to check.
--   logging (boolean, optional) when true, a summary line with counts and the
--           elapsed time in milliseconds is written with mw.log.
--
-- Returns a table with three sequences of the original page strings:
--   invalid   - entries that are not a valid title or whose page does not exist,
--   redirect  - entries whose page exists and is a redirect,
--   duplicate - entries whose page id was already seen earlier in the list
--               (the first occurrence is not reported).
function p.pagelistchecks(pages, logging)
    checkType('Module:PageListTools.pagelistchecks', 1, pages, 'table')
    checkType('Module:PageListTools.pagelistchecks', 2, logging, 'boolean', true)

    local invalid = {}
    local redirect = {}
    local duplicate = {}
    -- Set of page ids encountered so far (id -> true), giving a constant-time
    -- duplicate check instead of a linear scan per page.
    local seen = {}

    local t1 = os.clock()
    for _, page in ipairs(pages) do
        local title = mw.title.new(page, '')

        -- mw.title.new returns nil when the text is not a valid title; such
        -- entries are reported as invalid instead of raising an error.
        if title and title.exists then
            if title.isRedirect then
                table.insert(redirect, page)
            end

            local id = title.id
            if seen[id] then
                table.insert(duplicate, page)
            else
                seen[id] = true
            end
        else
            table.insert(invalid, page)
        end
    end
    local t2 = os.clock()

    if logging then
        mw.log(string.format('Checks (pagelist): total pages: %d, non-existing: %d, redirects: %d, duplicates: %d, time elapsed: %.3f ms.',
            #pages, #invalid, #redirect, #duplicate, (t2 - t1) * 1000))
    end

    return {
        invalid   = invalid,
        redirect  = redirect,
        duplicate = duplicate
    }
end

-- Looks up pages by category through DPL.
-- Each entry of cats is one category string; a single entry can name several
-- categories AND-ed together with '&'.
--
-- Parameters:
--   cats    (table)   non-empty sequence of category strings.
--   logging (boolean, optional) forwarded to p.pageswithconditionsdpl.
--
-- Returns the result of p.pageswithconditionsdpl for the generated conditions.
-- Raises an error when cats contains no entries.
function p.pageswithcatsdpl(cats, logging)
    checkType('Module:PageListTools.pageswithcatsdpl', 1, cats, 'table')
    checkType('Module:PageListTools.pageswithcatsdpl', 2, logging, 'boolean', true)
    assert(#cats > 0, 'You must supply at least one category')

    -- Wrap every category string in its own DPL condition table.
    local conditions = {}
    for index, category in ipairs(cats) do
        conditions[index] = { category = category }
    end

    return p.pageswithconditionsdpl(conditions, logging)
end

-- Runs one DPL query per condition table and merges the results.
--
-- Parameters:
--   conditions (table)   sequence of condition tables, each passed to dpl.ask.
--   logging    (boolean, optional) when true, the raw and unique result counts
--              and the elapsed query time in milliseconds are written with mw.log.
--
-- Returns the combined results after arr.unique and table.sort.
function p.pageswithconditionsdpl(conditions, logging)
    checkType('Module:PageListTools.pageswithconditionsdpl', 1, conditions, 'table')
    checkType('Module:PageListTools.pageswithconditionsdpl', 2, logging, 'boolean', true)

    -- Every result of every query, in query order, before de-duplication.
    local collected = {}

    local started = os.clock()
    for _, condition in ipairs(conditions) do
        local found = dpl.ask(condition)

        for _, pagename in ipairs(found) do
            collected[#collected + 1] = pagename
        end
    end
    local finished = os.clock()

    -- The timing above covers only the queries, not the post-processing.
    local uniquepages = arr.unique(collected)
    table.sort(uniquepages)

    if logging then
        mw.log(string.format('DPL (pagelist): found: %d, unique: %d, time elapsed: %.3f ms.',
            #collected, #uniquepages, (finished - started) * 1000))
    end

    return uniquepages
end

-- Looks up pages by category through SMW.
-- Each entry of cats is one SMW condition string; a single entry can name
-- several categories AND-ed together with ' '. The entries themselves are
-- joined with ' OR ' into one condition string.
--
-- Parameters:
--   cats    (table)   non-empty sequence of condition strings.
--   logging (boolean, optional) forwarded to p.pageswithconditions.
--
-- Returns the result of p.pageswithconditions for the joined condition string.
-- Raises an error when cats contains no entries.
function p.pageswithcats(cats, logging)
    checkType('Module:PageListTools.pageswithcats', 1, cats, 'table')
    checkType('Module:PageListTools.pageswithcats', 2, logging, 'boolean', true)
    assert(#cats > 0, 'You must supply at least one category')

    local conditions = table.concat(cats, ' OR ')
    return p.pageswithconditions(conditions, logging)
end

-- Runs a single SMW query and returns the first field of every result row.
--
-- Parameters:
--   conditions (string)  SMW condition string used as the query.
--   logging    (boolean, optional) when true, the result count, offset, limit
--              and elapsed query time in milliseconds are written with mw.log.
--
-- Returns a sequence holding element [1] of each row returned by mw.smw.ask;
-- an empty sequence when the query yields nothing.
function p.pageswithconditions(conditions, logging)
    checkType('Module:PageListTools.pageswithconditions', 1, conditions, 'string')
    checkType('Module:PageListTools.pageswithconditions', 2, logging, 'boolean', true)

    -- Build query: the condition string, one printout, and fixed paging values.
    -- NOTE(review): '?=#-' presumably asks for the page itself as plain,
    -- unlinked text - confirm against the SMW documentation.
    local query = {
        conditions,
        '?=#-',
        offset = 0,
        limit = 1000,
    }

    -- Fetch data (uncomment to inspect the query: mw.logObject(query))
    local started = os.clock()
    local rows = mw.smw.ask(query)
    local finished = os.clock()
    -- A falsy result is treated as "no rows".
    rows = rows or {}

    -- Post-process: keep only the first field of each row.
    local data = {}
    for _, row in ipairs(rows) do
        data[#data + 1] = row[1]
    end

    -- Sanity check: every row must have contributed exactly one value.
    assert(#rows == #data)

    -- Statistics
    if logging then
        mw.log(string.format('SMW (pagelist): found %i, offset %i, limit %i, time elapsed %.3f ms.',
            #rows, query.offset, query.limit, (finished - started) * 1000))
    end

    return data
end

--[=[ DEBUG COPYPASTA
mw.logObject( p.pagelistchecks({'Verac\'s brassard#Undamaged', 'Verac\'s flail#Undamaged', 'Verac\'s helm#Undamaged', 'Verac\'s plateskirt#Undamaged', 'Verac\'s helm#Undamaged', 'Verac\'s plateskirt#Undamaged', 'Nonexistent', 'Area-51', 'Addy scim', 'Rune scim'}, true) )
mw.logObject( p.pageswithcatsdpl({'Monsters&Discontinued content', 'Monsters&Deadman Mode'}, true) )
mw.logObject( p.pageswithcats({'[[Category:Monsters]] [[Category:Discontinued content]]', '[[Category:Monsters]] [[Category:Deadman Mode]]'}, true) )
mw.logObject( p.pageswithconditionsdpl({{ category = 'Monsters&Discontinued content' }}, true) )
mw.logObject( p.pageswithconditions('[[Category:Monsters]] [[Category:Discontinued content]]', true) )
--]=]

return p