--[[[ -- @module lua_maps -- This module contains helper functions for managing rspamd maps --]] --[[ Copyright (c) 2022, Vsevolod Stakhov Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ]]-- local rspamd_logger = require "rspamd_logger" local ts = require("tableshape").types local lua_util = require "lua_util" local exports = {} local maps_cache = {} local function map_hash_key(data, mtype) local hash = require "rspamd_cryptobox_hash" local st = hash.create_specific('xxh64') st:update(data) st:update(mtype) return st:hex() end local function starts(where,st) return string.sub(where,1,string.len(st))==st end local function cut_prefix(where,st) return string.sub(where,#st + 1) end local function maybe_adjust_type(data,mtype) local function check_prefix(prefix, t) if starts(data, prefix) then data = cut_prefix(data, prefix) mtype = t return true end return false end local known_types = { {'regexp;', 'regexp'}, {'re;', 'regexp'}, {'regexp_multi;', 'regexp_multi'}, {'re_multi;', 'regexp_multi'}, {'glob;', 'glob'}, {'glob_multi;', 'glob_multi'}, {'radix;', 'radix'}, {'ipnet;', 'radix'}, {'set;', 'set'}, {'hash;', 'hash'}, {'plain;', 'hash'}, {'cdb;', 'cdb'}, {'cdb:/', 'cdb'}, } if mtype == 'callback' then return mtype end for _,t in ipairs(known_types) do if check_prefix(t[1], t[2]) then return data,mtype end end -- No change return data,mtype end local external_map_schema = ts.shape{ external = ts.equivalent(true), -- must be true backend = ts.string, -- where to get data, required method = ts.one_of{"body", "header", "query"}, -- how to pass input encode = ts.one_of{"json", "messagepack"}:is_optional(), -- how to encode input (if relevant) timeout = (ts.number + ts.string / lua_util.parse_time_interval):is_optional(), } local rspamd_http = require "rspamd_http" local ucl = require "ucl" local function url_encode_string(str) str = string.gsub(str, "([^%w _%%%-%.~])", function(c) return string.format("%%%02X", string.byte(c)) end) str = string.gsub(str, " ", "+") return str end assert(url_encode_string('上海+中國') == '%E4%B8%8A%E6%B5%B7%2B%E4%B8%AD%E5%9C%8B') assert(url_encode_string('? and the Mysterians') == '%3F+and+the+Mysterians') local function query_external_map(map_config, upstreams, key, callback, task) local http_method = (map_config.method == 'body' or map_config.method == 'form') and 'POST' or 'GET' local upstream = upstreams:get_upstream_round_robin() local http_headers = { ['Accept'] = '*/*' } local http_body = nil local url = map_config.backend if type(key) == 'string' or type(key) == 'userdata' then if map_config.method == 'body' then http_body = key http_headers['Content-Type'] = 'text/plain' elseif map_config.method == 'header' then http_headers = { key = key } elseif map_config.method == 'query' then url = string.format('%s?key=%s', url, url_encode_string(tostring(key))) end elseif type(key) == 'table' then if map_config.method == 'body' then if map_config.encode == 'json' then http_body = ucl.to_format(key, 'json-compact', true) http_headers['Content-Type'] = 'application/json' elseif map_config.encode == 'messagepack' then http_body = ucl.to_format(key, 'messagepack', true) http_headers['Content-Type'] = 'application/msgpack' else local caller = debug.getinfo(2) or {} rspamd_logger.errx(task, "requested external map key with a wrong combination body method and missing encode; caller: %s:%s", caller.short_src, caller.currentline) callback(false, 'invalid map usage', 500, task) end else -- query/header and no encode if map_config.method == 'query' then local params_table = {} for k,v in pairs(key) do if type(v) == 'string' then table.insert(params_table, string.format('%s=%s', url_encode_string(k), url_encode_string(v))) end end url = string.format('%s?%s', url, table.concat(params_table, '&')) elseif map_config.method == 'header' then http_headers = key else local caller = debug.getinfo(2) or {} rspamd_logger.errx(task, "requested external map key with a wrong combination of encode and input; caller: %s:%s", caller.short_src, caller.currentline) callback(false, 'invalid map usage', 500, task) return end end end local function map_callback(err, code, body, _) if err then callback(false, err, code, task) elseif code == 200 then callback(true, body, 200, task) else callback(false, err, code, task) end end local ret = rspamd_http.request{ task = task, url = url, callback = map_callback, timeout = map_config.timeout or 1.0, keepalive = true, upstream = upstream, method = http_method, headers = http_headers, body = http_body, } if not ret then callback(false, 'http request error', 500, task) end end --[[[ -- @function lua_maps.map_add_from_ucl(opt, mtype, description) -- Creates a map from static data -- Returns true if map was added or nil -- @param {string or table} opt data for map (or URL) -- @param {string} mtype type of map (`set`, `map`, `radix`, `regexp`) -- @param {string} description human-readable description of map -- @param {function} callback optional callback that will be called on map match (required for external maps) -- @return {bool} true on success, or `nil` --]] local function rspamd_map_add_from_ucl(opt, mtype, description, callback) local ret = { get_key = function(t, k, key_callback, task) if t.__data then local cb = key_callback or callback if t.__external then if not cb or not task then local caller = debug.getinfo(2) or {} rspamd_logger.errx(rspamd_config, "requested external map key without callback or task; caller: %s:%s", caller.short_src, caller.currentline) return nil end query_external_map(t.__data, t.__upstreams, k, cb, task) else local result = t.__data:get_key(k) if cb then if result then cb(true, result, 200, task) else cb(false, 'not found', 404, task) end else return result end end end return nil end, foreach = function(t, cb) return t.__data:foreach(cb) end, on_load = function(t, cb) t.__data:on_load(cb) end } local ret_mt = { __index = function(t, k, key_callback, task) if t.__data then return t.get_key(k, key_callback, task) end return nil end } if not opt then return nil end local function maybe_register_selector() if opt.selector_alias then local lua_selectors = require "lua_selectors" lua_selectors.add_map(opt.selector_alias, ret) end end if type(opt) == 'string' then opt,mtype = maybe_adjust_type(opt, mtype) local cache_key = map_hash_key(opt, mtype) if not callback and maps_cache[cache_key] then rspamd_logger.infox(rspamd_config, 'reuse url for %s(%s)', opt, mtype) return maps_cache[cache_key] end -- We have a single string, so we treat it as a map local map = rspamd_config:add_map{ type = mtype, description = description, url = opt, } if map then ret.__data = map ret.hash = cache_key setmetatable(ret, ret_mt) maps_cache[cache_key] = ret return ret end elseif type(opt) == 'table' then local cache_key = lua_util.table_digest(opt) if not callback and maps_cache[cache_key] then rspamd_logger.infox(rspamd_config, 'reuse url for complex map definition %s: %s', cache_key:sub(1,8), description) return maps_cache[cache_key] end if opt[1] then -- Adjust each element if needed local adjusted for i,source in ipairs(opt) do local nsrc,ntype = maybe_adjust_type(source, mtype) if mtype ~= ntype then if not adjusted then mtype = ntype end adjusted = true end opt[i] = nsrc end if mtype == 'radix' then if string.find(opt[1], '^%d') then local map = rspamd_config:radix_from_ucl(opt) if map then ret.__data = map setmetatable(ret, ret_mt) maps_cache[cache_key] = ret maybe_register_selector() return ret end else -- Plain table local map = rspamd_config:add_map{ type = mtype, description = description, url = opt, } if map then ret.__data = map setmetatable(ret, ret_mt) maps_cache[cache_key] = ret maybe_register_selector() return ret end end elseif mtype == 'regexp' or mtype == 'glob' then if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then -- Plain table local map = rspamd_config:add_map{ type = mtype, description = description, url = opt, } if map then ret.__data = map setmetatable(ret, ret_mt) maps_cache[cache_key] = ret maybe_register_selector() return ret end else local map = rspamd_config:add_map{ type = mtype, description = description, url = { url = 'static', data = opt, } } if map then ret.__data = map setmetatable(ret, ret_mt) maps_cache[cache_key] = ret maybe_register_selector() return ret end end else if string.find(opt[1], '^/%a') or string.find(opt[1], '^http') then -- Plain table local map = rspamd_config:add_map{ type = mtype, description = description, url = opt, } if map then ret.__data = map setmetatable(ret, ret_mt) maps_cache[cache_key] = ret maybe_register_selector() return ret end else local data = {} local nelts = 0 -- Plain array of keys, count merely numeric elts for _,elt in ipairs(opt) do if type(elt) == 'string' then -- Numeric table if mtype == 'hash' then -- Treat as KV pair local pieces = lua_util.str_split(elt, ' ') if #pieces > 1 then local key = table.remove(pieces, 1) data[key] = table.concat(pieces, ' ') else data[elt] = true end else data[elt] = true end nelts = nelts + 1 end end if nelts > 0 then -- Plain Lua table that is used as a map ret.__data = data ret.get_key = function(t, k) if k ~= '__data' then return t.__data[k] end return nil end ret.foreach = function(_, func) for k,v in pairs(ret.__data) do if not func(k, v) then return false end end return true end ret.on_load = function(_, cb) rspamd_config:add_on_load(function(_, _, _) cb() end) end maps_cache[cache_key] = ret maybe_register_selector() return ret else -- Empty map, huh? rspamd_logger.errx(rspamd_config, 'invalid map element: %s', opt) end end end else if opt.external then -- External map definition, missing fields are handled by schema local parse_res,parse_err = external_map_schema(opt) if parse_res then ret.__upstreams = lua_util.http_upstreams_by_url(rspamd_config:get_mempool(), opt.backend) if ret.__upstreams then ret.__data = opt ret.__external = true setmetatable(ret, ret_mt) maybe_register_selector() return ret else rspamd_logger.errx(rspamd_config, 'cannot parse external map upstreams: %s', opt.backend) end else rspamd_logger.errx(rspamd_config, 'cannot parse external map: %s', parse_err) end else -- Adjust lua specific augmentations in a trivial case if type(opt.url) == 'string' then local nsrc,ntype = maybe_adjust_type(opt.url, mtype) if nsrc and ntype then opt.url = nsrc mtype = ntype end end -- We have some non-trivial object so let C code to deal with it somehow... local map = rspamd_config:add_map{ type = mtype, description = description, url = opt, } if map then ret.__data = map setmetatable(ret, ret_mt) maps_cache[cache_key] = ret maybe_register_selector() return ret end end end -- opt[1] end return nil end --[[[ -- @function lua_maps.map_add(mname, optname, mtype, description) -- Creates a map from configuration elements (static data or URL) -- Returns true if map was added or nil -- @param {string} mname config section to use -- @param {string} optname option name to use -- @param {string} mtype type of map ('set', 'hash', 'radix', 'regexp', 'glob') -- @param {string} description human-readable description of map -- @param {function} callback optional callback that will be called on map match (required for external maps) -- @return {bool} true on success, or `nil` --]] local function rspamd_map_add(mname, optname, mtype, description, callback) local opt = rspamd_config:get_module_opt(mname, optname) return rspamd_map_add_from_ucl(opt, mtype, description, callback) end exports.rspamd_map_add = rspamd_map_add exports.map_add = rspamd_map_add exports.rspamd_map_add_from_ucl = rspamd_map_add_from_ucl exports.map_add_from_ucl = rspamd_map_add_from_ucl -- Check `what` for being lua_map name, otherwise just compares key with what local function rspamd_maybe_check_map(key, what) local fun = require "fun" if type(what) == "table" then return fun.any(function(elt) return rspamd_maybe_check_map(key, elt) end, what) end if type(rspamd_maps) == "table" then local mn if starts(key, "map:") then mn = string.sub(key, 5) elseif starts(key, "map://") then mn = string.sub(key, 7) end if mn and rspamd_maps[mn] then return rspamd_maps[mn]:get_key(what) end end return what:lower() == key end exports.rspamd_maybe_check_map = rspamd_maybe_check_map --[[[ -- @function lua_maps.fill_config_maps(mname, options, defs) -- Fill maps that could be defined in defs, from the config in the options -- Defs is a table indexed by a map's parameter name and defining it's config, -- for example: defs = { my_map = { type = 'map', description = 'my cool map', optional = true, } } -- Then this function will look for opts.my_map parameter and try to replace it's with -- a map with the specific type, description but not failing if it was empty. -- It will also set options.my_map_orig to the original value defined in the map --]] exports.fill_config_maps = function(mname, opts, map_defs) assert(type(opts) == 'table') assert(type(map_defs) == 'table') for k, v in pairs(map_defs) do if opts[k] then local map = rspamd_map_add_from_ucl(opts[k], v.type or 'map', v.description) if not map then rspamd_logger.errx(rspamd_config, 'map add error %s for module %s', k, mname) return false end opts[k..'_orig'] = opts[k] opts[k] = map elseif not v.optional then rspamd_logger.errx(rspamd_config, 'cannot find non optional map %s for module %s', k, mname) return false end end return true end local direct_map_schema = ts.shape{ -- complex object name = ts.string:is_optional(), description = ts.string:is_optional(), selector_alias = ts.string:is_optional(), -- an optional alias for the selectos framework timeout = ts.number, data = ts.array_of(ts.string):is_optional(), -- Tableshape has no options support for something like key1 or key2? upstreams = ts.one_of{ ts.string, ts.array_of(ts.string), }:is_optional(), url = ts.one_of{ ts.string, ts.array_of(ts.string), }:is_optional(), } exports.map_schema = ts.one_of{ ts.string, -- 'http://some_map' ts.array_of(ts.string), -- ['foo', 'bar'] ts.one_of{direct_map_schema, external_map_schema} } return exports