OPA进阶-函数与虚拟文档

本文来讲讲OPA常用的函数（function）和虚拟文档（virtual document），以及它们的适用场景。

我们将以实现判断配置文件为例展开。

文章目录

函数
虚拟文档
适用场景

用到的输入(配置文件列表)为：

// input.json
// posix为常见路径格式
// traditional-mac为mac的一种文件路径格式
{
  "files": [
  {
    "type": "posix",
    "path": "/Users/newbmiao/Documents/1.yaml"
  },
  {
    "type": "posix",
    "path": "/Users/newbmiao/Documents/2.yaml"
  },
  {
    "type": "traditional-mac",
    "path": "Macintosh HD:Users:newbmiao:Documents:3.yml"
  },
  {
    "type": "traditional-mac",
    "path": "Macintosh HD:Users:newbmiao:Documents:3.json"
  }
  ]
}

函数

函数基本每个语言都会有，要说OPA里有啥特别的地方，那就是他也实现同名函数，类似“函数重载”，但简化了许多。

他的特点是：

默认函数返回值为 true（条件不满足时结果为 undefined，而不是 false）
可以指定函数返回值
可以存在同名函数, 但参数数目不能变
相同输入（参数）必须获得相同输出（返回值）

举例来说，如果实现判断文件是否是配置文件后缀：

下边is_config_file就是不指定返回值

条件满足则返回 true，三个实现只要满足一个就为 true

# is_config_file(str) is true when str ends with a known config-file
# extension (.yaml, .yml or .json); otherwise it is undefined.
# The same-named definitions are alternatives: matching any one is enough.
# endswith is used rather than contains so that a name such as "a.yaml.bak"
# is not treated as a config file.
is_config_file(str) {
  endswith(str, ".yaml")
}

is_config_file(str) {
  endswith(str, ".yml")
}

is_config_file(str) {
  endswith(str, ".json")
}

当然也可以使用else关键字合并到一个函数：

# is_config_file2(str) is the single-function form of is_config_file:
# the else branches are tried in order until one body succeeds.
# endswith is used rather than contains so that only a real suffix matches.
is_config_file2(str) {
  endswith(str, ".yaml")
}
else {
  endswith(str, ".yml")
}
else {
  endswith(str, ".json")
}

那如果指定返回值怎么重载呢？

以实现不同路径格式的文件名为例：

# getFileName(type, str) returns the last path segment of str.
# "posix" paths are split on "/", "traditional-mac" paths on ":".
# The check on type keeps the two definitions from matching the same input;
# for any other type the function is undefined.
getFileName(type, str) = x {
  type == "posix"
  # trim_space strips surrounding whitespace; trim() requires an explicit cutset.
  parts := split(trim_space(str), "/")
  x := parts[count(parts) - 1]
}

getFileName(type, str) = x {
  type == "traditional-mac"
  parts := split(trim_space(str), ":")
  x := parts[count(parts) - 1]
}

你会发现两个函数输入和返回不是相同的么？

其实不是，仔细看他们内都做了

type = {"traditional-mac"|"posix"} 的判断

这实际类似函数声明为

getFileName("posix", str) = x {}

所以输入是不同的

如果你省略了type的判断，会报错的：

eval_conflict_error: functions must not produce multiple outputs for same inputs

到此，将函数组合一下就可以实现判定是否为配置：

import input.files

# is_config is true when at least one entry of input.files has a file name
# accepted by is_config_file.
is_config {
  some i
  name := getFileName(files[i].type, files[i].path)
  is_config_file(name)
}

Tips: 关于OPA的内置函数可以查看文档:built-in-functions[1]

虚拟文档

虚拟文档是生成的文档（或者称为集合）

他的特点是：

作为集合，输入和输出必须是有限的
可以遍历
可以查询

举例来说，同样是获取文件名：

import input.files

# getFileNames is the set of file names (last path segment) of every entry in
# input.files. "posix" paths are split on "/", "traditional-mac" paths on ":".
getFileNames[x] {
  file := files[_]
  file.type == "posix"
  # trim_space strips surrounding whitespace; trim() requires an explicit cutset.
  parts := split(trim_space(file.path), "/")
  x := parts[count(parts) - 1]
}

getFileNames[x] {
  file := files[_]
  file.type == "traditional-mac"
  parts := split(trim_space(file.path), ":")
  x := parts[count(parts) - 1]
}

这里 getFileNames[x] {} 等价于 getFileNames[x] = x {}

就是x作为了集合的一个结果，填充到getFileNames这个虚拟文档中，其key也是x的值。

有点抽象是不？

别怕，他可以查询，我们查询看下他的结果就好理解了

mkdir syntax

comprehensions.rego

package example_comprehensions

import input.files

# Object comprehension: { <key>: <term> | <body> }
# Maps each file type found in input.files to the array of paths of that type.
group_files_by_type := {kind: matching |
	kind := files[_].type
	matching := [entry.path |
		entry := files[_]
		entry.type == kind
	]
}

# Object comprehension: { <key>: <term> | <body> }
# Maps each file extension found in input.files (the text after the last dot)
# to the array of paths that end in that extension.
group_files_by_file_extension := {ext: paths |
	# Set of distinct extensions, e.g. {"yaml", "yml", "json"} for the sample input.
	extSets := {e |
		e = regex.find_all_string_submatch_n(".*\\.(.*)$", files[_].path, -1)[0][1]
	}

	ext := extSets[_]
	paths := [path |
		tmp := files[_]

		# Match on ".<ext>" so that ext "yml" does not also collect a path
		# ending in ".notyml".
		endswith(tmp.path, concat("", [".", ext]))
		path := tmp.path
	]
}

# Set comprehension: { <term> | <body> }
# Set of all paths in input.files in posix form: "posix" paths are taken as-is,
# "traditional-mac" paths drop the "Macintosh HD" prefix and use "/" for ":".
convert_all_to_posix_path_sets := {converted |
	posix_paths := {f.path |
		f := files[_]
		f.type == "posix"
	}

	mac_paths := {p |
		f := files[_]
		f.type == "traditional-mac"
		without_volume := replace(f.path, "Macintosh HD", "")
		p := replace(without_volume, ":", "/")
	}

	merged := posix_paths | mac_paths
	converted := merged[_]
}

# Array comprehension: [ <term> | <body> ]
# Array of all paths in input.files in posix form: the "posix" paths first,
# followed by the converted "traditional-mac" paths.
convert_all_to_posix_path_array := [converted |
	posix_paths := [f.path |
		f := files[_]
		f.type == "posix"
	]

	mac_paths := [p |
		f := files[_]
		f.type == "traditional-mac"
		without_volume := replace(f.path, "Macintosh HD", "")
		p := replace(without_volume, ":", "/")
	]

	merged := array.concat(posix_paths, mac_paths)
	converted := merged[_]
]

function.rego

package example_func

import input.files

# is_config is true when at least one entry of input.files has a file name
# accepted by is_config_file.
is_config {
	some i
	name := getFileName(files[i].type, files[i].path)
	is_config_file(name)
}

# or
# is_config_file2(x)

# A function's input/output domain can be infinite.
# getFileName(type, str) returns the last path segment of str.
# "posix" paths are split on "/", "traditional-mac" paths on ":";
# for any other type the function is undefined.
getFileName(type, str) = x {
	# The check on type is required. Without it both definitions match the same
	# input and evaluation fails with:
	# eval_conflict_error: functions must not produce multiple outputs for same inputs
	type == "posix"

	# trim_space strips surrounding whitespace; trim() requires an explicit cutset.
	parts := split(trim_space(str), "/")
	x := parts[count(parts) - 1]
}

getFileName(type, str) = x {
	# Required for the same reason as in the "posix" definition.
	type == "traditional-mac"
	parts := split(trim_space(str), ":")
	x := parts[count(parts) - 1]
}

# Or
# getFileName("posix", str) = x {
# 	tmp := split(trim_space(str), "/")
# 	x := tmp[minus(count(tmp), 1)]
# }

# getFileName("traditional-mac", str) = x {
# 	tmp := split(trim_space(str), ":")
# 	x := tmp[minus(count(tmp), 1)]
# }

# is_config_file(str) is true when str ends with a known config-file
# extension (.yaml, .yml or .json); otherwise it is undefined.
# The same-named definitions are alternatives: matching any one is enough.
# endswith is used rather than contains so that a name such as "a.yaml.bak"
# is not treated as a config file.
is_config_file(str) {
	endswith(str, ".yaml")
}

is_config_file(str) {
	endswith(str, ".yml")
}

is_config_file(str) {
	endswith(str, ".json")
}

# is_config_file2(str) is the single-function form of is_config_file:
# the else branches are tried in order until one body succeeds.
# endswith is used rather than contains so that only a real suffix matches.
is_config_file2(str) {
	endswith(str, ".yaml")
}

else {
	endswith(str, ".yml")
}

else {
	endswith(str, ".json")
}

virtualDocument.rego

package example_virtual_doc

import data.example_func.is_config_file
import input.files

# is_config is true when at least one member of the getFileNames set is
# accepted by is_config_file.
is_config {
	some name
	getFileNames[name]
	is_config_file(name)
}

# A virtual document has finite input/output. It is a generated document that
# can be queried and iterated, and it appears in the evaluation output.
# getFileNames is the set of file names (last path segment) of every entry in
# input.files. "posix" paths are split on "/", "traditional-mac" paths on ":".
getFileNames[x] {
	file := files[_]
	file.type == "posix"

	# trim_space strips surrounding whitespace; trim() requires an explicit cutset.
	parts := split(trim_space(file.path), "/")
	x := parts[count(parts) - 1]
}

getFileNames[x] {
	file := files[_]
	file.type == "traditional-mac"
	parts := split(trim_space(file.path), ":")
	x := parts[count(parts) - 1]
}

input.json

{
    "files": [
      {
        "type": "posix",
        "path": "/Users/newbmiao/Documents/1.yaml"
      },
      {
        "type": "posix",
        "path": "/Users/newbmiao/Documents/2.yaml"
      },
      {
        "type": "traditional-mac",
        "path": "Macintosh HD:Users:newbmiao:Documents:3.yml"
      },
      {
        "type": "traditional-mac",
        "path": "Macintosh HD:Users:newbmiao:Documents:3.json"
      }
    ]
  }

cd syntax
opa eval -f values -i input.json -d . "data.example_virtual_doc.getFileNames"
[
  [
    "1.yaml",
    "2.yaml",
    "3.yml",
    "3.json"
  ]
]

然后遍历虚拟文档，结合判定is_config_file就可实现判定：

import data.example_func.is_config_file
import input.files

# is_config is true when at least one member of the getFileNames set is
# accepted by is_config_file.
is_config {
  some name
  getFileNames[name]
  is_config_file(name)
}

这里遍历getFileNames[_],随即作为每一个输入参数传给is_config_file进行判断，是不是很方便！

适用场景

具体到他们的适用场景，其实比较好区分，重点只要关注两点：

是否需要遍历？
输入输出是否是有限的？

如果有一个答案是肯定的，那么虚拟文档就更适合。

OPA的函数和虚拟文档很简单、很清秀是不是？

今天就到这里，下一篇我们看看他更优雅的推导式（Comprehensions）。