binary-audit
v1.0
An open-source benchmark for evaluating AI agents' ability to find backdoors hidden in compiled binaries.
uvx harbor run -d binary-audit@1.0
Tasks (46)
caddy-backdoor-detect
uvx harbor run -d binary-audit@1.0 -t caddy-backdoor-detect
75f3e6e
caddy-backdoor-multiple-arch-binaries-detect
uvx harbor run -d binary-audit@1.0 -t caddy-backdoor-multiple-arch-binaries-detect
75f3e6e
caddy-backdoor-multiple-binaries-detect
uvx harbor run -d binary-audit@1.0 -t caddy-backdoor-multiple-binaries-detect
75f3e6e
caddy-backdoor-simple-detect
uvx harbor run -d binary-audit@1.0 -t caddy-backdoor-simple-detect
75f3e6e
caddy-timebomb-multiple-arch-binaries-detect
uvx harbor run -d binary-audit@1.0 -t caddy-timebomb-multiple-arch-binaries-detect
75f3e6e
caddy-timebomb-multiple-binaries-detect
uvx harbor run -d binary-audit@1.0 -t caddy-timebomb-multiple-binaries-detect
75f3e6e
dnsmasq-backdoor-detect
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect
75f3e6e
dnsmasq-backdoor-detect-execvp-obfuscated
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-execvp-obfuscated
75f3e6e
dnsmasq-backdoor-detect-negative
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-negative
75f3e6e
dnsmasq-backdoor-detect-negative2
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-negative2
75f3e6e
dnsmasq-backdoor-detect-obfuscated
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-obfuscated
75f3e6e
dnsmasq-backdoor-detect-posix-spawn
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-posix-spawn
75f3e6e
dnsmasq-backdoor-detect-posix-spawn-obfuscated
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-posix-spawn-obfuscated
75f3e6e
dnsmasq-backdoor-detect-printf
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-printf
75f3e6e
dnsmasq-backdoor-detect-syscall
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-syscall
75f3e6e
dnsmasq-backdoor-detect-syscall-obfuscated
uvx harbor run -d binary-audit@1.0 -t dnsmasq-backdoor-detect-syscall-obfuscated
75f3e6e
dropbear-brokenauth-detect
uvx harbor run -d binary-audit@1.0 -t dropbear-brokenauth-detect
75f3e6e
dropbear-brokenauth-detect-hint
uvx harbor run -d binary-audit@1.0 -t dropbear-brokenauth-detect-hint
75f3e6e
dropbear-brokenauth-detect-negative
uvx harbor run -d binary-audit@1.0 -t dropbear-brokenauth-detect-negative
75f3e6e
dropbear-brokenauth-detect-negative2
uvx harbor run -d binary-audit@1.0 -t dropbear-brokenauth-detect-negative2
75f3e6e
dropbear-brokenauth-detect-nologline
uvx harbor run -d binary-audit@1.0 -t dropbear-brokenauth-detect-nologline
75f3e6e
dropbear-brokenauth2-detect
uvx harbor run -d binary-audit@1.0 -t dropbear-brokenauth2-detect
75f3e6e
ghidra-decompile-pyghidra
uvx harbor run -d binary-audit@1.0 -t ghidra-decompile-pyghidra
75f3e6e
ghidra-decompile-pyghidra-jq
uvx harbor run -d binary-audit@1.0 -t ghidra-decompile-pyghidra-jq
75f3e6e
ghidra-decompile-vanilla
uvx harbor run -d binary-audit@1.0 -t ghidra-decompile-vanilla
75f3e6e
ghidra-decompile-vanilla-jq
uvx harbor run -d binary-audit@1.0 -t ghidra-decompile-vanilla-jq
75f3e6e
lighttpd-authentication-harvester-detect
uvx harbor run -d binary-audit@1.0 -t lighttpd-authentication-harvester-detect
75f3e6e
lighttpd-backdoor-detect
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect
75f3e6e
lighttpd-backdoor-detect-negative
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-negative
75f3e6e
lighttpd-backdoor-detect-negative2
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-negative2
75f3e6e
lighttpd-backdoor-detect-open
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-open
75f3e6e
lighttpd-backdoor-detect-proc
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-proc
75f3e6e
lighttpd-backdoor-detect-proc-obfuscated
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-proc-obfuscated
75f3e6e
lighttpd-backdoor-detect-syscall
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-syscall
75f3e6e
lighttpd-backdoor-detect-syscall-obfuscated
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-detect-syscall-obfuscated
75f3e6e
lighttpd-backdoor-multiple-arch-binaries-detect
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-multiple-arch-binaries-detect
75f3e6e
lighttpd-backdoor-multiple-binaries-detect
uvx harbor run -d binary-audit@1.0 -t lighttpd-backdoor-multiple-binaries-detect
75f3e6e
lighttpd-timebomb-multiple-binaries-detect
uvx harbor run -d binary-audit@1.0 -t lighttpd-timebomb-multiple-binaries-detect
75f3e6e
pingora-backdoor-detect
uvx harbor run -d binary-audit@1.0 -t pingora-backdoor-detect
75f3e6e
radare2-decompile
uvx harbor run -d binary-audit@1.0 -t radare2-decompile
75f3e6e
radare2-decompile-jq
uvx harbor run -d binary-audit@1.0 -t radare2-decompile-jq
75f3e6e
sozu-backdoor-detect-negative
uvx harbor run -d binary-audit@1.0 -t sozu-backdoor-detect-negative
75f3e6e
sozu-backdoor-detect-negative2
uvx harbor run -d binary-audit@1.0 -t sozu-backdoor-detect-negative2
75f3e6e
sozu-backdoor-multiple-arch-binaries-detect
uvx harbor run -d binary-audit@1.0 -t sozu-backdoor-multiple-arch-binaries-detect
75f3e6e
sozu-backdoor-multiple-binaries-detect
uvx harbor run -d binary-audit@1.0 -t sozu-backdoor-multiple-binaries-detect
75f3e6e
sozu-timebomb-multiple-binaries-detect
uvx harbor run -d binary-audit@1.0 -t sozu-timebomb-multiple-binaries-detect
75f3e6e