openthoughts-tblite

v2.0

OpenThoughts-TBLite: A difficulty-calibrated benchmark of 100 tasks for building terminal agents. By the OpenThoughts Agent team, Snorkel AI, and Bespoke Labs.

uvx harbor run -d openthoughts-tblite@2.0

Tasks (100)

acl-permissions-inheritance
uvx harbor run -d openthoughts-tblite@2.0 -t acl-permissions-inheritance
f075e46
amuse-install
uvx harbor run -d openthoughts-tblite@2.0 -t amuse-install
f075e46
anomaly-detection-ranking
uvx harbor run -d openthoughts-tblite@2.0 -t anomaly-detection-ranking
f075e46
api-endpoint-permission-canonicalizer
uvx harbor run -d openthoughts-tblite@2.0 -t api-endpoint-permission-canonicalizer
f075e46
application-debug
uvx harbor run -d openthoughts-tblite@2.0 -t application-debug
f075e46
auth_token_race_condition
uvx harbor run -d openthoughts-tblite@2.0 -t auth_token_race_condition
f075e46
bandit-delayed-feedback
uvx harbor run -d openthoughts-tblite@2.0 -t bandit-delayed-feedback
f075e46
bash-log-processor-fix
uvx harbor run -d openthoughts-tblite@2.0 -t bash-log-processor-fix
f075e46
basic-message-queue
uvx harbor run -d openthoughts-tblite@2.0 -t basic-message-queue
f075e46
battery-charging-optimization
uvx harbor run -d openthoughts-tblite@2.0 -t battery-charging-optimization
f075e46
bloom-filter-cache-penetration-prevention
uvx harbor run -d openthoughts-tblite@2.0 -t bloom-filter-cache-penetration-prevention
f075e46
book-portfolio-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t book-portfolio-analysis
f075e46
bracket-sequence-restoration
uvx harbor run -d openthoughts-tblite@2.0 -t bracket-sequence-restoration
f075e46
breast-cancer-mlflow
uvx harbor run -d openthoughts-tblite@2.0 -t breast-cancer-mlflow
f075e46
broken-python
uvx harbor run -d openthoughts-tblite@2.0 -t broken-python
f075e46
build-merkle-tree-cli-sha512
uvx harbor run -d openthoughts-tblite@2.0 -t build-merkle-tree-cli-sha512
f075e46
build-system-task-ordering
uvx harbor run -d openthoughts-tblite@2.0 -t build-system-task-ordering
f075e46
california-housing-api
uvx harbor run -d openthoughts-tblite@2.0 -t california-housing-api
f075e46
chained-forensic-extraction_20260101_011957
uvx harbor run -d openthoughts-tblite@2.0 -t chained-forensic-extraction_20260101_011957
f075e46
competitive-programming-solver
uvx harbor run -d openthoughts-tblite@2.0 -t competitive-programming-solver
f075e46
container-registry-optimization
uvx harbor run -d openthoughts-tblite@2.0 -t container-registry-optimization
f075e46
convolutional-layers
uvx harbor run -d openthoughts-tblite@2.0 -t convolutional-layers
f075e46
corrupted-filesystem-recovery
uvx harbor run -d openthoughts-tblite@2.0 -t corrupted-filesystem-recovery
f075e46
cosign-keyless-signing
uvx harbor run -d openthoughts-tblite@2.0 -t cosign-keyless-signing
f075e46
cpp-daemon-sighup-segfault
uvx harbor run -d openthoughts-tblite@2.0 -t cpp-daemon-sighup-segfault
f075e46
cryptographic-protocol-verifier
uvx harbor run -d openthoughts-tblite@2.0 -t cryptographic-protocol-verifier
f075e46
csv-json-jsonl-merger
uvx harbor run -d openthoughts-tblite@2.0 -t csv-json-jsonl-merger
f075e46
db-migration-local-storage
uvx harbor run -d openthoughts-tblite@2.0 -t db-migration-local-storage
f075e46
distributed-test-execution-scheduler
uvx harbor run -d openthoughts-tblite@2.0 -t distributed-test-execution-scheduler
f075e46
ekf-localization
uvx harbor run -d openthoughts-tblite@2.0 -t ekf-localization
f075e46
etl_checkpoint_resume_bug
uvx harbor run -d openthoughts-tblite@2.0 -t etl_checkpoint_resume_bug
f075e46
fix_async_worker_queue
uvx harbor run -d openthoughts-tblite@2.0 -t fix_async_worker_queue
f075e46
fix-js-network-controller
uvx harbor run -d openthoughts-tblite@2.0 -t fix-js-network-controller
f075e46
floor-plan-geometry
uvx harbor run -d openthoughts-tblite@2.0 -t floor-plan-geometry
f075e46
game-of-stones
uvx harbor run -d openthoughts-tblite@2.0 -t game-of-stones
f075e46
git-repo-forensics
uvx harbor run -d openthoughts-tblite@2.0 -t git-repo-forensics
f075e46
grid-pathfinding
uvx harbor run -d openthoughts-tblite@2.0 -t grid-pathfinding
f075e46
grpc-plant-position-server
uvx harbor run -d openthoughts-tblite@2.0 -t grpc-plant-position-server
f075e46
html-index-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t html-index-analysis
f075e46
hydra-debug-slurm-mode
uvx harbor run -d openthoughts-tblite@2.0 -t hydra-debug-slurm-mode
f075e46
image-tile-identification
uvx harbor run -d openthoughts-tblite@2.0 -t image-tile-identification
f075e46
industrial-kiln-controller
uvx harbor run -d openthoughts-tblite@2.0 -t industrial-kiln-controller
f075e46
iot-device-registration-server
uvx harbor run -d openthoughts-tblite@2.0 -t iot-device-registration-server
f075e46
iris-dataset-classification
uvx harbor run -d openthoughts-tblite@2.0 -t iris-dataset-classification
f075e46
jq-data-processing
uvx harbor run -d openthoughts-tblite@2.0 -t jq-data-processing
f075e46
jsonl-aggregator
uvx harbor run -d openthoughts-tblite@2.0 -t jsonl-aggregator
f075e46
legal-summary-extraction
uvx harbor run -d openthoughts-tblite@2.0 -t legal-summary-extraction
f075e46
log-summary
uvx harbor run -d openthoughts-tblite@2.0 -t log-summary
f075e46
malicious-package-forensics
uvx harbor run -d openthoughts-tblite@2.0 -t malicious-package-forensics
f075e46
maven-slf4j-conflict
uvx harbor run -d openthoughts-tblite@2.0 -t maven-slf4j-conflict
f075e46
mech-system
uvx harbor run -d openthoughts-tblite@2.0 -t mech-system
f075e46
mlflow-register
uvx harbor run -d openthoughts-tblite@2.0 -t mlflow-register
f075e46
monorepo-changelog-cli
uvx harbor run -d openthoughts-tblite@2.0 -t monorepo-changelog-cli
f075e46
mtls-cert-rotation
uvx harbor run -d openthoughts-tblite@2.0 -t mtls-cert-rotation
f075e46
multi-labeller
uvx harbor run -d openthoughts-tblite@2.0 -t multi-labeller
f075e46
multi-server-configuration
uvx harbor run -d openthoughts-tblite@2.0 -t multi-server-configuration
f075e46
network-log-normalization
uvx harbor run -d openthoughts-tblite@2.0 -t network-log-normalization
f075e46
neural-architecture-search-final
uvx harbor run -d openthoughts-tblite@2.0 -t neural-architecture-search-final
f075e46
neutron-submission
uvx harbor run -d openthoughts-tblite@2.0 -t neutron-submission
f075e46
okhttp-trailers-crash
uvx harbor run -d openthoughts-tblite@2.0 -t okhttp-trailers-crash
f075e46
pandas-etl
uvx harbor run -d openthoughts-tblite@2.0 -t pandas-etl
f075e46
pandas-numpy-data-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t pandas-numpy-data-analysis
f075e46
parking-lot-pathfinding
uvx harbor run -d openthoughts-tblite@2.0 -t parking-lot-pathfinding
f075e46
pdf-table-parsing
uvx harbor run -d openthoughts-tblite@2.0 -t pdf-table-parsing
f075e46
permutation-construction-100k
uvx harbor run -d openthoughts-tblite@2.0 -t permutation-construction-100k
f075e46
pgn-chess-repair-puzzles
uvx harbor run -d openthoughts-tblite@2.0 -t pgn-chess-repair-puzzles
f075e46
playing-card-recognition
uvx harbor run -d openthoughts-tblite@2.0 -t playing-card-recognition
f075e46
prediction-model-evaluation
uvx harbor run -d openthoughts-tblite@2.0 -t prediction-model-evaluation
f075e46
protein-sequence
uvx harbor run -d openthoughts-tblite@2.0 -t protein-sequence
f075e46
publisher-market-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t publisher-market-analysis
f075e46
publisher-market-analysis-v2
uvx harbor run -d openthoughts-tblite@2.0 -t publisher-market-analysis-v2
f075e46
python-api-rate-limit
uvx harbor run -d openthoughts-tblite@2.0 -t python-api-rate-limit
f075e46
raft-log-repair-concurrent-access
uvx harbor run -d openthoughts-tblite@2.0 -t raft-log-repair-concurrent-access
f075e46
react-typescript-debugg
uvx harbor run -d openthoughts-tblite@2.0 -t react-typescript-debugg
f075e46
reproducibility-and-envsetup
uvx harbor run -d openthoughts-tblite@2.0 -t reproducibility-and-envsetup
f075e46
reverse-engineer-stack-vm
uvx harbor run -d openthoughts-tblite@2.0 -t reverse-engineer-stack-vm
f075e46
rsa-jwt-token-api-redis-blacklist
uvx harbor run -d openthoughts-tblite@2.0 -t rsa-jwt-token-api-redis-blacklist
f075e46
sakila-sqlite-queries
uvx harbor run -d openthoughts-tblite@2.0 -t sakila-sqlite-queries
f075e46
sales-data-csv-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t sales-data-csv-analysis
f075e46
scan-linux-persistence-artifacts
uvx harbor run -d openthoughts-tblite@2.0 -t scan-linux-persistence-artifacts
f075e46
schedule-vacation
uvx harbor run -d openthoughts-tblite@2.0 -t schedule-vacation
f075e46
security-breach-incident-response
uvx harbor run -d openthoughts-tblite@2.0 -t security-breach-incident-response
f075e46
security-incident-log-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t security-incident-log-analysis
f075e46
server-log-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t server-log-analysis
f075e46
service-deployment-wave-planner
uvx harbor run -d openthoughts-tblite@2.0 -t service-deployment-wave-planner
f075e46
sign-vector-game
uvx harbor run -d openthoughts-tblite@2.0 -t sign-vector-game
f075e46
simple-database-query-tool
uvx harbor run -d openthoughts-tblite@2.0 -t simple-database-query-tool
f075e46
smiles-data-lab
uvx harbor run -d openthoughts-tblite@2.0 -t smiles-data-lab
f075e46
sql-injection-forensics
uvx harbor run -d openthoughts-tblite@2.0 -t sql-injection-forensics
f075e46
submission_a63937a5_20251224_152124
uvx harbor run -d openthoughts-tblite@2.0 -t submission_a63937a5_20251224_152124
f075e46
supply-chain-fulfillment
uvx harbor run -d openthoughts-tblite@2.0 -t supply-chain-fulfillment
f075e46
symlink-chain-traversal
uvx harbor run -d openthoughts-tblite@2.0 -t symlink-chain-traversal
f075e46
sympy-bug-fix
uvx harbor run -d openthoughts-tblite@2.0 -t sympy-bug-fix
f075e46
systemd-log-monitoring
uvx harbor run -d openthoughts-tblite@2.0 -t systemd-log-monitoring
f075e46
task-xxe-exploit
uvx harbor run -d openthoughts-tblite@2.0 -t task-xxe-exploit
f075e46
todos-api
uvx harbor run -d openthoughts-tblite@2.0 -t todos-api
f075e46
token-auth-websocket
uvx harbor run -d openthoughts-tblite@2.0 -t token-auth-websocket
f075e46
tsl-test-case-generation
uvx harbor run -d openthoughts-tblite@2.0 -t tsl-test-case-generation
f075e46
vimscript-vim-quine
uvx harbor run -d openthoughts-tblite@2.0 -t vimscript-vim-quine
f075e46
word-derangement-mapping
uvx harbor run -d openthoughts-tblite@2.0 -t word-derangement-mapping
f075e46