openthoughts-tblite
v2.0
OpenThoughts-TBLite: A difficulty-calibrated benchmark of 100 tasks for building terminal agents. By OpenThoughts Agent team, Snorkel AI, Bespoke Labs.
uvx harbor run -d openthoughts-tblite@2.0
Tasks (100)
acl-permissions-inheritance
uvx harbor run -d openthoughts-tblite@2.0 -t acl-permissions-inheritance  # f075e46
amuse-install
uvx harbor run -d openthoughts-tblite@2.0 -t amuse-install  # f075e46
anomaly-detection-ranking
uvx harbor run -d openthoughts-tblite@2.0 -t anomaly-detection-ranking  # f075e46
api-endpoint-permission-canonicalizer
uvx harbor run -d openthoughts-tblite@2.0 -t api-endpoint-permission-canonicalizer  # f075e46
application-debug
uvx harbor run -d openthoughts-tblite@2.0 -t application-debug  # f075e46
auth_token_race_condition
uvx harbor run -d openthoughts-tblite@2.0 -t auth_token_race_condition  # f075e46
bandit-delayed-feedback
uvx harbor run -d openthoughts-tblite@2.0 -t bandit-delayed-feedback  # f075e46
bash-log-processor-fix
uvx harbor run -d openthoughts-tblite@2.0 -t bash-log-processor-fix  # f075e46
basic-message-queue
uvx harbor run -d openthoughts-tblite@2.0 -t basic-message-queue  # f075e46
battery-charging-optimization
uvx harbor run -d openthoughts-tblite@2.0 -t battery-charging-optimization  # f075e46
bloom-filter-cache-penetration-prevention
uvx harbor run -d openthoughts-tblite@2.0 -t bloom-filter-cache-penetration-prevention  # f075e46
book-portfolio-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t book-portfolio-analysis  # f075e46
bracket-sequence-restoration
uvx harbor run -d openthoughts-tblite@2.0 -t bracket-sequence-restoration  # f075e46
breast-cancer-mlflow
uvx harbor run -d openthoughts-tblite@2.0 -t breast-cancer-mlflow  # f075e46
broken-python
uvx harbor run -d openthoughts-tblite@2.0 -t broken-python  # f075e46
build-merkle-tree-cli-sha512
uvx harbor run -d openthoughts-tblite@2.0 -t build-merkle-tree-cli-sha512  # f075e46
build-system-task-ordering
uvx harbor run -d openthoughts-tblite@2.0 -t build-system-task-ordering  # f075e46
california-housing-api
uvx harbor run -d openthoughts-tblite@2.0 -t california-housing-api  # f075e46
chained-forensic-extraction_20260101_011957
uvx harbor run -d openthoughts-tblite@2.0 -t chained-forensic-extraction_20260101_011957  # f075e46
competitive-programming-solver
uvx harbor run -d openthoughts-tblite@2.0 -t competitive-programming-solver  # f075e46
container-registry-optimization
uvx harbor run -d openthoughts-tblite@2.0 -t container-registry-optimization  # f075e46
convolutional-layers
uvx harbor run -d openthoughts-tblite@2.0 -t convolutional-layers  # f075e46
corrupted-filesystem-recovery
uvx harbor run -d openthoughts-tblite@2.0 -t corrupted-filesystem-recovery  # f075e46
cosign-keyless-signing
uvx harbor run -d openthoughts-tblite@2.0 -t cosign-keyless-signing  # f075e46
cpp-daemon-sighup-segfault
uvx harbor run -d openthoughts-tblite@2.0 -t cpp-daemon-sighup-segfault  # f075e46
cryptographic-protocol-verifier
uvx harbor run -d openthoughts-tblite@2.0 -t cryptographic-protocol-verifier  # f075e46
csv-json-jsonl-merger
uvx harbor run -d openthoughts-tblite@2.0 -t csv-json-jsonl-merger  # f075e46
db-migration-local-storage
uvx harbor run -d openthoughts-tblite@2.0 -t db-migration-local-storage  # f075e46
distributed-test-execution-scheduler
uvx harbor run -d openthoughts-tblite@2.0 -t distributed-test-execution-scheduler  # f075e46
ekf-localization
uvx harbor run -d openthoughts-tblite@2.0 -t ekf-localization  # f075e46
etl_checkpoint_resume_bug
uvx harbor run -d openthoughts-tblite@2.0 -t etl_checkpoint_resume_bug  # f075e46
fix_async_worker_queue
uvx harbor run -d openthoughts-tblite@2.0 -t fix_async_worker_queue  # f075e46
fix-js-network-controller
uvx harbor run -d openthoughts-tblite@2.0 -t fix-js-network-controller  # f075e46
floor-plan-geometry
uvx harbor run -d openthoughts-tblite@2.0 -t floor-plan-geometry  # f075e46
game-of-stones
uvx harbor run -d openthoughts-tblite@2.0 -t game-of-stones  # f075e46
git-repo-forensics
uvx harbor run -d openthoughts-tblite@2.0 -t git-repo-forensics  # f075e46
grid-pathfinding
uvx harbor run -d openthoughts-tblite@2.0 -t grid-pathfinding  # f075e46
grpc-plant-position-server
uvx harbor run -d openthoughts-tblite@2.0 -t grpc-plant-position-server  # f075e46
html-index-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t html-index-analysis  # f075e46
hydra-debug-slurm-mode
uvx harbor run -d openthoughts-tblite@2.0 -t hydra-debug-slurm-mode  # f075e46
image-tile-identification
uvx harbor run -d openthoughts-tblite@2.0 -t image-tile-identification  # f075e46
industrial-kiln-controller
uvx harbor run -d openthoughts-tblite@2.0 -t industrial-kiln-controller  # f075e46
iot-device-registration-server
uvx harbor run -d openthoughts-tblite@2.0 -t iot-device-registration-server  # f075e46
iris-dataset-classification
uvx harbor run -d openthoughts-tblite@2.0 -t iris-dataset-classification  # f075e46
jq-data-processing
uvx harbor run -d openthoughts-tblite@2.0 -t jq-data-processing  # f075e46
jsonl-aggregator
uvx harbor run -d openthoughts-tblite@2.0 -t jsonl-aggregator  # f075e46
legal-summary-extraction
uvx harbor run -d openthoughts-tblite@2.0 -t legal-summary-extraction  # f075e46
log-summary
uvx harbor run -d openthoughts-tblite@2.0 -t log-summary  # f075e46
malicious-package-forensics
uvx harbor run -d openthoughts-tblite@2.0 -t malicious-package-forensics  # f075e46
maven-slf4j-conflict
uvx harbor run -d openthoughts-tblite@2.0 -t maven-slf4j-conflict  # f075e46
mech-system
uvx harbor run -d openthoughts-tblite@2.0 -t mech-system  # f075e46
mlflow-register
uvx harbor run -d openthoughts-tblite@2.0 -t mlflow-register  # f075e46
monorepo-changelog-cli
uvx harbor run -d openthoughts-tblite@2.0 -t monorepo-changelog-cli  # f075e46
mtls-cert-rotation
uvx harbor run -d openthoughts-tblite@2.0 -t mtls-cert-rotation  # f075e46
multi-labeller
uvx harbor run -d openthoughts-tblite@2.0 -t multi-labeller  # f075e46
multi-server-configuration
uvx harbor run -d openthoughts-tblite@2.0 -t multi-server-configuration  # f075e46
network-log-normalization
uvx harbor run -d openthoughts-tblite@2.0 -t network-log-normalization  # f075e46
neural-architecture-search-final
uvx harbor run -d openthoughts-tblite@2.0 -t neural-architecture-search-final  # f075e46
neutron-submission
uvx harbor run -d openthoughts-tblite@2.0 -t neutron-submission  # f075e46
okhttp-trailers-crash
uvx harbor run -d openthoughts-tblite@2.0 -t okhttp-trailers-crash  # f075e46
pandas-etl
uvx harbor run -d openthoughts-tblite@2.0 -t pandas-etl  # f075e46
pandas-numpy-data-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t pandas-numpy-data-analysis  # f075e46
parking-lot-pathfinding
uvx harbor run -d openthoughts-tblite@2.0 -t parking-lot-pathfinding  # f075e46
pdf-table-parsing
uvx harbor run -d openthoughts-tblite@2.0 -t pdf-table-parsing  # f075e46
permutation-construction-100k
uvx harbor run -d openthoughts-tblite@2.0 -t permutation-construction-100k  # f075e46
pgn-chess-repair-puzzles
uvx harbor run -d openthoughts-tblite@2.0 -t pgn-chess-repair-puzzles  # f075e46
playing-card-recognition
uvx harbor run -d openthoughts-tblite@2.0 -t playing-card-recognition  # f075e46
prediction-model-evaluation
uvx harbor run -d openthoughts-tblite@2.0 -t prediction-model-evaluation  # f075e46
protein-sequence
uvx harbor run -d openthoughts-tblite@2.0 -t protein-sequence  # f075e46
publisher-market-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t publisher-market-analysis  # f075e46
publisher-market-analysis-v2
uvx harbor run -d openthoughts-tblite@2.0 -t publisher-market-analysis-v2  # f075e46
python-api-rate-limit
uvx harbor run -d openthoughts-tblite@2.0 -t python-api-rate-limit  # f075e46
raft-log-repair-concurrent-access
uvx harbor run -d openthoughts-tblite@2.0 -t raft-log-repair-concurrent-access  # f075e46
react-typescript-debugg
uvx harbor run -d openthoughts-tblite@2.0 -t react-typescript-debugg  # f075e46
reproducibility-and-envsetup
uvx harbor run -d openthoughts-tblite@2.0 -t reproducibility-and-envsetup  # f075e46
reverse-engineer-stack-vm
uvx harbor run -d openthoughts-tblite@2.0 -t reverse-engineer-stack-vm  # f075e46
rsa-jwt-token-api-redis-blacklist
uvx harbor run -d openthoughts-tblite@2.0 -t rsa-jwt-token-api-redis-blacklist  # f075e46
sakila-sqlite-queries
uvx harbor run -d openthoughts-tblite@2.0 -t sakila-sqlite-queries  # f075e46
sales-data-csv-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t sales-data-csv-analysis  # f075e46
scan-linux-persistence-artifacts
uvx harbor run -d openthoughts-tblite@2.0 -t scan-linux-persistence-artifacts  # f075e46
schedule-vacation
uvx harbor run -d openthoughts-tblite@2.0 -t schedule-vacation  # f075e46
security-breach-incident-response
uvx harbor run -d openthoughts-tblite@2.0 -t security-breach-incident-response  # f075e46
security-incident-log-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t security-incident-log-analysis  # f075e46
server-log-analysis
uvx harbor run -d openthoughts-tblite@2.0 -t server-log-analysis  # f075e46
service-deployment-wave-planner
uvx harbor run -d openthoughts-tblite@2.0 -t service-deployment-wave-planner  # f075e46
sign-vector-game
uvx harbor run -d openthoughts-tblite@2.0 -t sign-vector-game  # f075e46
simple-database-query-tool
uvx harbor run -d openthoughts-tblite@2.0 -t simple-database-query-tool  # f075e46
smiles-data-lab
uvx harbor run -d openthoughts-tblite@2.0 -t smiles-data-lab  # f075e46
sql-injection-forensics
uvx harbor run -d openthoughts-tblite@2.0 -t sql-injection-forensics  # f075e46
submission_a63937a5_20251224_152124
uvx harbor run -d openthoughts-tblite@2.0 -t submission_a63937a5_20251224_152124  # f075e46
supply-chain-fulfillment
uvx harbor run -d openthoughts-tblite@2.0 -t supply-chain-fulfillment  # f075e46
symlink-chain-traversal
uvx harbor run -d openthoughts-tblite@2.0 -t symlink-chain-traversal  # f075e46
sympy-bug-fix
uvx harbor run -d openthoughts-tblite@2.0 -t sympy-bug-fix  # f075e46
systemd-log-monitoring
uvx harbor run -d openthoughts-tblite@2.0 -t systemd-log-monitoring  # f075e46
task-xxe-exploit
uvx harbor run -d openthoughts-tblite@2.0 -t task-xxe-exploit  # f075e46
todos-api
uvx harbor run -d openthoughts-tblite@2.0 -t todos-api  # f075e46
token-auth-websocket
uvx harbor run -d openthoughts-tblite@2.0 -t token-auth-websocket  # f075e46
tsl-test-case-generation
uvx harbor run -d openthoughts-tblite@2.0 -t tsl-test-case-generation  # f075e46
vimscript-vim-quine
uvx harbor run -d openthoughts-tblite@2.0 -t vimscript-vim-quine  # f075e46
word-derangement-mapping
uvx harbor run -d openthoughts-tblite@2.0 -t word-derangement-mapping  # f075e46