{
  "generated_at": "2026-06-01T21:00:41+00:00",
  "discipline": "cs-ai",
  "category": "research-harnesses",
  "direction_id": "ad-hoc",
  "problem": "",
  "thesis": "",
  "title": "Automated Evidence-Ledger Production of Research Papers",
  "title_key": "automated-evidence-ledger-production-of-research-papers",
  "top_n": 12,
  "max_results": 40,
  "min_evidence": 5,
  "min_full_text": 1,
  "evidence_paper_ids": [
    "2605.03042v1",
    "2504.08066v1",
    "2603.28589v1",
    "2507.23276v2",
    "2511.04583v4",
    "2509.08713v2",
    "2506.01372v2",
    "2405.13352v1"
  ],
  "evidence_fingerprint": "dfe9879d46d1066d62a8cfcf",
  "novelty_score": 0.979,
  "novelty_breakdown": {
    "score": 0.979,
    "paper_novelty": 1.0,
    "claim_novelty": 0.949,
    "total_papers": 8,
    "novel_papers": 8,
    "duplicate_papers": 0,
    "duplicate_paper_ids": [],
    "duplicate_breakdown": [],
    "novel_paper_ids": [
      "2605.03042v1",
      "2504.08066v1",
      "2603.28589v1",
      "2507.23276v2",
      "2511.04583v4",
      "2509.08713v2",
      "2506.01372v2",
      "2405.13352v1"
    ],
    "prior_productions_scanned": 68,
    "current_claim_rows_scored": 8
  },
  "novelty_threshold": 0.35,
  "correctness_score": 0.793,
  "correctness_breakdown": {
    "score": 0.793,
    "rows_scored": 8,
    "rows": [
      {
        "paper_id": "2605.03042v1",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.621,
        "locator_present": 1.0,
        "score": 0.886,
        "pdf_cached": true
      },
      {
        "paper_id": "2504.08066v1",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.281,
        "locator_present": 0.0,
        "score": 0.584,
        "pdf_cached": true
      },
      {
        "paper_id": "2603.28589v1",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.471,
        "locator_present": 1.0,
        "score": 0.841,
        "pdf_cached": true
      },
      {
        "paper_id": "2507.23276v2",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.472,
        "locator_present": 1.0,
        "score": 0.842,
        "pdf_cached": true
      },
      {
        "paper_id": "2511.04583v4",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.5,
        "locator_present": 1.0,
        "score": 0.85,
        "pdf_cached": true
      },
      {
        "paper_id": "2509.08713v2",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.517,
        "locator_present": 1.0,
        "score": 0.855,
        "pdf_cached": true
      },
      {
        "paper_id": "2506.01372v2",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.304,
        "locator_present": 0.0,
        "score": 0.591,
        "pdf_cached": true
      },
      {
        "paper_id": "2405.13352v1",
        "quote_in_pdf": 1.0,
        "claim_supported_by_quote": 0.655,
        "locator_present": 1.0,
        "score": 0.897,
        "pdf_cached": true
      }
    ],
    "pdfs_missing": 0,
    "quote_in_pdf_avg": 1.0,
    "claim_support_avg": 0.478,
    "locator_present_avg": 0.75,
    "issues": [
      "2504.08066v1: missing source_quote/page/checked_at",
      "2506.01372v2: missing source_quote/page/checked_at"
    ]
  },
  "demo_proof": {
    "score": 0.75,
    "verdict": "pass",
    "passed": 6,
    "total": 8,
    "demo_path": "workspace/cs-ai/research-harnesses/paper/demo.py",
    "proof_path": "workspace/cs-ai/research-harnesses/paper/proof.json"
  },
  "auto_evidence_rows_generated": 0,
  "supported_claims": 6,
  "filled_evidence_rows": 8,
  "validation_status": "pass",
  "audit_status": "needs work",
  "submission_readiness": "blocked",
  "blocking_findings": [
    "2 evidence row(s) have unclear source depth; mark them full-text verified or preliminary.",
    "2 preliminary-linked claim(s) remain; do not promote to final support.",
    "Brief mixes supported and preliminary-linked claims; make final vs draft language explicit.",
    "Paper mentions 'peer-reviewed' without an explicit disclaimer that the draft itself is not peer-reviewed.",
    "1 full-text supported row(s) do not surface their source_quote in the paper body: 2603.28589v1.",
    "Demo proof score 0.75 below required floor 0.8 (6/8 claims independently re-verified against cached PDFs).",
    "Claim not independently re-verified: 2504.08066v1 (overlap=1.0, substring=True).",
    "Claim not independently re-verified: 2506.01372v2 (overlap=1.0, substring=False).",
    "correctness detail: 2504.08066v1: missing source_quote/page/checked_at",
    "correctness detail: 2506.01372v2: missing source_quote/page/checked_at"
  ],
  "audit_report": "workspace/cs-ai/research-harnesses/audit_report.md",
  "audit_report_json": "workspace/cs-ai/research-harnesses/audit_report.json",
  "audit_rounds": 13,
  "paper": "workspace/cs-ai/research-harnesses/paper/main.md",
  "claims": "workspace/cs-ai/research-harnesses/paper/claims.csv",
  "progress": "workspace/cs-ai/research-harnesses/production_progress.md",
  "progress_jsonl": "workspace/cs-ai/research-harnesses/production_progress.jsonl",
  "site": "",
  "deployed": false,
  "domain": "",
  "duplicate_policy": "update-existing",
  "limitations": [
    "Automated evidence rows are abstract-derived unless later replaced by full-text audit.",
    "The generated paper is a draft and must not be treated as peer-reviewed research."
  ],
  "ttv": {
    "started_at": "2026-06-01T21:00:35+00:00",
    "last_event_at": "2026-06-01T21:00:41+00:00",
    "time_to_first_evidence_seconds": 0.0,
    "time_to_paper_draft_seconds": 6.0,
    "elapsed_seconds": 6.0,
    "completed_steps": 9,
    "blocked_steps": 0,
    "failed_steps": 0,
    "first_value_label": "Produce abstract evidence",
    "reader_value_label": "Write paper"
  },
  "value_score": 0.944,
  "value_breakdown": {
    "score": 0.944,
    "components": {
      "gap": 1.0,
      "contradiction": 1.0,
      "surprise": 1.0,
      "recency": 0.625
    },
    "evidence": {
      "gap_count": 71,
      "surprise_count": 23,
      "contradiction_count": 30,
      "recent_papers": 5,
      "total_papers": 8,
      "rejected_papers_filtered": 0
    },
    "top_gaps": [
      {
        "paper_id": "1701.03868",
        "sentence": "The extent to which this constraint is weighed against other s, such as the computational resources required for simulating an environment, is an open question , but some minimal commitment to naturalismisnecessary.",
        "topics": [],
        "signal": "gap"
      },
      {
        "paper_id": "2006.04439",
        "sentence": "The open questions are: how expressive are neural ODEs in their current formalism, and can we improve their structure to enable richer representation learning and expressiveness? *Authors with equal contributions Copyright © 2021, Association for the Advancement of Artiﬁcial Intelligence (www.aaai.org).",
        "topics": [
          "enable richer representation"
        ],
        "signal": "gap"
      },
      {
        "paper_id": "2008.11865",
        "sentence": "However, it has been an open question as to what is causing this structure to appear and how to exploit it.",
        "topics": [],
        "signal": "gap"
      },
      {
        "paper_id": "2103.06769",
        "sentence": "Michael Tomasello [31], among others, has shown that very, very early on, infants who have very limited understanding of language, they can already understand in a very ad- vanced manner social situations in which they guess the goals of others.",
        "topics": [
          "Michael Tomasello"
        ],
        "signal": "gap"
      },
      {
        "paper_id": "2109.02722",
        "sentence": "However, to the best of our knowledge, such an approach has not been tested on pelvic scans.",
        "topics": [],
        "signal": "gap"
      }
    ],
    "top_surprises": [
      {
        "paper_id": "2204.09179",
        "sentence": "In contrast, X-M OE in Figure 2b shows a well-organized feature space with clear distinctions between clusters.",
        "signal": "surprise"
      },
      {
        "paper_id": "2207.00729",
        "sentence": "Corollary 1.1 (Non-uniform) .Any log-precision transformer can be simulated by a non-uniform TC0circuit family.14 13This may seem counterintuitive since multiplication of twon-precision numbers is outside AC0.",
        "signal": "surprise"
      },
      {
        "paper_id": "2305.02582",
        "sentence": "In contrast, Fig- ure 1b shows that LayerNorm has projected all keys to the hyperplane that is orthogonal to the ~1vector.",
        "signal": "surprise"
      }
    ],
    "top_contradictions": [
      {
        "benchmark": "all",
        "spread": 1.0,
        "min": 0.5,
        "max": 525.0,
        "papers": [
          "1803.08691",
          "1804.03830",
          "2101.02323",
          "2106.13898",
          "2109.02722",
          "2405.13407",
          "2405.18781",
          "2406.13215",
          "2410.14574",
          "2412.11538",
          "2501.02725",
          "2504.08066",
          "2507.05280",
          "2507.23083",
          "2509.10025",
          "2509.14199",
          "2510.16411",
          "2511.04583",
          "2511.10566",
          "2511.13250",
          "2512.07011",
          "2512.10411",
          "2601.19895",
          "2601.21653",
          "2602.11534",
          "2603.13258",
          "2603.28589",
          "2605.18747",
          "2606.04455"
        ],
        "samples": [
          {
            "paper_id": "1803.08691",
            "value": 1.0,
            "snippet": "oss for optimization. D. Implementation All models are implemented in Keras1with the TensorFlow2back- end, which emp"
          },
          {
            "paper_id": "1804.03830",
            "value": 3.0,
            "snippet": "pooling, and 2 fully-connected layers. All 3D convolutional kernels are 5 \u00025\u00025 with"
          },
          {
            "paper_id": "1804.03830",
            "value": 50.0,
            "snippet": "followed by the L2-normalization layer. All of the convolutional layers use 50 kernels of 5 \u00025\u00025 voxels with a stride o"
          },
          {
            "paper_id": "2101.02323",
            "value": 8.0,
            "snippet": "ations on a single xed random seed for all 8 variants. Jenson-Shannon Divergence Expe"
          },
          {
            "paper_id": "2106.13898",
            "value": 7.0,
            "snippet": "riable models with RNNs, and with ODEs, all from ( 7). 18 Input CNN"
          },
          {
            "paper_id": "2109.02722",
            "value": 2.0,
            "snippet": "he hyperparameters of the DIR pipeline. All the CT scans were resampled to have 2 mm ×2 mm×2 mm voxel spacing and the imag"
          }
        ],
        "signal": "contradiction"
      },
      {
        "benchmark": "learning",
        "spread": 1.0,
        "min": 0.001,
        "max": 469.0,
        "papers": [
          "1803.08691",
          "1804.03830",
          "2006.04439",
          "2008.11865",
          "2101.02323",
          "2104.09864",
          "2106.13898",
          "2109.02722",
          "2204.09179",
          "2204.10358",
          "2304.08691",
          "2305.14858",
          "2307.04772",
          "2403.20284",
          "2405.13407",
          "2407.03548",
          "2410.14574",
          "2502.11664",
          "2503.23007",
          "2504.08066",
          "2504.13990",
          "2511.04583",
          "2511.10566",
          "2602.11534",
          "2602.18849",
          "2605.18747"
        ],
        "samples": [
          {
            "paper_id": "1803.08691",
            "value": 31.0,
            "snippet": "om multi-atlas registration and machine learning have been proposed [31], [32]. However, the difﬁculties of mod"
          },
          {
            "paper_id": "1804.03830",
            "value": 2.0,
            "snippet": "segmentation method has two phases: (1) learning feature representations using JULE and (2) clustering deep representations for se"
          },
          {
            "paper_id": "2006.04439",
            "value": 0.001,
            "snippet": "er of hidden units 32 Minibatch size 16 Learning rate 0.001 - 0.02 ODE-solver step 1/6 relative to i"
          },
          {
            "paper_id": "2008.11865",
            "value": 15.0,
            "snippet": "from overﬁtting. The Journal of Machine Learning Research , 15(1):1929–1958, 2014. Valentin Thomas, Fa"
          },
          {
            "paper_id": "2101.02323",
            "value": 5.0,
            "snippet": "t when acquiring training data for deep learning [5]. Primary applications for active learn"
          },
          {
            "paper_id": "2101.02323",
            "value": 2.0,
            "snippet": ". Queries and concept learning. Machine learning , 2(4):319{342, 1988. [2] Jordan T Ash and"
          }
        ],
        "signal": "contradiction"
      },
      {
        "benchmark": "where",
        "spread": 1.0,
        "min": 0.1,
        "max": 500.0,
        "papers": [
          "2006.04439",
          "2008.11865",
          "2104.09864",
          "2207.00729",
          "2212.08228",
          "2305.14858",
          "2405.04134",
          "2405.13407",
          "2405.18781",
          "2406.13215",
          "2407.03548",
          "2409.15161",
          "2410.14574",
          "2502.18845",
          "2503.23007",
          "2504.13990",
          "2506.01372",
          "2509.10025",
          "2510.16411",
          "2511.04583",
          "2511.09146",
          "2511.10566",
          "2512.07011",
          "2602.18849",
          "2604.09742",
          "2606.04455"
        ],
        "samples": [
          {
            "paper_id": "2006.04439",
            "value": 1.0,
            "snippet": "i(t) i+mX j=1wijf(uj(t)) +Ii(t); (S16) where iis the time-constant, wijare the weights, Ii(t)is the input, and fis aC1-sigmoid function ( f(x) = 1=(1 +exp(\u0000x)"
          },
          {
            "paper_id": "2006.04439",
            "value": 1.0,
            "snippet": "as follows: G(z) =\u00001 sysz+Wf(z); (S49) where z= (x1;:::;xn;y1;:::;yn). Then the dynamical s"
          },
          {
            "paper_id": "2008.11865",
            "value": 1.0,
            "snippet": "is indexed by a three-tuple, (i;c;c0), where 1\u0014i\u0014N runs across the indices of examples"
          },
          {
            "paper_id": "2008.11865",
            "value": 2.0,
            "snippet": "given by Vcross= Ave c;c0zc;c0z> c;c0; where the cross-class mean deviations zc;c02Rpare deﬁned as follows: zc;c0=vc;c0\u0000vc:"
          },
          {
            "paper_id": "2008.11865",
            "value": 2.0,
            "snippet": "y Vwithin = Ave i;c;c0zi;c;c0z> i;c;c0; where the replication deviations zi;c;c02RDare deﬁned as follows: zi;c;c0=vi;c;c0"
          },
          {
            "paper_id": "2008.11865",
            "value": 4.0,
            "snippet": "e can write gi;c;c0=hi;c \u000ei;c;c0; (7.5) where denotes the Khatri-Rao4product, which computes in the l’th laye"
          }
        ],
        "signal": "contradiction"
      }
    ]
  },
  "product_health": {
    "product_state": "useful-draft",
    "readiness": "blocked",
    "audit_status": "needs work",
    "correctness_band": "mid",
    "novelty_band": "high",
    "value_band": "high",
    "proof_band": "low",
    "time_to_first_evidence_seconds": 0.0,
    "time_to_paper_draft_seconds": 6.0,
    "blockers": [
      "submission_readiness blocked",
      "demo proof below product floor"
    ]
  },
  "manifest": "workspace/cs-ai/research-harnesses/production_run.json",
  "audit_updated_at": "2026-06-05T10:03:39+00:00"
}