mirror of
https://github.com/sigp/lighthouse.git
synced 2026-03-19 21:04:41 +00:00
deneb related logging improvements (#4859)
1. Add commitments to logs and update the `Display` implementation of `KzgCommitment` to become truncated similarly to block root.
I've been finding it difficult to debug scenarios involving multiple blobs for the same `(index, block_root)`. Logging the commitment will help with this, we can match it to what exists in the block.
Example output:
```
Oct 20 21:13:36.700 DEBG Successfully verified gossip blob commitment: 0xa3c1…1cd8, index: 0, root: 0xf31e…f9de, slot: 154568
Oct 20 21:13:36.785 DEBG Successfully verified gossip block commitments: [0xa3c1…1cd8, 0x8655…02ff, 0x8d6a…955a, 0x84ac…3a1b, 0x9752…629b, 0xb9fc…20fb], root: 0xf31eeb732702e429e89057b15e1c0c631e8452e09e03cb1924353f536ef4f9de, slot: 154568, graffiti: teku/besu, service: beacon
```
Example output in a block with no blobs (this will show up pre-deneb):
```
426734:Oct 20 21:15:24.113 DEBG Successfully verified gossip block, commitments: [], root: 0x619db1360ba0e8d44ae2a0f2450ebca47e167191feecffcfac0e8d7b6c39623c, slot: 154577, graffiti: teku/nethermind, service: beacon, module: beacon_chain::beacon_chain:2765
```
2. Remove `strum::IntoStaticStr` from `AvailabilityCheckError`. This is because `IntoStaticStr` end up dropping information inside the enum. So kzg commitments in this error are dropped, making it more difficult to debug
```
AvailabilityCheckError::KzgCommitmentMismatch {
blob_commitment: KzgCommitment,
block_commitment: KzgCommitment,
},
```
which is output as just `AvailabilityCheckError`
3. Some additional misc sync logs I found useful in debugging https://github.com/sigp/lighthouse/pull/4869
4. This downgrades ”Block returned for single block lookup not present” to debug because I don’t think we can fix the scenario that causes this unless we can cancel inflight rpc requests
Co-authored-by: realbigsean <seananderson33@gmail.com>
This commit is contained in:
@@ -330,7 +330,9 @@ impl<T: BeaconChainTypes> BlockLookups<T> {
|
||||
|
||||
let Some(lookup) = self.get_single_lookup::<R>(lookup_id) else {
|
||||
if response.is_some() {
|
||||
warn!(
|
||||
// We don't have the ability to cancel in-flight RPC requests. So this can happen
|
||||
// if we started this RPC request, and later saw the block/blobs via gossip.
|
||||
debug!(
|
||||
self.log,
|
||||
"Block returned for single block lookup not present";
|
||||
"response_type" => ?response_type,
|
||||
|
||||
@@ -30,7 +30,7 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, &'static str> {
|
||||
pub fn into_responses(self) -> Result<Vec<RpcBlock<T>>, String> {
|
||||
let BlocksAndBlobsRequestInfo {
|
||||
accumulated_blocks,
|
||||
accumulated_sidecars,
|
||||
@@ -50,28 +50,28 @@ impl<T: EthSpec> BlocksAndBlobsRequestInfo<T> {
|
||||
.unwrap_or(false);
|
||||
pair_next_blob
|
||||
} {
|
||||
blob_list.push(blob_iter.next().ok_or("Missing next blob")?);
|
||||
blob_list.push(blob_iter.next().ok_or("Missing next blob".to_string())?);
|
||||
}
|
||||
|
||||
let mut blobs_buffer = vec![None; T::max_blobs_per_block()];
|
||||
for blob in blob_list {
|
||||
let blob_index = blob.index as usize;
|
||||
let Some(blob_opt) = blobs_buffer.get_mut(blob_index) else {
|
||||
return Err("Invalid blob index");
|
||||
return Err("Invalid blob index".to_string());
|
||||
};
|
||||
if blob_opt.is_some() {
|
||||
return Err("Repeat blob index");
|
||||
return Err("Repeat blob index".to_string());
|
||||
} else {
|
||||
*blob_opt = Some(blob);
|
||||
}
|
||||
}
|
||||
let blobs = VariableList::from(blobs_buffer.into_iter().flatten().collect::<Vec<_>>());
|
||||
responses.push(RpcBlock::new(None, block, Some(blobs))?)
|
||||
responses.push(RpcBlock::new(None, block, Some(blobs)).map_err(|e| format!("{e:?}"))?)
|
||||
}
|
||||
|
||||
// if accumulated sidecars is not empty, throw an error.
|
||||
if blob_iter.next().is_some() {
|
||||
return Err("Received sidecars that don't pair well");
|
||||
return Err("Received sidecars that don't pair well".to_string());
|
||||
}
|
||||
|
||||
Ok(responses)
|
||||
|
||||
@@ -1034,8 +1034,11 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
// inform range that the request needs to be treated as failed
|
||||
// With time we will want to downgrade this log
|
||||
warn!(
|
||||
self.log, "Blocks and blobs request for range received invalid data";
|
||||
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
|
||||
self.log,
|
||||
"Blocks and blobs request for range received invalid data";
|
||||
"peer_id" => %peer_id,
|
||||
"batch_id" => resp.batch_id,
|
||||
"error" => e.clone()
|
||||
);
|
||||
let id = RequestId::RangeBlockAndBlobs { id };
|
||||
self.network.report_peer(
|
||||
@@ -1043,7 +1046,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
PeerAction::MidToleranceError,
|
||||
"block_blob_faulty_batch",
|
||||
);
|
||||
self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
|
||||
self.inject_error(peer_id, id, RPCError::InvalidData(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1091,7 +1094,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
// With time we will want to downgrade this log
|
||||
warn!(
|
||||
self.log, "Blocks and blobs request for backfill received invalid data";
|
||||
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e
|
||||
"peer_id" => %peer_id, "batch_id" => resp.batch_id, "error" => e.clone()
|
||||
);
|
||||
let id = RequestId::BackFillBlockAndBlobs { id };
|
||||
self.network.report_peer(
|
||||
@@ -1099,7 +1102,7 @@ impl<T: BeaconChainTypes> SyncManager<T> {
|
||||
PeerAction::MidToleranceError,
|
||||
"block_blob_faulty_backfill_batch",
|
||||
);
|
||||
self.inject_error(peer_id, id, RPCError::InvalidData(e.into()))
|
||||
self.inject_error(peer_id, id, RPCError::InvalidData(e))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ use types::{BlobSidecar, EthSpec, SignedBeaconBlock};
|
||||
|
||||
pub struct BlocksAndBlobsByRangeResponse<T: EthSpec> {
|
||||
pub batch_id: BatchId,
|
||||
pub responses: Result<Vec<RpcBlock<T>>, &'static str>,
|
||||
pub responses: Result<Vec<RpcBlock<T>>, String>,
|
||||
}
|
||||
|
||||
pub struct BlocksAndBlobsByRangeRequest<T: EthSpec> {
|
||||
@@ -338,12 +338,26 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
request_id: Id,
|
||||
batch_type: ByRangeRequestType,
|
||||
) -> Option<(ChainId, BatchId)> {
|
||||
match batch_type {
|
||||
let req = match batch_type {
|
||||
ByRangeRequestType::BlocksAndBlobs => self
|
||||
.range_blocks_and_blobs_requests
|
||||
.remove(&request_id)
|
||||
.map(|req| (req.chain_id, req.batch_id)),
|
||||
ByRangeRequestType::Blocks => self.range_requests.remove(&request_id),
|
||||
};
|
||||
if let Some(req) = req {
|
||||
debug!(
|
||||
self.log,
|
||||
"Range sync request failed";
|
||||
"request_id" => request_id,
|
||||
"batch_type" => ?batch_type,
|
||||
"chain_id" => ?req.0,
|
||||
"batch_id" => ?req.1
|
||||
);
|
||||
Some(req)
|
||||
} else {
|
||||
debug!(self.log, "Range sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
@@ -352,12 +366,25 @@ impl<T: BeaconChainTypes> SyncNetworkContext<T> {
|
||||
request_id: Id,
|
||||
batch_type: ByRangeRequestType,
|
||||
) -> Option<BatchId> {
|
||||
match batch_type {
|
||||
let batch_id = match batch_type {
|
||||
ByRangeRequestType::BlocksAndBlobs => self
|
||||
.backfill_blocks_and_blobs_requests
|
||||
.remove(&request_id)
|
||||
.map(|(batch_id, _info)| batch_id),
|
||||
ByRangeRequestType::Blocks => self.backfill_requests.remove(&request_id),
|
||||
};
|
||||
if let Some(batch_id) = batch_id {
|
||||
debug!(
|
||||
self.log,
|
||||
"Backfill sync request failed";
|
||||
"request_id" => request_id,
|
||||
"batch_type" => ?batch_type,
|
||||
"batch_id" => ?batch_id
|
||||
);
|
||||
Some(batch_id)
|
||||
} else {
|
||||
debug!(self.log, "Backfill sync request failed"; "request_id" => request_id, "batch_type" => ?batch_type);
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -827,9 +827,24 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
// sending an error /timeout) if the peer is removed from the chain for other
|
||||
// reasons. Check that this block belongs to the expected peer
|
||||
if !batch.is_expecting_block(peer_id, &request_id) {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch not expecting block";
|
||||
"batch_epoch" => batch_id,
|
||||
"batch_state" => ?batch.state(),
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id
|
||||
);
|
||||
return Ok(KeepChain);
|
||||
}
|
||||
debug!(self.log, "Batch failed. RPC Error"; "batch_epoch" => batch_id);
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch failed. RPC Error";
|
||||
"batch_epoch" => batch_id,
|
||||
"batch_state" => ?batch.state(),
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id
|
||||
);
|
||||
if let Some(active_requests) = self.peers.get_mut(peer_id) {
|
||||
active_requests.remove(&batch_id);
|
||||
}
|
||||
@@ -841,6 +856,13 @@ impl<T: BeaconChainTypes> SyncingChain<T> {
|
||||
}
|
||||
self.retry_batch_download(network, batch_id)
|
||||
} else {
|
||||
debug!(
|
||||
self.log,
|
||||
"Batch not found";
|
||||
"batch_epoch" => batch_id,
|
||||
"peer_id" => %peer_id,
|
||||
"request_id" => %request_id
|
||||
);
|
||||
// this could be an error for an old batch, removed when the chain advances
|
||||
Ok(KeepChain)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user