Improve client disconnect handling (#2068)

* xtask/run: Use varargs when run with `-data-dir`

Previously any additional arguments passed on the command line were
ignored. Now they are appended to `cargo run ...` as documented.

* server/os_i_o: Improve error message when IPC dies

and display the last send/recv error to the user instead of a generic
"Buffer full" message.

* server/lib: Log error in `send_to_client!`

so we will know when an error occured while trying to send a message to
the client. The most likely cause for this is that the client buffer
filled up and hence we cannot send any new messages. While we still
disconnect the client as before, now we also write a log message that
explains the situation.

* utils/channel: Apply rustfmt

* server/lib: Detect when client responds too slow

and log a message before disconnecting it.

* server/os_i_o: Add retry queue to client senders

that is dynamically allocated on-demand and stores `ServerToClientMsg`
in case the regular IPC channel is currently full. This acts as a
dynamic buffer to hold and buffer messages for a while until the client
hopefully catches up.

Also write a message to the log to indicate when the client is
recognized to be too slow in handling server messages.

* server: apply rustfmt

* utils/ipc: Add session name to "Disconnect" error

* utils/ipc: Fix error message indent

* server/os_i_o: Undo IPC channel extension

via `Vec` and drastically increase the IPC message queue size instead.
Measurements didn't discover a drastic increase in RAM caused by this,
and it is a much easier fix for the problem at hand.

* CHANGELOG: Add PR #2068
This commit is contained in:
har7an 2023-01-22 18:04:40 +00:00 committed by GitHub
parent b274fc5ab1
commit beddfb77a8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 74 additions and 9 deletions

View file

@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
* errors: Remove more `unwrwap`s from server code (https://github.com/zellij-org/zellij/pull/2069)
* fix: support UTF-8 character in tab name and pane name (https://github.com/zellij-org/zellij/pull/2102)
* fix: handle missing/inaccessible cache directory (https://github.com/zellij-org/zellij/pull/2093)
* errors: Improve client disconnect handling (https://github.com/zellij-org/zellij/pull/2068)
## [0.34.4] - 2022-12-13

View file

@ -104,6 +104,7 @@ pub fn run(sh: &Shell, flags: flags::Run) -> anyhow::Result<()> {
.arg("--no-default-features")
.args(["--features", "disable_automatic_asset_installation"])
.args(["--", "--data-dir", &format!("{}", data_dir.display())])
.args(&flags.args)
.run()
.map_err(anyhow::Error::new)
})

View file

@ -42,7 +42,7 @@ use zellij_utils::{
cli::CliArgs,
consts::{DEFAULT_SCROLL_BUFFER_SIZE, SCROLL_BUFFER_SIZE},
data::{Event, PluginCapabilities},
errors::{ContextType, ErrorInstruction, FatalError, ServerContext},
errors::{prelude::*, ContextType, ErrorInstruction, FatalError, ServerContext},
input::{
command::{RunCommand, TerminalAction},
get_mode_info,
@ -150,7 +150,21 @@ macro_rules! remove_client {
macro_rules! send_to_client {
($client_id:expr, $os_input:expr, $msg:expr, $session_state:expr) => {
let send_to_client_res = $os_input.send_to_client($client_id, $msg);
if let Err(_) = send_to_client_res {
if let Err(e) = send_to_client_res {
// Try to recover the message
let context = match e.downcast_ref::<ZellijError>() {
Some(ZellijError::ClientTooSlow { .. }) => {
format!(
"client {} is processing server messages too slow",
$client_id
)
},
_ => {
format!("failed to route server message to client {}", $client_id)
},
};
// Log it so it isn't lost
Err::<(), _>(e).context(context).non_fatal();
// failed to send to client, remove it
remove_client!($client_id, $os_input, $session_state);
}
@ -552,6 +566,9 @@ pub fn start_server(mut os_input: Box<dyn ServerOsApi>, socket_path: PathBuf) {
// If `None`- Send an exit instruction. This is the case when a user closes the last Tab/Pane.
if let Some(output) = &serialized_output {
for (client_id, client_render_instruction) in output.iter() {
// TODO: When a client is too slow or unresponsive, the channel fills up
// and this call will disconnect the client in turn. Should this be
// changed?
send_to_client!(
*client_id,
os_input,

View file

@ -14,6 +14,7 @@ use signal_hook::consts::*;
use sysinfo::{ProcessExt, ProcessRefreshKind, System, SystemExt};
use zellij_utils::{
async_std, channels,
channels::TrySendError,
data::Palette,
errors::prelude::*,
input::command::{RunCommand, TerminalAction},
@ -343,15 +344,14 @@ struct ClientSender {
impl ClientSender {
pub fn new(client_id: ClientId, mut sender: IpcSenderWithContext<ServerToClientMsg>) -> Self {
let (client_buffer_sender, client_buffer_receiver) = channels::bounded(50);
let (client_buffer_sender, client_buffer_receiver) = channels::bounded(5000);
std::thread::spawn(move || {
let err_context = || format!("failed to send message to client {client_id}");
for msg in client_buffer_receiver.iter() {
let _ = sender.send(msg).with_context(err_context);
sender.send(msg).with_context(err_context).non_fatal();
}
let _ = sender.send(ServerToClientMsg::Exit(ExitReason::Error(
"Buffer full".to_string(),
)));
// If we're here, the message buffer is broken for some reason
let _ = sender.send(ServerToClientMsg::Exit(ExitReason::Disconnect));
});
ClientSender {
client_id,
@ -359,9 +359,24 @@ impl ClientSender {
}
}
pub fn send_or_buffer(&self, msg: ServerToClientMsg) -> Result<()> {
let err_context = || format!("Client {} send buffer full", self.client_id);
let err_context = || {
format!(
"failed to send or buffer message for client {}",
self.client_id
)
};
self.client_buffer_sender
.try_send(msg)
.or_else(|err| {
if let TrySendError::Full(_) = err {
log::warn!(
"client {} is processing server messages too slow",
self.client_id
);
}
Err(err)
})
.with_context(err_context)
}
}

View file

@ -4,7 +4,9 @@ use async_std::task_local;
use std::cell::RefCell;
use crate::errors::{get_current_ctx, ErrorContext};
pub use crossbeam::channel::{bounded, unbounded, Receiver, RecvError, Select, SendError, Sender};
pub use crossbeam::channel::{
bounded, unbounded, Receiver, RecvError, Select, SendError, Sender, TrySendError,
};
/// An [MPSC](mpsc) asynchronous channel with added error context.
pub type ChannelWithContext<T> = (Sender<(T, ErrorContext)>, Receiver<(T, ErrorContext)>);

View file

@ -469,6 +469,9 @@ open an issue on GitHub:
#[error("an error occured")]
GenericError { source: anyhow::Error },
#[error("Client {client_id} is too slow to handle incoming messages")]
ClientTooSlow { client_id: u16 },
}
#[cfg(not(target_family = "wasm"))]

View file

@ -117,6 +117,7 @@ pub enum ExitReason {
NormalDetached,
ForceDetached,
CannotAttach,
Disconnect,
Error(String),
}
@ -133,6 +134,31 @@ impl Display for ExitReason {
f,
"Session attached to another client. Use --force flag to force connect."
),
Self::Disconnect => {
let session_tip = match crate::envs::get_session_name() {
Ok(name) => format!("`zellij attach {}`", name),
Err(_) => "see `zellij ls` and `zellij attach`".to_string(),
};
write!(
f,
"
Your zellij client lost connection to the zellij server.
As a safety measure, you have been disconnected from the current zellij session.
However, the session should still exist and none of your data should be lost.
This usually means that your terminal didn't process server messages quick
enough. Maybe your system is currently under high load, or your terminal
isn't performant enough.
There are a few things you can try now:
- Reattach to your previous session and see if it works out better this
time: {session_tip}
- Try using a faster terminal. GPU-accelerated terminals such as Kitty
or Alacritty are cross-platform and known to work well with zellij.
"
)
},
Self::Error(e) => write!(f, "Error occurred in server:\n{}", e),
}
}