Improve client disconnect handling (#2068)
* xtask/run: Use varargs when run with `-data-dir` Previously any additional arguments passed on the command line were ignored. Now they are appended to `cargo run ...` as documented. * server/os_i_o: Improve error message when IPC dies and display the last send/recv error to the user instead of a generic "Buffer full" message. * server/lib: Log error in `send_to_client!` so we will know when an error occured while trying to send a message to the client. The most likely cause for this is that the client buffer filled up and hence we cannot send any new messages. While we still disconnect the client as before, now we also write a log message that explains the situation. * utils/channel: Apply rustfmt * server/lib: Detect when client responds too slow and log a message before disconnecting it. * server/os_i_o: Add retry queue to client senders that is dynamically allocated on-demand and stores `ServerToClientMsg` in case the regular IPC channel is currently full. This acts as a dynamic buffer to hold and buffer messages for a while until the client hopefully catches up. Also write a message to the log to indicate when the client is recognized to be too slow in handling server messages. * server: apply rustfmt * utils/ipc: Add session name to "Disconnect" error * utils/ipc: Fix error message indent * server/os_i_o: Undo IPC channel extension via `Vec` and drastically increase the IPC message queue size instead. Measurements didn't discover a drastic increase in RAM caused by this, and it is a much easier fix for the problem at hand. * CHANGELOG: Add PR #2068
This commit is contained in:
parent
b274fc5ab1
commit
beddfb77a8
7 changed files with 74 additions and 9 deletions
|
|
@ -17,6 +17,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
|
||||||
* errors: Remove more `unwrwap`s from server code (https://github.com/zellij-org/zellij/pull/2069)
|
* errors: Remove more `unwrwap`s from server code (https://github.com/zellij-org/zellij/pull/2069)
|
||||||
* fix: support UTF-8 character in tab name and pane name (https://github.com/zellij-org/zellij/pull/2102)
|
* fix: support UTF-8 character in tab name and pane name (https://github.com/zellij-org/zellij/pull/2102)
|
||||||
* fix: handle missing/inaccessible cache directory (https://github.com/zellij-org/zellij/pull/2093)
|
* fix: handle missing/inaccessible cache directory (https://github.com/zellij-org/zellij/pull/2093)
|
||||||
|
* errors: Improve client disconnect handling (https://github.com/zellij-org/zellij/pull/2068)
|
||||||
|
|
||||||
## [0.34.4] - 2022-12-13
|
## [0.34.4] - 2022-12-13
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,7 @@ pub fn run(sh: &Shell, flags: flags::Run) -> anyhow::Result<()> {
|
||||||
.arg("--no-default-features")
|
.arg("--no-default-features")
|
||||||
.args(["--features", "disable_automatic_asset_installation"])
|
.args(["--features", "disable_automatic_asset_installation"])
|
||||||
.args(["--", "--data-dir", &format!("{}", data_dir.display())])
|
.args(["--", "--data-dir", &format!("{}", data_dir.display())])
|
||||||
|
.args(&flags.args)
|
||||||
.run()
|
.run()
|
||||||
.map_err(anyhow::Error::new)
|
.map_err(anyhow::Error::new)
|
||||||
})
|
})
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ use zellij_utils::{
|
||||||
cli::CliArgs,
|
cli::CliArgs,
|
||||||
consts::{DEFAULT_SCROLL_BUFFER_SIZE, SCROLL_BUFFER_SIZE},
|
consts::{DEFAULT_SCROLL_BUFFER_SIZE, SCROLL_BUFFER_SIZE},
|
||||||
data::{Event, PluginCapabilities},
|
data::{Event, PluginCapabilities},
|
||||||
errors::{ContextType, ErrorInstruction, FatalError, ServerContext},
|
errors::{prelude::*, ContextType, ErrorInstruction, FatalError, ServerContext},
|
||||||
input::{
|
input::{
|
||||||
command::{RunCommand, TerminalAction},
|
command::{RunCommand, TerminalAction},
|
||||||
get_mode_info,
|
get_mode_info,
|
||||||
|
|
@ -150,7 +150,21 @@ macro_rules! remove_client {
|
||||||
macro_rules! send_to_client {
|
macro_rules! send_to_client {
|
||||||
($client_id:expr, $os_input:expr, $msg:expr, $session_state:expr) => {
|
($client_id:expr, $os_input:expr, $msg:expr, $session_state:expr) => {
|
||||||
let send_to_client_res = $os_input.send_to_client($client_id, $msg);
|
let send_to_client_res = $os_input.send_to_client($client_id, $msg);
|
||||||
if let Err(_) = send_to_client_res {
|
if let Err(e) = send_to_client_res {
|
||||||
|
// Try to recover the message
|
||||||
|
let context = match e.downcast_ref::<ZellijError>() {
|
||||||
|
Some(ZellijError::ClientTooSlow { .. }) => {
|
||||||
|
format!(
|
||||||
|
"client {} is processing server messages too slow",
|
||||||
|
$client_id
|
||||||
|
)
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
format!("failed to route server message to client {}", $client_id)
|
||||||
|
},
|
||||||
|
};
|
||||||
|
// Log it so it isn't lost
|
||||||
|
Err::<(), _>(e).context(context).non_fatal();
|
||||||
// failed to send to client, remove it
|
// failed to send to client, remove it
|
||||||
remove_client!($client_id, $os_input, $session_state);
|
remove_client!($client_id, $os_input, $session_state);
|
||||||
}
|
}
|
||||||
|
|
@ -552,6 +566,9 @@ pub fn start_server(mut os_input: Box<dyn ServerOsApi>, socket_path: PathBuf) {
|
||||||
// If `None`- Send an exit instruction. This is the case when a user closes the last Tab/Pane.
|
// If `None`- Send an exit instruction. This is the case when a user closes the last Tab/Pane.
|
||||||
if let Some(output) = &serialized_output {
|
if let Some(output) = &serialized_output {
|
||||||
for (client_id, client_render_instruction) in output.iter() {
|
for (client_id, client_render_instruction) in output.iter() {
|
||||||
|
// TODO: When a client is too slow or unresponsive, the channel fills up
|
||||||
|
// and this call will disconnect the client in turn. Should this be
|
||||||
|
// changed?
|
||||||
send_to_client!(
|
send_to_client!(
|
||||||
*client_id,
|
*client_id,
|
||||||
os_input,
|
os_input,
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@ use signal_hook::consts::*;
|
||||||
use sysinfo::{ProcessExt, ProcessRefreshKind, System, SystemExt};
|
use sysinfo::{ProcessExt, ProcessRefreshKind, System, SystemExt};
|
||||||
use zellij_utils::{
|
use zellij_utils::{
|
||||||
async_std, channels,
|
async_std, channels,
|
||||||
|
channels::TrySendError,
|
||||||
data::Palette,
|
data::Palette,
|
||||||
errors::prelude::*,
|
errors::prelude::*,
|
||||||
input::command::{RunCommand, TerminalAction},
|
input::command::{RunCommand, TerminalAction},
|
||||||
|
|
@ -343,15 +344,14 @@ struct ClientSender {
|
||||||
|
|
||||||
impl ClientSender {
|
impl ClientSender {
|
||||||
pub fn new(client_id: ClientId, mut sender: IpcSenderWithContext<ServerToClientMsg>) -> Self {
|
pub fn new(client_id: ClientId, mut sender: IpcSenderWithContext<ServerToClientMsg>) -> Self {
|
||||||
let (client_buffer_sender, client_buffer_receiver) = channels::bounded(50);
|
let (client_buffer_sender, client_buffer_receiver) = channels::bounded(5000);
|
||||||
std::thread::spawn(move || {
|
std::thread::spawn(move || {
|
||||||
let err_context = || format!("failed to send message to client {client_id}");
|
let err_context = || format!("failed to send message to client {client_id}");
|
||||||
for msg in client_buffer_receiver.iter() {
|
for msg in client_buffer_receiver.iter() {
|
||||||
let _ = sender.send(msg).with_context(err_context);
|
sender.send(msg).with_context(err_context).non_fatal();
|
||||||
}
|
}
|
||||||
let _ = sender.send(ServerToClientMsg::Exit(ExitReason::Error(
|
// If we're here, the message buffer is broken for some reason
|
||||||
"Buffer full".to_string(),
|
let _ = sender.send(ServerToClientMsg::Exit(ExitReason::Disconnect));
|
||||||
)));
|
|
||||||
});
|
});
|
||||||
ClientSender {
|
ClientSender {
|
||||||
client_id,
|
client_id,
|
||||||
|
|
@ -359,9 +359,24 @@ impl ClientSender {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn send_or_buffer(&self, msg: ServerToClientMsg) -> Result<()> {
|
pub fn send_or_buffer(&self, msg: ServerToClientMsg) -> Result<()> {
|
||||||
let err_context = || format!("Client {} send buffer full", self.client_id);
|
let err_context = || {
|
||||||
|
format!(
|
||||||
|
"failed to send or buffer message for client {}",
|
||||||
|
self.client_id
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
self.client_buffer_sender
|
self.client_buffer_sender
|
||||||
.try_send(msg)
|
.try_send(msg)
|
||||||
|
.or_else(|err| {
|
||||||
|
if let TrySendError::Full(_) = err {
|
||||||
|
log::warn!(
|
||||||
|
"client {} is processing server messages too slow",
|
||||||
|
self.client_id
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Err(err)
|
||||||
|
})
|
||||||
.with_context(err_context)
|
.with_context(err_context)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,9 @@ use async_std::task_local;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
|
|
||||||
use crate::errors::{get_current_ctx, ErrorContext};
|
use crate::errors::{get_current_ctx, ErrorContext};
|
||||||
pub use crossbeam::channel::{bounded, unbounded, Receiver, RecvError, Select, SendError, Sender};
|
pub use crossbeam::channel::{
|
||||||
|
bounded, unbounded, Receiver, RecvError, Select, SendError, Sender, TrySendError,
|
||||||
|
};
|
||||||
|
|
||||||
/// An [MPSC](mpsc) asynchronous channel with added error context.
|
/// An [MPSC](mpsc) asynchronous channel with added error context.
|
||||||
pub type ChannelWithContext<T> = (Sender<(T, ErrorContext)>, Receiver<(T, ErrorContext)>);
|
pub type ChannelWithContext<T> = (Sender<(T, ErrorContext)>, Receiver<(T, ErrorContext)>);
|
||||||
|
|
|
||||||
|
|
@ -469,6 +469,9 @@ open an issue on GitHub:
|
||||||
|
|
||||||
#[error("an error occured")]
|
#[error("an error occured")]
|
||||||
GenericError { source: anyhow::Error },
|
GenericError { source: anyhow::Error },
|
||||||
|
|
||||||
|
#[error("Client {client_id} is too slow to handle incoming messages")]
|
||||||
|
ClientTooSlow { client_id: u16 },
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(target_family = "wasm"))]
|
#[cfg(not(target_family = "wasm"))]
|
||||||
|
|
|
||||||
|
|
@ -117,6 +117,7 @@ pub enum ExitReason {
|
||||||
NormalDetached,
|
NormalDetached,
|
||||||
ForceDetached,
|
ForceDetached,
|
||||||
CannotAttach,
|
CannotAttach,
|
||||||
|
Disconnect,
|
||||||
Error(String),
|
Error(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -133,6 +134,31 @@ impl Display for ExitReason {
|
||||||
f,
|
f,
|
||||||
"Session attached to another client. Use --force flag to force connect."
|
"Session attached to another client. Use --force flag to force connect."
|
||||||
),
|
),
|
||||||
|
Self::Disconnect => {
|
||||||
|
let session_tip = match crate::envs::get_session_name() {
|
||||||
|
Ok(name) => format!("`zellij attach {}`", name),
|
||||||
|
Err(_) => "see `zellij ls` and `zellij attach`".to_string(),
|
||||||
|
};
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"
|
||||||
|
Your zellij client lost connection to the zellij server.
|
||||||
|
|
||||||
|
As a safety measure, you have been disconnected from the current zellij session.
|
||||||
|
However, the session should still exist and none of your data should be lost.
|
||||||
|
|
||||||
|
This usually means that your terminal didn't process server messages quick
|
||||||
|
enough. Maybe your system is currently under high load, or your terminal
|
||||||
|
isn't performant enough.
|
||||||
|
|
||||||
|
There are a few things you can try now:
|
||||||
|
- Reattach to your previous session and see if it works out better this
|
||||||
|
time: {session_tip}
|
||||||
|
- Try using a faster terminal. GPU-accelerated terminals such as Kitty
|
||||||
|
or Alacritty are cross-platform and known to work well with zellij.
|
||||||
|
"
|
||||||
|
)
|
||||||
|
},
|
||||||
Self::Error(e) => write!(f, "Error occurred in server:\n{}", e),
|
Self::Error(e) => write!(f, "Error occurred in server:\n{}", e),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue