Multithreading vs. multiprocessing


  • Multithreading: many small, interleaved tasks → the timeline looks like a checkerboard pattern.
  • Multiprocessing: a few long blocks, one per worker → each process occupies its slice until it is done.



  • Threading: the script runs 80 small tasks (each a short 5–20 ms sleep) → this gives many short, alternating bars.
  • Processing: the script runs 1 long task (a 0.2–0.4 s sleep) in each of 4 processes → this gives big contiguous blocks.
  • CPU usage: shown in the third subplot, to compare how heavily the system's cores are actually loaded.





import time

import random

import threading

import multiprocessing as mp

import matplotlib.pyplot as plt

import psutil


# -----------------------------

# Worker function

# -----------------------------

def worker(task_id, sleep_time, results, lock):
  """Simulate one unit of work by sleeping, then record its timing.

  Args:
    task_id: Identifier stored as the first element of the record.
    sleep_time: Seconds to sleep in place of real work.
    results: Shared list-like object; one tuple is appended to it.
    lock: Context manager guarding access to ``results``.

  Appends ``(task_id, start_ms, duration_ms)`` to ``results``, where both
  times come from ``time.perf_counter()`` scaled to milliseconds.
  """
  began_ms = time.perf_counter() * 1000
  time.sleep(sleep_time)  # stand-in for the actual work
  elapsed_ms = time.perf_counter() * 1000 - began_ms

  # Build the record first so the lock is held only for the append itself.
  record = (task_id, began_ms, elapsed_ms)
  with lock:
    results.append(record)



# -----------------------------

# Multithreading: many small tasks

# -----------------------------

def run_multithreading(num_threads=4, num_tasks=80):
  """Run ``num_tasks`` short sleep tasks on ``num_threads`` worker threads.

  Task ``i`` belongs to worker ``i % num_threads``. Exactly ``num_threads``
  threads are started and each one runs its own tasks one after another, so
  the bars recorded for a single worker never overlap in time. (Starting one
  thread per task would let every task run at once and stack the bars of a
  row on top of each other.)

  Args:
    num_threads: Number of worker threads, i.e. rows in the timeline.
      Must be at least 1.
    num_tasks: Total number of tasks; each sleeps for a random 5-20 ms.

  Returns:
    A list of ``(worker_id, start_ms, duration_ms)`` tuples, one per task,
    in the order the tasks finished.

  Raises:
    ValueError: If ``num_threads`` is smaller than 1.
  """
  if num_threads < 1:
    raise ValueError("num_threads must be at least 1")

  results = []
  lock = threading.Lock()

  # Draw all sleep times up front, in task order, from the calling thread.
  sleep_times = [random.uniform(0.005, 0.02) for _ in range(num_tasks)]

  def run_lane(worker_id):
    # Tasks worker_id, worker_id + num_threads, ... run back to back.
    for sleep_time in sleep_times[worker_id::num_threads]:
      worker(worker_id, sleep_time, results, lock)

  threads = [
    threading.Thread(target=run_lane, args=(worker_id,))
    for worker_id in range(num_threads)
  ]
  for t in threads:
    t.start()
  for t in threads:
    t.join()

  return results



# -----------------------------

# Multiprocessing: few long tasks

# -----------------------------

def run_multiprocessing(num_procs=4):
  """Run one long sleep task in each of ``num_procs`` processes.

  Args:
    num_procs: Number of processes to start; each sleeps for a random
      0.2-0.4 s and reports under its own index as worker id.

  Returns:
    A plain list of ``(worker_id, start_ms, duration_ms)`` tuples, one per
    process.
  """
  # The manager runs a helper server process that hosts the shared list and
  # lock. Using it as a context manager shuts that process down when we are
  # done instead of leaving it alive until interpreter exit.
  with mp.Manager() as manager:
    results = manager.list()
    lock = manager.Lock()

    procs = [
      mp.Process(
        target=worker,
        args=(i, random.uniform(0.2, 0.4), results, lock)
      )
      for i in range(num_procs)
    ]
    for p in procs:
      p.start()
    for p in procs:
      p.join()

    # Copy into a plain list while the manager is still running; the proxy
    # cannot be read after the manager has shut down.
    return list(results)



# -----------------------------

# Collect CPU usage

# -----------------------------

def collect_cpu_usage(duration=2, interval=0.05):
  """Sample per-core CPU utilisation for roughly ``duration`` seconds.

  Blocks the calling thread while sampling.

  Args:
    duration: Total sampling time in seconds. A value <= 0 yields no samples.
    interval: Seconds to wait before each sample.

  Returns:
    A ``(timestamps, cpu_data)`` pair. ``timestamps`` holds the time of each
    sample in milliseconds since sampling started; ``cpu_data`` holds, for
    each sample, the list returned by ``psutil.cpu_percent(percpu=True)``.
  """
  cpu_data = []
  timestamps = []

  # A non-blocking cpu_percent() call measures usage since the previous call,
  # and the very first call has nothing to compare against (psutil documents
  # its result as a meaningless 0.0). Make that call here and discard it so
  # every recorded sample covers a real interval.
  psutil.cpu_percent(percpu=True)

  start = time.perf_counter()
  while time.perf_counter() - start < duration:
    # Sleep first so each sample reflects the preceding ``interval`` seconds.
    time.sleep(interval)
    cpu_data.append(psutil.cpu_percent(percpu=True))
    timestamps.append((time.perf_counter() - start) * 1000)  # ms

  return timestamps, cpu_data



# -----------------------------

# Plotting

# -----------------------------

def plot_results(results, mode="threading", cpu_timestamps=None, cpu_data=None):
  """Show a three-panel figure: task timeline, per-worker totals, CPU usage.

  Args:
    results: Iterable of ``(worker_id, start_ms, duration_ms)`` tuples with
      integer worker ids.
    mode: Label used in the titles of the first two panels.
    cpu_timestamps: Sample times in ms for the CPU panel (optional).
    cpu_data: One list of per-core percentages per sample (optional).

  The CPU panel is hidden when either CPU argument is missing or empty.
  Calls ``plt.show()`` to display the figure.
  """
  colors = ['#7fcfd4', '#fff29b', '#c8c0ff', '#ff8f80']
  worker_ids = sorted({task_id for task_id, _, _ in results})

  # Start times are raw perf_counter readings whose origin is arbitrary, so
  # shift them to make the earliest task start at 0 ms. This also puts the
  # timeline on a relative axis like the CPU panel.
  origin = min((start for _, start, _ in results), default=0)

  fig, axs = plt.subplots(1, 3, figsize=(16, 5))
  timeline_ax, totals_ax, cpu_ax = axs

  # --- Timeline ---
  for task_id, start, dur in results:
    timeline_ax.broken_barh(
      [(start - origin, dur)], (task_id + 0.1, 0.8),
      facecolors=colors[task_id % len(colors)]
    )
  timeline_ax.set_xlabel("time (ms)")
  timeline_ax.set_ylabel("worker")
  # Each bar spans id + 0.1 .. id + 0.9, so id + 0.5 is its centre. Label the
  # ticks with the real ids so non-contiguous ids are shown correctly too.
  timeline_ax.set_yticks([w + 0.5 for w in worker_ids])
  timeline_ax.set_yticklabels([str(w) for w in worker_ids])
  timeline_ax.set_title(f"{mode} timeline")
  timeline_ax.grid(True, linestyle=":", alpha=0.5)

  # --- Work totals ---
  totals = {}
  for task_id, _, dur in results:
    totals[task_id] = totals.get(task_id, 0) + dur
  totals_ax.bar(
    list(totals),
    list(totals.values()),
    color=[colors[k % len(colors)] for k in totals],
    edgecolor="k"
  )
  # One tick per worker rather than auto-generated fractional positions.
  totals_ax.set_xticks(worker_ids)
  totals_ax.set_xlabel("worker")
  totals_ax.set_ylabel("time (ms)")
  totals_ax.set_title(f"{mode} total work")

  # --- CPU usage ---
  if cpu_timestamps and cpu_data:
    for core in range(len(cpu_data[0])):
      core_usage = [row[core] for row in cpu_data]
      cpu_ax.plot(cpu_timestamps, core_usage, label=f"core {core}")
    cpu_ax.set_xlabel("time (ms)")
    cpu_ax.set_ylabel("CPU %")
    cpu_ax.set_title("CPU utilization")
    cpu_ax.legend(fontsize="x-small", ncol=2)
  else:
    cpu_ax.set_visible(False)

  plt.tight_layout()
  plt.show()



# -----------------------------

# Main

# -----------------------------

if __name__ == "__main__":

  def sample_cpu_during(workload, duration=2):
    """Run ``workload()`` while a background thread samples CPU usage.

    Sampling has to overlap the workload: collecting the samples first and
    running the workload afterwards would only chart the idle time before
    the work started.

    Returns ``(workload_result, cpu_timestamps, cpu_data)``.
    """
    sampled = {}

    def sampler():
      sampled["cpu"] = collect_cpu_usage(duration=duration)

    sampler_thread = threading.Thread(target=sampler)
    sampler_thread.start()
    try:
      outcome = workload()
    finally:
      # Always wait for the sampler so no thread outlives this call.
      sampler_thread.join()

    # Fall back to empty data if the sampler thread failed.
    cpu_t, cpu_d = sampled.get("cpu", ([], []))
    return outcome, cpu_t, cpu_d

  # --- Multithreading: many small jobs ---
  print("Running multithreading...")
  thread_results, cpu_t, cpu_d = sample_cpu_during(
    lambda: run_multithreading(num_threads=4, num_tasks=80), duration=2
  )
  plot_results(thread_results, mode="threading", cpu_timestamps=cpu_t, cpu_data=cpu_d)

  # --- Multiprocessing: few long jobs ---
  # NOTE(review): the sampler thread is alive while the worker processes are
  # started; confirm this is acceptable for the start method in use.
  print("Running multiprocessing...")
  proc_results, cpu_t, cpu_d = sample_cpu_during(
    lambda: run_multiprocessing(num_procs=4), duration=2
  )
  plot_results(proc_results, mode="multiprocessing", cpu_timestamps=cpu_t, cpu_data=cpu_d)

From Blogger iPhone client