From 316c1efc847281923e17a21f0ce53786da097a89 Mon Sep 17 00:00:00 2001 From: sosokker Date: Tue, 15 Aug 2023 15:29:19 +0700 Subject: [PATCH] Add markdown files about Concurrency and Parallelism --- asynchronous.md | 42 ++++++++++++++++++++++++ combining.md | 36 +++++++++++++++++++++ introduction.md | 79 ++++++++++++++++++++++++++++++++++++++++++++++ multiprocessing.md | 46 +++++++++++++++++++++++++++ 4 files changed, 203 insertions(+) create mode 100644 asynchronous.md create mode 100644 combining.md create mode 100644 introduction.md create mode 100644 multiprocessing.md diff --git a/asynchronous.md b/asynchronous.md new file mode 100644 index 0000000..fc526f2 --- /dev/null +++ b/asynchronous.md @@ -0,0 +1,42 @@ +## Part 2: Asynchronous Programming with asyncio + +In this section, we'll dive into asynchronous programming using the asyncio module, enabling concurrent execution without relying on multiple threads. + +### 1. Introduction to Asynchronous Programming + +Asynchronous programming allows non-blocking execution of tasks, making efficient use of system resources. The asyncio module provides a framework for working with asynchronous operations. In this simple example, we define an asynchronous function main that prints "Hello," waits for a second using await asyncio.sleep(1), and then prints "World." + +```python +import asyncio + +async def main(): + print("Hello") + await asyncio.sleep(1) + print("World") + +asyncio.run(main()) + +``` + +### 2. Getting Started with asyncio + +To work with asyncio, we can use the async and await keywords. In this example, we define an asynchronous function fetch_data that simulates fetching data from a URL asynchronously. The asyncio.gather function is used to run multiple asynchronous tasks concurrently and gather their results. 
+ +```python +import asyncio + +async def fetch_data(url): + # Simulate fetching data from a URL + await asyncio.sleep(2) + return f"Data from {url}" + +async def main(): + tasks = [fetch_data("url1"), fetch_data("url2"), fetch_data("url3")] + results = await asyncio.gather(*tasks) + print(results) + +asyncio.run(main()) + +``` + +[Continue to Part 3: Parallelism with multiprocessing](multiprocessing.md) \ No newline at end of file diff --git a/combining.md b/combining.md new file mode 100644 index 0000000..92a2705 --- /dev/null +++ b/combining.md @@ -0,0 +1,36 @@ +## Part 4: Combining Concurrency and Parallelism + +In this final section, we'll discuss how to leverage both concurrency and parallelism to optimize Python applications. +### 1. Concurrency vs. Parallelism: When to Use Which? + +Concurrency is suitable for I/O-bound tasks that involve waiting, such as network requests. Parallelism is ideal for CPU-bound tasks that can be divided into smaller units of work. In some cases, combining both approaches can provide the best performance. + +### 2. Real-world Application: Web Scraping with Concurrency and Parallelism + +As a practical example, let's consider web scraping. We can use asynchronous programming (`asyncio`) to handle multiple requests concurrently, and then use parallelism (`multiprocessing`) to process the scraped data in parallel. This combination can significantly speed up the entire scraping process. 
+ +```python +import asyncio +import aiohttp +import multiprocessing + +async def fetch_url(session, url): + async with session.get(url) as response: + return await response.text() + +def process_data(data): + # Process the scraped data + pass + +async def main(): + urls = ["url1", "url2", "url3"] + async with aiohttp.ClientSession() as session: + tasks = [fetch_url(session, url) for url in urls] + responses = await asyncio.gather(*tasks) + + with multiprocessing.Pool() as pool: + processed_data = pool.map(process_data, responses) + + print("Processed data:", processed_data) + +asyncio.run(main()) diff --git a/introduction.md b/introduction.md new file mode 100644 index 0000000..e6546ea --- /dev/null +++ b/introduction.md @@ -0,0 +1,79 @@ +# Concurrency and Parallelism in Python + +## Table of Contents + +- [Concurrency and Parallelism in Python](#concurrency-and-parallelism-in-python) + - [Table of Contents](#table-of-contents) + - [Part 1: Introduction to Concurrency](#part-1-introduction-to-concurrency) + - [1. Global Interpreter Lock (GIL) Explained](#1-global-interpreter-lock-gil-explained) + - [2. Threading in Python](#2-threading-in-python) + +--- + +## Part 1: Introduction to Concurrency + +In this section, we'll explore the fundamental concepts of concurrency in Python, including the Global Interpreter Lock (GIL) and the `threading` module. + +### 1. Global Interpreter Lock (GIL) Explained + +The Global Interpreter Lock (GIL) is a mutex that prevents multiple native threads from executing Python bytecodes simultaneously in a single process. While it ensures memory safety, it can limit the parallelism of CPU-bound tasks. In this example, we'll create two threads to perform counting concurrently, but due to the GIL, they won't achieve true parallelism. 
+ +```python +import threading + +def count_up(): + for _ in range(1000000): + pass + +def count_down(): + for _ in range(1000000): + pass + +thread1 = threading.Thread(target=count_up) +thread2 = threading.Thread(target=count_down) + +thread1.start() +thread2.start() + +thread1.join() +thread2.join() + +print("Counting done!") +``` + +### 2. Threading in Python + +The threading module allows us to work with threads in Python. In this example, we're using a lock (threading.Lock()) to synchronize access to a shared variable (x) across multiple threads. This ensures that only one thread modifies x at a time, preventing data corruption. + +```python + +import threading + +x = 0 +lock = threading.Lock() + +def increment(): + global x + for _ in range(1000000): + with lock: + x += 1 + +def decrement(): + global x + for _ in range(1000000): + with lock: + x -= 1 + +thread1 = threading.Thread(target=increment) +thread2 = threading.Thread(target=decrement) + +thread1.start() +thread2.start() + +thread1.join() +thread2.join() + +print("Result:", x) +``` + +[Continue to Part 2: Asynchronous Programming with asyncio](asynchronous.md) \ No newline at end of file diff --git a/multiprocessing.md b/multiprocessing.md new file mode 100644 index 0000000..0eee94e --- /dev/null +++ b/multiprocessing.md @@ -0,0 +1,46 @@ +## Part 3: Parallelism with multiprocessing + +In this section, we'll explore parallelism in Python using the multiprocessing module, which allows for concurrent execution using multiple processes. +### 1. Introduction to Parallelism + +Parallelism involves executing multiple tasks simultaneously, utilizing multiple CPU cores. The multiprocessing module provides a way to achieve true parallelism by creating separate processes. In this example, we define a function square and use the multiprocessing.Pool to parallelize its execution across multiple processes. 
+ +```python +import multiprocessing + +def square(number): + return number * number + +if __name__ == "__main__": + numbers = [1, 2, 3, 4, 5] + with multiprocessing.Pool() as pool: + results = pool.map(square, numbers) + print(results) + +``` + +### 2. Using multiprocessing + +The multiprocessing module allows us to create and manage processes in Python. In this example, we define a function worker_function that each process will execute. We create multiple processes and start them using the start method. Finally, we wait for all processes to finish using the join method. + +```python +import multiprocessing + +def worker_function(number): + print(f"Worker process {number} is executing") + +if __name__ == "__main__": + processes = [] + for i in range(4): + process = multiprocessing.Process(target=worker_function, args=(i,)) + processes.append(process) + process.start() + + for process in processes: + process.join() + + print("All processes have finished") + +``` + +[Continue to Part 4: Combining Concurrency and Parallelism](combining.md) \ No newline at end of file