前言

基于Python3.5+

1.第一阶段基础(必须)

2.第二阶段基础

Python3 pip (必须)

关于 pyecharts模块

3.Web开发基础

推荐使用 Django REST Framework 框架进行 Web API 开发
1.Django Rest Framework(DRF)框架搭建步骤
2.django-rest-framework官网

4.推荐了解的库,根据你的需要来

内置库:

通用库

结构化标记处理工具:

数据压缩和归档:

加密服务:

并发执行:

网络和进程间通信:

第三方库:

进度条

1.tqdm

#!/usr/bin/python3
# -*- coding: utf-8 -*-
from tqdm import tqdm
import time

def tqdm_process_do(items):
    """Walk through *items* behind a tqdm progress bar.

    Each item is followed by a 0.25 s pause that stands in for real work;
    the items themselves are not used.

    Args:
        items: Any iterable accepted by ``tqdm``.
    """
    # To label the bar or advance it manually, keep a reference to the
    # tqdm object and call set_description() / update() on it.
    for _ in tqdm(items):
        time.sleep(0.25)

if __name__ == "__main__":
    # Demo run: drive the progress bar over 100 dummy items.
    demo_items = range(100)
    tqdm_process_do(demo_items)

2.rich.progress

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import time
from rich.progress import track, Progress
# Basic usage
def rich_progress_base_do(items):
    """Iterate over *items* with rich's ``track`` helper showing progress.

    Args:
        items: The sequence handed to ``track``; each element is followed
            by a 0.1 s pause that simulates work.
    """
    tracked_items = track(items, description="Processing...")
    for _ in tracked_items:
        # Simulate work being done
        time.sleep(0.1)

# Advanced usage: three concurrent tasks (downloading, processing, cooking)
def rich_advice_progress_do():
    """Run three tasks in one ``Progress`` display until all are finished.

    Every 0.1 s the tasks advance by 5, 4 and 3 units respectively, each
    against a total of 100.
    """
    with Progress() as progress:
        downloading = progress.add_task("[red]Downloading...", total=100)
        processing = progress.add_task("[green]Processing...", total=100)
        # Pass visible=False to hide a task from the display
        cooking = progress.add_task("[cyan]Cooking...", total=100, visible=False)

        # (task id, amount to advance per tick)
        steps = ((downloading, 5), (processing, 4), (cooking, 3))

        while not progress.finished:
            for task_id, amount in steps:
                progress.update(task_id, advance=amount)
            time.sleep(0.1)

def do_work(task):
    """Placeholder callback: print a fixed message and return ``None``.

    Args:
        task: Accepted for the caller's convenience; not used here.
    """
    message = "回调处理此任务-----------"
    print(message)

# Transient progress: per the original note, this one is not left on the console
def rich_progress_transient_do():
    """Create a transient ``Progress``, add one task and pass it to ``do_work``.

    The task is registered with a total of 100 but is never advanced here.
    """
    with Progress(transient=True) as progress:
        task_id = progress.add_task("Working", total=100)
        do_work(task_id)

# Printing / logging while a progress bar is active
# (goes through progress.console)
def rich_progress_console_do(
    items,
    processTiltle="当前进度",
    rowJobDesc="当前处理项:",
):
    """Process *items* one by one, printing a line per item above the bar.

    Args:
        items: Sized iterable of jobs; ``len(items)`` is used as the task total.
        processTiltle: Description shown next to the progress bar.
        rowJobDesc: Prefix of the line printed for every job.
    """
    with Progress() as progress:
        console = progress.console
        task_id = progress.add_task(processTiltle, total=len(items))
        for job in items:
            console.print(f"{rowJobDesc} #{job}")
            run_job(job)
            progress.advance(task_id)

def run_job(job):
    """No-op stand-in for the real per-job work.

    Args:
        job: The job to process; ignored.
    """
    return None


if __name__ == "__main__":
    # Run every demo in turn, printing its banner line first.
    demos = (
        ("-----------基础用法-------------", lambda: rich_progress_base_do(range(50))),
        ("-----------高级用法-------------", rich_advice_progress_do),
        ("-----------迅时用法-------------", rich_progress_transient_do),
        ("-----------打印/日志用法-------------", lambda: rich_progress_console_do(range(50))),
    )
    for banner, run_demo in demos:
        print(banner)
        run_demo()

# 进度不确定(总步数未知)时的用法
# 默认情况下,任务在添加后会自动开始。
# 1. 先调用 add_task(),并传入 start=False、total=None
# 2. 知道总步数后,调用 start_task() 开始任务
# 3. 然后像往常一样调用 update()

from time import sleep
from urllib.request import urlopen

from rich.progress import wrap_file

# Stream a web page line by line while rich shows read progress.
# The response is context-managed so the HTTP connection is closed even if
# decoding or printing raises part-way through.
with urlopen("https://www.textualize.io") as response:
    # The Content-Length header supplies the total for the progress bar;
    # int() raises if the server does not send it.
    size = int(response.headers["Content-Length"])

    with wrap_file(response, size) as file:
        for line in file:
            print(line.decode("utf-8"), end="")
            # Slow the loop down so the bar is visible
            sleep(0.1)
import os.path
import sys
from concurrent.futures import ThreadPoolExecutor
import signal
from functools import partial
from threading import Event
from typing import Iterable
from urllib.request import urlopen

from rich.progress import (
    BarColumn,
    DownloadColumn,
    Progress,
    TaskID,
    TextColumn,
    TimeRemainingColumn,
    TransferSpeedColumn,
)

# Column layout of the shared download display: file name, bar, percentage,
# downloaded size, transfer speed and remaining time, separated by bullets.
_PROGRESS_COLUMNS = (
    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
    BarColumn(bar_width=None),
    "[progress.percentage]{task.percentage:>3.1f}%",
    "•",
    DownloadColumn(),
    "•",
    TransferSpeedColumn(),
    "•",
    TimeRemainingColumn(),
)

# Single Progress instance used by copy_url() and download()
progress = Progress(*_PROGRESS_COLUMNS)


# Flag set when SIGINT is received; copy_url() checks it after each chunk
# and returns early once it is set.
done_event = Event()


def handle_sigint(signum, frame):
    """Signal handler for SIGINT: set ``done_event``.

    Args:
        signum: Signal number passed by the ``signal`` module (unused).
        frame: Current stack frame passed by the ``signal`` module (unused).
    """
    done_event.set()


# Register the handler at import time so SIGINT sets the flag
signal.signal(signal.SIGINT, handle_sigint)


def copy_url(task_id: TaskID, url: str, path: str) -> None:
    """Copy data from a url to a local file.

    The body is read in 32768-byte chunks; after each chunk the task is
    advanced by the chunk size. If ``done_event`` is set, the copy stops
    early and the "Downloaded" message is not logged.

    Args:
        task_id: Progress task to update for this download.
        url: Address to fetch.
        path: Destination file, opened for binary writing.

    Raises:
        TypeError: If the response has no Content-length header.
    """
    progress.console.log(f"Requesting {url}")
    # Context-manage the response so the connection is closed on every exit
    # path, including the early return on interrupt and any exception.
    with urlopen(url) as response:
        # This will break if the response doesn't contain content length
        progress.update(task_id, total=int(response.info()["Content-length"]))
        with open(path, "wb") as dest_file:
            progress.start_task(task_id)
            for data in iter(partial(response.read, 32768), b""):
                dest_file.write(data)
                progress.update(task_id, advance=len(data))
                if done_event.is_set():
                    return
    progress.console.log(f"Downloaded {path}")


def download(urls: Iterable[str], dest_dir: str):
    """Download multiple files to the given directory.

    One progress task is added per URL (not started yet) and the copy itself
    is submitted to a pool of four worker threads. The function returns once
    both the pool and the progress display have been exited.

    Args:
        urls: Addresses to fetch; the text after the last "/" is the file name.
        dest_dir: Directory the files are written to.
    """
    with progress, ThreadPoolExecutor(max_workers=4) as pool:
        for url in urls:
            filename = url.rsplit("/", 1)[-1]
            task_id = progress.add_task("download", filename=filename, start=False)
            pool.submit(copy_url, task_id, url, os.path.join(dest_dir, filename))


if __name__ == "__main__":
    # Try with https://releases.ubuntu.com/20.04/ubuntu-20.04.3-desktop-amd64.iso
    cli_urls = sys.argv[1:]
    if not cli_urls:
        # No URLs given: show how to call the script
        print("Usage:\n\tpython downloader.py URL1 URL2 URL3 (etc)")
    else:
        download(cli_urls, "./")

3.alive-progress

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# 导入 alive-progress 库
from alive_progress import alive_bar
import time

def alive_process_do(items):
    """Step through *items* while an alive-progress bar tracks the count.

    Args:
        items: Sized iterable; ``len(items)`` is passed to ``alive_bar`` as
            the total. Each item is followed by a 0.1 s pause.
    """
    total = len(items)
    with alive_bar(total, force_tty=True) as advance:
        for _ in items:
            # Real per-item processing would go here
            advance()  # move the bar forward by one
            time.sleep(0.1)


if __name__ == "__main__":
    # Demo run over 100 dummy items
    alive_process_do(range(100))

Q&A
解决Pycharm不显示进度条

处理excel:

1.openpyxl

from openpyxl import load_workbook, Workbook
from day01.calTime import CalTime

# 50万数据-(50w行-10列) 大概需要90s 
#如果是10万+数据需要写入 可以使用xlsxwriter来代替
# 如果是 .xls 文件,可以使用 xlrd 来读取

##'get_index', 'get_sheet_by_name', 'get_sheet_names','worksheets','sheetnames'
##, 'remove', 'remove_sheet', 'save', , 'style_names', 'template'
# , 'read_only' 'write_only'
# Open the workbook in read-only mode and print every cell value of every sheet.
wb = load_workbook(filename='data/infile-3k.xlsx', read_only=True)
try:
    # Use the current attribute/indexing API instead of the deprecated
    # get_sheet_names() / get_sheet_by_name() / get_index() helpers.
    sheetnames = wb.sheetnames
    worksheet1 = wb["Sheet1"]
    sheets_count = len(sheetnames)  # equivalently len(wb.worksheets)
    index1 = wb.index(worksheet1)

    # Equivalent alternative: iterate wb.worksheets directly and read
    # ws.title when the sheet name is needed.
    for sn in sheetnames:
        ws = wb[sn]
        for row in ws.rows:
            for cell in row:
                print(cell.value)
finally:
    # Unlike a normal workbook, a read-only workbook uses lazy loading and
    # must be closed explicitly; the finally clause also covers errors
    # raised while iterating.
    wb.close()

2.xlsxwriter

图表:

1.pyecharts

数据处理:

1.pandas

#!/usr/bin/python3
# -*- coding: utf-8 -*-
import pandas as pd
import pprint
#读取和写入 支持格式 .csv .xls .html .hdf5 .json .GBQ .Sql /Parquet
from day01.calTime import CalTime

#查看 pandas DataFrame 的前 8 行。
#ld = df.head(8)
#print(ld)

#查看 pandas DataFrame 的最后 8 行。
#ld = df.tail(8)
#print(ld)

# int64  float64 object
##将数据转化成 excel
#df.to_excel("titanic.xlsx", sheet_name="passengers", index=False,header=True)

#查看DataFrame
#print(df.info())

#Series 中的数据是有序的。
#可以将 Series 视为带有索引的一维数组。 可以是list/tuple/dict
#索引可以是唯一的,但不是必须的。 如果是list tuple 则索引个数需要与数据个数一致 dict则没那么多要求
#数据可以是标量、列表、NumPy 数组等。

#

##我想存储泰坦尼克号的乘客数据。对于许多乘客,我知道姓名(字符)、年龄(整数)和性别(男性/女性)数据。

# Sample passenger table: one row per passenger, with name, age and sex columns.
df = pd.DataFrame(
    data=dict(
        Name=[
            "Braund, Mr. Owen Harris",
            "Allen, Mr. William Henry",
            "Bonnell, Miss. Elizabeth",
        ],
        Age=[22, 35, 58],
        Sex=["male", "male", "female"],
    )
)

#df.to_excel("ttnkh.xlsx")
#我只想看age部分
## way-1
# age_part =df["Age"]

## way-2 (重新加载series 列名为Age)
# age_part = pd.Series([22, 35, 58], name="Age")
# pprint.pprint(age_part)

#我想知道乘客的最大年龄
#print(df["Age"].max())
#print(pd.Series([22, 35, 58], name="Age").max())

#我对数据表的数值数据的一些基本统计感兴趣
## 总数(count) 平均值(mean) 标准差(std) 最小(min)/最大(max)值 以及 25%/50%/75% 分位数
#print(df.describe())

# df = pd.read_excel('data/infile-500k.xlsx')
## for col in df.columns:
##     series = df[col]
# df.to_csv("data/500k-data.csv")
# NOTE(review): CalTime comes from the project-local day01.calTime module;
# presumably it times the work between construction and print_and_write() —
# confirm against that module.
calTime = CalTime()
# Load the CSV from the data directory into a DataFrame
titanic = pd.read_csv("data/500k-data.csv")
# Keep only the first row so the loop below prints a single record
datas = titanic.head(1)

# iterrows() yields (index label, row as a Series) pairs
for ind,row in datas.iterrows():
    print(ind,row)

calTime.print_and_write()
#方法和属性 方便第一次检查。head tail info dtypes
01-07 17:43