#421 Golang: 正则表达式

2020-12-06

Compile 系列

func Compile(expr string) (*Regexp, error)
func CompilePOSIX(expr string) (*Regexp, error)
func MustCompile(str string) *Regexp
func MustCompilePOSIX(str string) *Regexp

函数名称中的 Must 表示:如果正则错误,直接 panic

Match 系列

func (re *Regexp) Match(b []byte) bool
func (re *Regexp) MatchReader(r io.RuneReader) bool
func (re *Regexp) MatchString(s string) bool

Find 系列

func (re *Regexp) Find(b []byte) []byte
func (re *Regexp) FindAll(b []byte, n int) [][]byte
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int
func (re *Regexp) FindAllString(s string, n int) []string
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int
func (re *Regexp) FindIndex(b []byte) (loc []int)
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int)
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int
func (re *Regexp) FindString(s string) string
func (re *Regexp) FindStringIndex(s string) (loc []int)
func (re *Regexp) FindStringSubmatch(s string) []string
func (re *Regexp) FindStringSubmatchIndex(s string) []int
func (re *Regexp) FindSubmatch(b []byte) [][]byte
func (re *Regexp) FindSubmatchIndex(b []byte) []int

其实好记,Find(All)?(String)?(Submatch)?(Index)? 一组合就有 16 种了,再加上两个 FindReader 方法。

Replace 系列

func (re *Regexp) ReplaceAll(src, repl []byte) []byte
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte
func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte
func (re *Regexp) ReplaceAllLiteralString(src, repl string) string
func (re *Regexp) ReplaceAllString(src, repl string) string
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string

其他

func (re *Regexp) Copy() *Regexp // DEPRECATED
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte
func (re *Regexp) ExpandString(dst []byte, template string, src string, match []int) []byte
func (re *Regexp) LiteralPrefix() (prefix string, complete bool)
func (re *Regexp) Longest()
func (re *Regexp) NumSubexp() int
func (re *Regexp) Split(s string, n int) []string
func (re *Regexp) String() string
func (re *Regexp) SubexpIndex(name string) int
func (re *Regexp) SubexpNames() []string

四个包级函数(前三个是对 Compile + Match 的封装,QuoteMeta 用于转义正则元字符)

func Match(pattern string, b []byte) (matched bool, err error)
func MatchReader(pattern string, r io.RuneReader) (matched bool, err error)
func MatchString(pattern string, s string) (matched bool, err error)
func QuoteMeta(s string) string

示例

// main extracts every standalone four-digit number from a sample sentence
// and prints each match on its own line.
func main() {
    const (
        input = "Hello, 2021! The year 2020 was great, but 2021 will be even better."
        // Exactly four digits with a word boundary on both sides.
        yearExpr = `\b\d{4}\b`
    )

    re, compileErr := regexp.Compile(yearExpr)
    if compileErr != nil {
        fmt.Println("Error compiling regex:", compileErr)
        return
    }

    // A negative limit asks FindAllString for all matches.
    for _, year := range re.FindAllString(input, -1) {
        fmt.Println(year)
    }
}

Output:

2021
2020
2021

#419 Golang:new 和 make

2020-12-05
$ go doc builtin.new
package builtin // import "builtin"

func new(Type) *Type
    The new built-in function allocates memory. The first argument is a type,
    not a value, and the value returned is a pointer to a newly allocated zero
    value of that type.

$ go doc builtin.make
package builtin // import "builtin"

func make(t Type, size ...IntegerType) Type
    The make built-in function allocates and initializes an object of type
    slice, map, or chan (only). Like new, the first argument is a type, not a
    value. Unlike new, make's return type is the same as the type of its
    argument, not a pointer to it. The specification of the result depends on
    the type:

        Slice: The size specifies the length. The capacity of the slice is
        equal to its length. A second integer argument may be provided to
        specify a different capacity; it must be no smaller than the
        length. For example, make([]int, 0, 10) allocates an underlying array
        of size 10 and returns a slice of length 0 and capacity 10 that is
        backed by this underlying array.
        Map: An empty map is allocated with enough space to hold the
        specified number of elements. The size may be omitted, in which case
        a small starting size is allocated.
        Channel: The channel's buffer is initialized with the specified
        buffer capacity. If zero, or the size is omitted, the channel is
        unbuffered.
  • func new(Type) *Type
  • func make(t Type, size ...IntegerType) Type

new 和 make 的区别

  1. new 没有类型限制,make 只能用来分配和初始化 slice,map,chan。
  2. new 返回指向该类型零值的指针(*T),make 返回的是该类型的值本身(T,即 slice、map、chan),而不是指针。
  3. new 只会将分配的空间置零(对应类型的零值),make 则会按参数完成初始化,比如 slice 的长度和容量、map 的初始空间、chan 的缓冲区大小。
package main

import "fmt"

// User is a struct with a single string field.
type User struct {
    Name string
}

// Addr is an empty struct: it declares no fields.
type Addr struct{}

// main allocates two values of each type with new and prints, for every
// pointer, the pointed-to value (%#v), the pointer itself (%p) and the address
// of the local pointer variable as a decimal integer (%d). After each pair it
// reports whether the two pointers compare equal.
func main() {
    // Two separate new(User) calls.
    user1 := new(User)
    user2 := new(User)
    fmt.Printf("%#v (%p) %d\n", user1, user1, &user1)
    fmt.Printf("%#v (%p) %d\n", user2, user2, &user2)
    fmt.Printf("user1 == user2 : %#v\n", user1 == user2)

    // The same experiment with the empty struct type.
    addr1 := new(Addr)
    addr2 := new(Addr)
    fmt.Printf("%#v (%p) %d\n", addr1, addr1, &addr1)
    fmt.Printf("%#v (%p) %d\n", addr2, addr2, &addr2)
    fmt.Printf("addr1 == addr2 : %#v\n", addr1 == addr2)
}
&main.User{Name:""} (0xc00006a250) 824633745448
&main.User{Name:""} (0xc00006a260) 824633745456
user1 == user2 : false
&main.Addr{} (0xf61438) 824633745472
&main.Addr{} (0xf61438) 824633745480
addr1 == addr2 : true

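注意上面这一点,空 struct 多次 new 出的指针完全相同。暂时没有想明白这样设计的好处。
补充:零大小(zero-size)的值不占用内存,运行时无需为它们真正分配空间,而是统一返回同一个地址(runtime.zerobase),从而省去分配和 GC 的开销。不过语言规范只说明"指向不同零大小变量的指针可能相等,也可能不相等",所以代码不应依赖这一行为。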

#418 Go 关键字

2020-12-05

25 个关键字

break        default      func         interface    select
case         defer        go           map          struct
chan         else         goto         package      switch
const        fallthrough  if           range        type
continue     for          import       return       var

声明 (4)

  1. var 变量
  2. const 常量
  3. type 类型
  4. func 函数

并发相关 (3)

  1. go 并发
  2. chan 信道
  3. select 分支

类型 (3)

  1. interface 接口
  2. map 映射
  3. struct 结构体

流程控制 (3 + 4 + 6)

  1. defer 延迟执行
  2. goto 跳转
  3. return 返回

循环 (4)

  1. for
  2. continue
  3. break
  4. range 用于读取 slice、map、channel 数据

分支 (6)

  1. if
  2. else
  3. switch
  4. case
  5. default
  6. fallthrough

包 (2)

  1. package
  2. import

41 个预定义标识符(Go 1.18 ~ 1.20;Go 1.21 起新增内置函数 clear、max、min,共 44 个)

Types:
    any bool byte comparable
    complex64 complex128 error float32 float64
    int int8 int16 int32 int64 rune string
    uint uint8 uint16 uint32 uint64 uintptr

Constants:
    true false iota

Zero value:
    nil

Functions:
    append cap close complex copy delete imag len
    make new panic print println real recover

值 (4)

  1. true
  2. false
  3. iota
  4. nil

类型 (20 + 2)

int (10)

  1. int
  2. int8
  3. int16
  4. int32
  5. int64
  6. uint
  7. uint8
  8. uint16
  9. uint32
  10. uint64

complex (2)

  1. complex64
  2. complex128

float (2)

  1. float32
  2. float64

字符与字符串 (3)

  1. byte => uint8
  2. rune => int32
  3. string

泛型相关 (2) Go1.18+

  1. any
  2. comparable

其他 (3)

  1. bool
  2. uintptr 无符号整数类型,宽度足以存放任意指针的值(本身不是指针)
  3. error 一个内置的 interface

Builtin 函数 (15)

  1. append
  2. delete
  3. close

  4. cap

  5. len

  6. copy

  7. make

  8. new

  9. panic

  10. recover

  11. print

  12. println

  13. real

  14. imag
  15. complex

#417 Go 模板

2020-12-01

fmt.Sprintf 字符串格式化

// Format the message with fmt.Sprintf: each %s verb is replaced, in order,
// by the corresponding argument.
tpl := `[%s] Your verify code is %s.`
s := fmt.Sprintf(tpl, "Markjour", "1234")
// Print with fmt.Println (standard output). The built-in println writes to
// standard error and is a bootstrapping aid that is not guaranteed to stay
// in the language.
fmt.Println(s)

os.Expand 变量替换

// Substitute ${name} placeholders with os.Expand. The lookup function is
// called with each placeholder name; a name missing from params yields the
// empty string (the map's zero value).
// This fragment needs the fmt and os packages.
tpl := `[${sign}] Your verify code is ${code}.`
params := map[string]string{"sign": "Markjour", "code": "1234"}
lookup := func(k string) string { return params[k] }
// fmt.Println writes to standard output; the built-in println writes to
// standard error and is not guaranteed to stay in the language.
fmt.Println(os.Expand(tpl, lookup))

text/template 和 html/template

这两个就可以处理复杂的情况,嵌套模板,控制语句都支持。

package main

import (
    "os"
    "text/template"
)

// main renders a one-line verification message with text/template and writes
// it to standard output.
func main() {
    tpl := `[{{.sign}}] Your verify code is {{.code}}.`
    // template.Must panics if Parse reports an error, so a malformed template
    // is caught here instead of being silently ignored.
    t := template.Must(template.New("just-a-name").Parse(tpl))
    params := map[string]string{"sign": "Markjour", "code": "1234"}
    // Execute returns an error when rendering or writing fails; surface it
    // rather than dropping it.
    if err := t.Execute(os.Stdout, params); err != nil {
        panic(err)
    }
}

附:strings.Map / bytes.Map 提供单个字符的替换

func Map(mapping func(rune) rune, s string) string
package main

import (
    "fmt"
    "strings"
)

// A is the plaintext alphabet: ASCII letters, digits, punctuation, space and
// newline.
const A = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \n"

// B is the cipher alphabet: the character at index i of B stands for the
// character at index i of A.
const B = "N'|&4:@ j{BI+Y!H/Q_iR\\FM}$moLe?#X\"WCE3S,8(r1f%T.;6DbaG]y`q~ltJxu-k2gA\nvhnd=)*s7Z5p^OK[V0z>9<UcwP"

// substitution returns a mapping function for strings.Map that replaces each
// rune found in from with the character at the same position in to.
//
// Runes that do not occur in from are dropped: the mapping returns a negative
// value, which strings.Map treats as "remove this character". Returning 0
// would insert a NUL character into the result instead.
func substitution(from, to string) func(rune) rune {
    return func(r rune) rune {
        // IndexRune yields a byte offset; indexing to with it is valid here
        // because both alphabets consist of single-byte (ASCII) characters.
        i := strings.IndexRune(from, r)
        if i < 0 {
            return -1
        }
        return rune(to[i])
    }
}

// main encrypts a sample sentence with the A->B substitution, prints the
// ciphertext, then decrypts it with the inverse B->A substitution.
func main() {
    encrypt := substitution(A, B)
    decrypt := substitution(B, A)

    raw := "Life was like a box of chocolate, you never know what you're gonna get."
    fmt.Println(raw)

    encrypted := strings.Map(encrypt, raw)
    fmt.Println(encrypted)
    // 3j:4wFN_wIjB4wNw'!Mw!:w| !|!INi4dw}!RwY4\4QwBY!FwF Niw}!RAQ4w@!YYNw@4i)

    decrypted := strings.Map(decrypt, encrypted)
    fmt.Println(decrypted)
}

参考资料与拓展阅读

#416 转载:Python 异步编程与数据库

2020-11-22

这是大神 zzzeek 2015 年发表的一篇文章,详细介绍了关于 SQLAlchemy 与异步编程的一些事情。解答了我关于如何实现异步编程的一些疑惑。
我曾反复阅读这篇文章好多遍,以求能够更加准确地领会到大佬阐述的意思。我认为每个 Python 的使用者都应该阅读阅读。

#415 Golang: cannot assign to struct field xxx in map

2020-11-20
package main

import (
    "fmt"
)

// Person is a small record used to demonstrate updating struct values that
// are stored in a map.
type Person struct {
    FirstName string
    LastName  string
}

// main shows two ways to change one field of a struct held in a map.
func main() {
    // Setup: a map whose values are Person structs stored by value.
    person := Person{FirstName: "John", LastName: "Doe"}
    people := map[int]Person{1: person}

    // Compile error: cannot assign to struct field people[1].FirstName in map
    // people[1].FirstName = "Jim"

    // Option 1: copy the value out, modify the copy, then store it back.
    // (When the key may be absent, fetch it with the comma-ok form
    // `v, ok := people[1]` and write back only if ok is true.)
    updated := people[1]
    updated.FirstName = "Alice"
    people[1] = updated
    fmt.Println(people)
    // map[1:{Alice Doe}]
    fmt.Println(people[1])
    // {Alice Doe}

    // Option 2: keep pointers in the map and assign through the pointer.
    byPointer := map[int]*Person{1: &person}
    byPointer[1].FirstName = "Adam"
    fmt.Println(byPointer)
    // map[1:0xc000060020]
    fmt.Println(byPointer[1])
    // &{Adam Doe}
}

总之,不能直接通过 key 找到 value(struct),然后修改其中的一个字段。原因是 map 的元素不可寻址(map 扩容时元素可能被搬移),因此 people[1].FirstName 不能作为赋值目标;要么整体取出、修改后再写回,要么在 map 中存放指针。

#414 Java 现状

2020-11-11

谷歌 Java 趋势

Oracle Java SE Support Roadmap

Release GA Date Premier Support Until Extended Support Until
7 (LTS) July 2011 July 2019 July 2022
8 (LTS) March 2014 March 2022 December 2030
9 September 2017 March 2018 -
10 March 2018 September 2018 -
11 (LTS) September 2018 September 2023 September 2026
12 March 2019 September 2019 -
13 September 2019 March 2020 -
14 March 2020 September 2020 -
15 September 2020 March 2021 -
16 March 2021 September 2021 -
17 (LTS) September 2021 September 2026 September 2029
18 March 2022 September 2022 -
19 September 2022 March 2023 -
20 March 2023 September 2023 -
21 (LTS) September 2023 September 2028 September 2031

PS:Java 9 开始引入了新的模块机制,标准库结构。

PS: 2021 年 Java 17 发布时,Oracle 宣布以后每两年一个 LTS 版本,也就是说下一个 LTS 版本是 21 而非 23。

参考资料与拓展阅读

#413 BOM 头的研究

2020-11-03

BOM 是 Byte Order Mark 的缩写,代表一个 Unicode 字符 U+FEFF
Windows 系统下的很多软件就用 BOM 字符作为 Magic Number, 用来确认文件的字符编码和字节顺序。

#412 字符编码

2020-11-01

从原理上来讲,我们的计算机其实只认识数字(要不然为什么叫做计算机),确切地说是 0 和 1,我们的文字信息存放在计算机中也是以数字形式存在。
所谓字符编码就是字符和数字之间的对应关系和转换规则。