This commit is contained in:
Parikshit Gothwal 2024-10-07 22:41:59 +05:30
parent a6da23819b
commit 7b46b3b0e8
10 changed files with 105 additions and 54 deletions

View File

@ -15,5 +15,4 @@ func Close(c *gin.Context) {
return return
} }
c.Status(http.StatusOK) c.Status(http.StatusOK)
} }

View File

@ -26,6 +26,7 @@ func Open(c *gin.Context) {
DBPath: "index.db", DBPath: "index.db",
}) })
if err != nil { if err != nil {
log.Println("client.Configure() err:", err)
c.Status(http.StatusInternalServerError) c.Status(http.StatusInternalServerError)
return return
} }

View File

@ -2,11 +2,28 @@ package elastic
import ( import (
"context" "context"
"fmt"
"elasticstream/source" "elasticstream/source"
) )
func (c *Client) Ack(ctx context.Context, position source.Position) error { func (c *Client) Ack(ctx context.Context, position source.Position) error {
curr := c.offsets[position.Index]
fmt.Println("curr:", curr)
fmt.Println("asked:", position.Pos)
for _, p := range c.positions {
if p.Index == position.Index {
fmt.Println("initial:", p.Pos)
if p.Pos > position.Pos {
return fmt.Errorf("not acknowledged pos less than initial position")
}
}
}
if curr < position.Pos {
return fmt.Errorf("not acknowledged pos more than current position")
}
return nil return nil
} }

View File

@ -1,24 +1,29 @@
package elastic package elastic
import ( import (
"sync"
"elasticstream/config" "elasticstream/config"
"elasticstream/opencdc" "elasticstream/opencdc"
"elasticstream/source"
"github.com/boltdb/bolt"
"github.com/elastic/go-elasticsearch/v8" "github.com/elastic/go-elasticsearch/v8"
) )
type Client struct { type Client struct {
cfg *config.Config cfg *config.Config
es *elasticsearch.Client es *elasticsearch.Client
db *bolt.DB offsets map[string]int
offsets map[string]int positions []source.Position
ch chan opencdc.Data ch chan opencdc.Data
shutdown chan struct{}
wg *sync.WaitGroup
} }
func NewClient() *Client { func NewClient() *Client {
client := &Client{ client := &Client{
offsets: make(map[string]int), offsets: make(map[string]int),
wg: &sync.WaitGroup{},
} }
return client return client
} }

View File

@ -9,7 +9,8 @@ import (
func (c *Client) Configure(ctx context.Context, cfg *config.Config) error { func (c *Client) Configure(ctx context.Context, cfg *config.Config) error {
if c == nil || c.ch == nil { // if c == nil || c.ch == nil {
if c == nil {
return fmt.Errorf("error source not opened for reading") return fmt.Errorf("error source not opened for reading")
} }

View File

@ -29,8 +29,11 @@ func (c *Client) Open(ctx context.Context, positions []source.Position) error {
// create a buffer channel // create a buffer channel
c.ch = make(chan opencdc.Data, c.cfg.BatchSize) c.ch = make(chan opencdc.Data, c.cfg.BatchSize)
c.shutdown = make(chan struct{})
for _, index := range c.cfg.Indexes { for _, index := range c.cfg.Indexes {
c.wg.Add(1)
offset := 0 offset := 0
for _, position := range positions { for _, position := range positions {
if index == position.Index { if index == position.Index {
@ -42,5 +45,7 @@ func (c *Client) Open(ctx context.Context, positions []source.Position) error {
NewWorker(c, index, offset) NewWorker(c, index, offset)
} }
c.positions = positions
return nil return nil
} }

View File

@ -5,15 +5,27 @@ import (
"encoding/json" "encoding/json"
"fmt" "fmt"
"strings" "strings"
"time"
"elasticstream/opencdc"
"github.com/elastic/go-elasticsearch/esapi" "github.com/elastic/go-elasticsearch/esapi"
"github.com/elastic/go-elasticsearch/v8" "github.com/elastic/go-elasticsearch/v8"
) )
// SearchResponse is the subset of the Elasticsearch search API response body
// that search decodes from JSON and returns to its caller.
type SearchResponse struct {
// Hits mirrors the top-level "hits" object of the response.
Hits struct {
// Total mirrors the "hits.total" object.
Total struct {
// Value is read from "hits.total.value"; presumably the number of
// documents matching the query — confirm against the Elasticsearch docs.
Value int `json:"value"`
} `json:"total"`
// Hits holds one entry per element of the "hits.hits" array.
Hits []struct {
// Index is read from the hit's "_index" key.
Index string `json:"_index"`
// ID is read from the hit's "_id" key.
ID string `json:"_id"`
// Source is the hit's "_source" object, decoded as a generic JSON map.
Source map[string]interface{} `json:"_source"`
} `json:"hits"`
} `json:"hits"`
}
// search is calling Elastic Search search API // search is calling Elastic Search search API
func search(client *elasticsearch.Client, index string, offset, size *int) ([]opencdc.Data, error) { func search(client *elasticsearch.Client, index string, offset, size *int) (*SearchResponse, error) {
query := fmt.Sprintf(`{ query := fmt.Sprintf(`{
"query": { "query": {
"match_all": {} "match_all": {}
@ -29,7 +41,8 @@ func search(client *elasticsearch.Client, index string, offset, size *int) ([]op
} }
// Perform the request // Perform the request
res, err := req.Do(context.Background(), client) ctx, _ := context.WithTimeout(context.TODO(), 5*time.Second)
res, err := req.Do(ctx, client)
if err != nil { if err != nil {
return nil, fmt.Errorf("error getting response: %s", err) return nil, fmt.Errorf("error getting response: %s", err)
} }
@ -39,37 +52,10 @@ func search(client *elasticsearch.Client, index string, offset, size *int) ([]op
return nil, fmt.Errorf("res.IsError() error: %s", res.String()) return nil, fmt.Errorf("res.IsError() error: %s", res.String())
} }
// Parse the response result := &SearchResponse{}
var result struct { if err := json.NewDecoder(res.Body).Decode(result); err != nil {
Hits struct {
Hits []struct {
Source map[string]interface{} `json:"_source"`
} `json:"hits"`
} `json:"hits"`
}
if err := json.NewDecoder(res.Body).Decode(&result); err != nil {
return nil, fmt.Errorf("error parsing the response body: %s", err) return nil, fmt.Errorf("error parsing the response body: %s", err)
} }
// Collect the records return result, nil
newRecords := make([]map[string]interface{}, len(result.Hits.Hits))
for i, hit := range result.Hits.Hits {
newRecords[i] = hit.Source
}
header := opencdc.Header{Index: index}
var records []opencdc.Data
for _, v := range newRecords {
data := opencdc.Data{
Header: header,
Payload: v,
}
records = append(records, data)
}
// log.Println("records:", records)
return records, nil
} }

View File

@ -3,13 +3,26 @@ package elastic
import ( import (
"context" "context"
"fmt" "fmt"
"log"
) )
// close the client // close the client
func (c *Client) Teardown(ctx context.Context) error { func (c *Client) Teardown(ctx context.Context) error {
log.Println(">>>> elastic.Teardown()")
defer log.Println("<<<< elastic.Teardown()")
if c == nil || c.ch == nil { if c == nil || c.ch == nil {
return fmt.Errorf("error source not opened for reading") return fmt.Errorf("error source not opened for reading")
} }
close(c.shutdown)
c.wg.Wait()
close(c.ch)
c.ch = nil
// c.es.Close()
return nil return nil
} }

View File

@ -1,8 +1,11 @@
package elastic package elastic
import ( import (
"fmt"
"log" "log"
"time" "time"
"elasticstream/opencdc"
) )
type Worker struct { type Worker struct {
@ -22,21 +25,42 @@ func NewWorker(client *Client, index string, offset int) {
} }
func (w *Worker) start() { func (w *Worker) start() {
defer w.client.wg.Done()
for { for {
log.Printf("worker index=%s offset=%d size=%d\n", w.index, w.offset, w.client.cfg.BatchSize) log.Printf("worker index=%s offset=%d size=%d\n", w.index, w.offset, w.client.cfg.BatchSize)
dataArray, err := search(w.client.es, w.index, &w.offset, &w.client.cfg.BatchSize) searchResponse, err := search(w.client.es, w.index, &w.offset, &w.client.cfg.BatchSize)
if err != nil { if err != nil || len(searchResponse.Hits.Hits) == 0 {
log.Println("search() err:", err) // log.Println("search() err:", err)
time.Sleep(1 * time.Second) select {
continue case <-w.client.shutdown:
fmt.Println("shuting donw..")
return
case <-time.After(time.Second):
continue
}
} }
for _, data := range dataArray { for _, hit := range searchResponse.Hits.Hits {
w.client.ch <- data data := opencdc.Data{
w.offset++ Header: opencdc.Header{
ID: hit.ID,
Index: hit.Index,
Position: w.offset + 1,
},
Payload: hit.Source,
}
select {
case w.client.ch <- data:
w.offset++
case <-w.client.shutdown:
fmt.Println("Stopping worker...")
return
}
} }
} }
} }

View File

@ -8,9 +8,9 @@ import (
) )
type Position struct { type Position struct {
ID string ID string `json:"id"`
Index string Index string `json:"index"`
Pos int Pos int `json:"pos"`
} }
type Source interface { type Source interface {