feat(poller): support scheduler to fetch task and dispatch to worker
This commit is contained in:
parent
5fc35faf17
commit
a13ed0c52e
2 changed files with 104 additions and 71 deletions
|
@ -59,6 +59,7 @@ func runDaemon(ctx context.Context, envFile string) func(cmd *cobra.Command, arg
|
||||||
poller := poller.New(
|
poller := poller.New(
|
||||||
cli,
|
cli,
|
||||||
runner.Run,
|
runner.Run,
|
||||||
|
cfg.Runner.Capacity,
|
||||||
)
|
)
|
||||||
|
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
|
@ -81,7 +82,7 @@ func runDaemon(ctx context.Context, envFile string) func(cmd *cobra.Command, arg
|
||||||
Errorln("failed to update runner")
|
Errorln("failed to update runner")
|
||||||
}
|
}
|
||||||
|
|
||||||
return poller.Poll(ctx, cfg.Runner.Capacity)
|
return poller.Poll(ctx)
|
||||||
})
|
})
|
||||||
|
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
|
|
166
poller/poller.go
166
poller/poller.go
|
@ -3,6 +3,7 @@ package poller
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"gitea.com/gitea/act_runner/client"
|
"gitea.com/gitea/act_runner/client"
|
||||||
|
@ -14,12 +15,13 @@ import (
|
||||||
|
|
||||||
var ErrDataLock = errors.New("Data Lock Error")
|
var ErrDataLock = errors.New("Data Lock Error")
|
||||||
|
|
||||||
func New(cli client.Client, dispatch func(context.Context, *runnerv1.Task) error) *Poller {
|
func New(cli client.Client, dispatch func(context.Context, *runnerv1.Task) error, workerNum int) *Poller {
|
||||||
return &Poller{
|
return &Poller{
|
||||||
Client: cli,
|
Client: cli,
|
||||||
Dispatch: dispatch,
|
Dispatch: dispatch,
|
||||||
routineGroup: newRoutineGroup(),
|
routineGroup: newRoutineGroup(),
|
||||||
metric: &metric{},
|
metric: &metric{},
|
||||||
|
workerNum: workerNum,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,83 +30,60 @@ type Poller struct {
|
||||||
Filter *client.Filter
|
Filter *client.Filter
|
||||||
Dispatch func(context.Context, *runnerv1.Task) error
|
Dispatch func(context.Context, *runnerv1.Task) error
|
||||||
|
|
||||||
|
sync.Mutex
|
||||||
routineGroup *routineGroup
|
routineGroup *routineGroup
|
||||||
metric *metric
|
metric *metric
|
||||||
|
ready chan struct{}
|
||||||
|
workerNum int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Poller) Wait() {
|
func (p *Poller) Wait() {
|
||||||
p.routineGroup.Wait()
|
p.routineGroup.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Poller) Poll(ctx context.Context, n int) error {
|
func (p *Poller) schedule() {
|
||||||
for i := 0; i < n; i++ {
|
p.Lock()
|
||||||
func(i int) {
|
defer p.Unlock()
|
||||||
p.routineGroup.Run(func() {
|
if int(p.metric.BusyWorkers()) >= p.workerNum {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case p.ready <- struct{}{}:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Poller) Poll(ctx context.Context) error {
|
||||||
|
l := log.WithField("func", "Poll")
|
||||||
|
|
||||||
|
for {
|
||||||
|
// signal readiness if the busy-worker count is below the worker limit
|
||||||
|
p.schedule()
|
||||||
|
|
||||||
|
select {
|
||||||
|
// wait until a worker is ready
|
||||||
|
case <-p.ready:
|
||||||
|
case <-ctx.Done():
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
LOOP:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
log.Infof("stopped the runner: %d", i+1)
|
break LOOP
|
||||||
return
|
|
||||||
default:
|
default:
|
||||||
if ctx.Err() != nil {
|
task, err := p.pollTask(ctx)
|
||||||
log.Infof("stopping the runner: %d", i+1)
|
if task == nil || err != nil {
|
||||||
return
|
|
||||||
}
|
|
||||||
if err := p.poll(ctx, i+1); err != nil {
|
|
||||||
log.WithField("thread", i+1).
|
|
||||||
WithError(err).Error("poll error")
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
case <-time.After(5 * time.Second):
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}(i)
|
|
||||||
}
|
|
||||||
p.routineGroup.Wait()
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (p *Poller) poll(ctx context.Context, thread int) error {
|
|
||||||
l := log.WithField("thread", thread)
|
|
||||||
l.Info("poller: request stage from remote server")
|
|
||||||
|
|
||||||
reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
// request a new build stage for execution from the central
|
|
||||||
// build server.
|
|
||||||
resp, err := p.Client.FetchTask(reqCtx, connect.NewRequest(&runnerv1.FetchTaskRequest{}))
|
|
||||||
if err == context.Canceled || err == context.DeadlineExceeded {
|
|
||||||
l.WithError(err).Trace("poller: no stage returned")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil && err == ErrDataLock {
|
|
||||||
l.WithError(err).Info("task accepted by another runner")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.WithError(err).Error("cannot accept task")
|
l.Errorf("can't find the task: %v", err.Error())
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
time.Sleep(5 * time.Second)
|
||||||
// exit if a nil or empty stage is returned from the system
|
break
|
||||||
// and allow the runner to retry.
|
|
||||||
if resp.Msg.Task == nil || resp.Msg.Task.Id == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
runCtx, cancel := context.WithTimeout(ctx, time.Hour)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
// update runner status
|
// update runner status
|
||||||
// running: idle -> active
|
// running: idle -> active
|
||||||
// stopped: active -> idle
|
|
||||||
if val := p.metric.IncBusyWorker(); val == 1 {
|
if val := p.metric.IncBusyWorker(); val == 1 {
|
||||||
if _, err := p.Client.UpdateRunner(
|
if _, err := p.Client.UpdateRunner(
|
||||||
ctx,
|
ctx,
|
||||||
|
@ -116,24 +95,77 @@ func (p *Poller) poll(ctx context.Context, thread int) error {
|
||||||
}
|
}
|
||||||
l.Info("update runner status to active")
|
l.Info("update runner status to active")
|
||||||
}
|
}
|
||||||
|
p.routineGroup.Run(func() {
|
||||||
defer func() {
|
if err := p.dispatchTask(ctx, task); err != nil {
|
||||||
if val := p.metric.DecBusyWorker(); val != 0 {
|
l.Errorf("execute task: %v", err.Error())
|
||||||
return
|
}
|
||||||
|
})
|
||||||
|
break LOOP
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *Poller) pollTask(ctx context.Context) (*runnerv1.Task, error) {
|
||||||
|
l := log.WithField("func", "pollTask")
|
||||||
|
l.Info("poller: request stage from remote server")
|
||||||
|
|
||||||
|
reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
// request a new build stage for execution from the central
|
||||||
|
// build server.
|
||||||
|
resp, err := p.Client.FetchTask(reqCtx, connect.NewRequest(&runnerv1.FetchTaskRequest{}))
|
||||||
|
if err == context.Canceled || err == context.DeadlineExceeded {
|
||||||
|
l.WithError(err).Trace("poller: no stage returned")
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil && err == ErrDataLock {
|
||||||
|
l.WithError(err).Info("task accepted by another runner")
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
l.WithError(err).Error("cannot accept task")
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// exit if a nil or empty stage is returned from the system
|
||||||
|
// and allow the runner to retry.
|
||||||
|
if resp.Msg.Task == nil || resp.Msg.Task.Id == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.Msg.Task, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *Poller) dispatchTask(ctx context.Context, task *runnerv1.Task) error {
|
||||||
|
l := log.WithField("func", "dispatchTask")
|
||||||
defer func() {
|
defer func() {
|
||||||
|
val := p.metric.DecBusyWorker()
|
||||||
|
e := recover()
|
||||||
|
if e != nil {
|
||||||
|
l.Errorf("panic error: %v", e)
|
||||||
|
}
|
||||||
|
p.schedule()
|
||||||
|
|
||||||
|
if val != 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
if _, err := p.Client.UpdateRunner(
|
if _, err := p.Client.UpdateRunner(
|
||||||
ctx,
|
ctx,
|
||||||
connect.NewRequest(&runnerv1.UpdateRunnerRequest{
|
connect.NewRequest(&runnerv1.UpdateRunnerRequest{
|
||||||
Status: runnerv1.RunnerStatus_RUNNER_STATUS_IDLE,
|
Status: runnerv1.RunnerStatus_RUNNER_STATUS_IDLE,
|
||||||
}),
|
}),
|
||||||
); err != nil {
|
); err != nil {
|
||||||
log.Errorln("update status error:", err.Error())
|
l.Errorln("update status error:", err.Error())
|
||||||
}
|
}
|
||||||
l.Info("update runner status to idle")
|
l.Info("update runner status to idle")
|
||||||
}()
|
}()
|
||||||
}()
|
|
||||||
|
|
||||||
return p.Dispatch(runCtx, resp.Msg.Task)
|
runCtx, cancel := context.WithTimeout(ctx, time.Hour)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
return p.Dispatch(runCtx, task)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue